From db30e7bb96265861a8e65d6faff724f4e3d27c3f Mon Sep 17 00:00:00 2001 From: duerst Date: Thu, 2 Jun 2016 01:24:52 +0000 Subject: * string.c: Raise ArgumentError when invalid string is detected in case mapping methods. * enc/unicode.c: Check for invalid string and signal with negative length value. * test/ruby/enc/test_case_mapping.rb: Add tests for above. * test/ruby/test_m17n_comb.rb: Add a message to clarify test failure. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55253 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 12 ++++++++++++ enc/unicode.c | 8 +++++++- string.c | 26 +++++++++++++++++++------- test/ruby/enc/test_case_mapping.rb | 7 +++++++ test/ruby/test_m17n_comb.rb | 2 +- 5 files changed, 46 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index cd1a0f980b..9ef0400bc6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Thu Jun 2 10:24:48 2016 Martin Duerst + + * string.c: Raise ArgumentError when invalid string is detected in + case mapping methods. + + * enc/unicode.c: Check for invalid string and signal with negative + length value. + + * test/ruby/enc/test_case_mapping.rb: Add tests for above. + + * test/ruby/test_m17n_comb.rb: Add a message to clarify test failure. + Wed Jun 1 21:41:05 2016 Kazuki Yamaguchi * ext/openssl/extconf.rb: Check existence of ASN1_TIME_adj(). 
The old diff --git a/enc/unicode.c b/enc/unicode.c index 5a9aea0fa7..39d1e7a31f 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -669,14 +669,20 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; + int codepoint_length; + to_end -= CASE_MAPPING_SLACK; /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */ flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET; while (*pp<end && to<=to_end) { + codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); + if (codepoint_length < 0) + return codepoint_length; /* encoding invalid */ code = ONIGENC_MBC_TO_CODE(enc, *pp, end); - *pp += enclen(enc, *pp, end); + *pp += codepoint_length; + if (code<='z') { if (code>='a' && code<='z') { if (flags&ONIGENC_CASE_UPCASE) { diff --git a/string.c b/string.c index ad4a115c42..44823520f0 100644 --- a/string.c +++ b/string.c @@ -5785,6 +5785,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) mapping_buffer pre_buffer, /* only next pointer used */ *current_buffer = &pre_buffer; int buffer_count = 0; + int buffer_length_or_invalid; if (RSTRING_LEN(source) == 0) return rb_str_dup(source); @@ -5799,12 +5800,23 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) current_buffer = current_buffer->next; current_buffer->next = NULL; current_buffer->capa = capa; - target_length += current_buffer->used - = onigenc_unicode_case_map(flags, - (const OnigUChar**)&source_current, source_end, - current_buffer->space, - current_buffer->space+current_buffer->capa, - enc); + buffer_length_or_invalid = onigenc_unicode_case_map(flags, + (const OnigUChar**)&source_current, source_end, + current_buffer->space, + current_buffer->space+current_buffer->capa, + enc); + if (buffer_length_or_invalid < 0) { + mapping_buffer *previous_buffer; + + current_buffer = pre_buffer.next; + while (current_buffer) { + previous_buffer = current_buffer; + current_buffer = current_buffer->next; + xfree(previous_buffer); + } + rb_raise(rb_eArgError, "input string invalid"); + } + target_length += current_buffer->used = buffer_length_or_invalid; } /* fprintf(stderr, "Buffer count is %d\n", buffer_count); *//* for tuning 
*/ @@ -5819,7 +5831,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc) memcpy(target_current, current_buffer->space, current_buffer->used); target_current += current_buffer->used; previous_buffer = current_buffer; - current_buffer=current_buffer->next; + current_buffer = current_buffer->next; xfree(previous_buffer); } } diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index 2ef3360314..ac86729886 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -60,6 +60,13 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase check_swapcase_properties 'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)', :lithuanian end + def test_invalid + assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".force_encoding('UTF-8').upcase } + assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".force_encoding('UTF-8').downcase } + assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".force_encoding('UTF-8').capitalize } + assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".force_encoding('UTF-8').swapcase } + end + def test_general check_downcase_properties 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', :lithuanian check_upcase_properties 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', :lithuanian diff --git a/test/ruby/test_m17n_comb.rb b/test/ruby/test_m17n_comb.rb index 054287e2b5..99c162a92f 100644 --- a/test/ruby/test_m17n_comb.rb +++ b/test/ruby/test_m17n_comb.rb @@ -808,7 +808,7 @@ class TestM17NComb < Test::Unit::TestCase def test_str_downcase STRINGS.each {|s| if !s.valid_encoding? - assert_raise(ArgumentError) { s.downcase } + assert_raise(ArgumentError, "Offending string: #{s.inspect}, encoding: #{s.encoding}") { s.downcase } next end t = s.downcase -- cgit v1.2.3