diff options
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | enc/unicode.c | 8 | ||||
-rw-r--r-- | string.c | 26 | ||||
-rw-r--r-- | test/ruby/enc/test_case_mapping.rb | 7 | ||||
-rw-r--r-- | test/ruby/test_m17n_comb.rb | 2 |
5 files changed, 46 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Thu Jun 2 10:24:48 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+        * string.c: Raise ArgumentError when invalid string is detected in
+          case mapping methods.
+
+        * enc/unicode.c: Check for invalid string and signal with negative
+          length value.
+
+        * test/ruby/enc/test_case_mapping.rb: Add tests for above.
+
+        * test/ruby/test_m17n_comb.rb: Add a message to clarify test failure.
+
 Wed Jun 1 21:41:05 2016 Kazuki Yamaguchi <k@rhe.jp>
         * ext/openssl/extconf.rb: Check existence of ASN1_TIME_adj(). The old
diff --git a/enc/unicode.c b/enc/unicode.c
index 5a9aea0fa7..39d1e7a31f 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -669,14 +669,20 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
     OnigCodePoint code;
     OnigUChar *to_start = to;
     OnigCaseFoldType flags = *flagP;
+    int codepoint_length;
+
     to_end -= CASE_MAPPING_SLACK;
     /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
      * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
     flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
     while (*pp<end && to<=to_end) {
+        codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+        if (codepoint_length < 0)
+            return codepoint_length; /* encoding invalid */
         code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
-        *pp += enclen(enc, *pp, end);
+        *pp += codepoint_length;
+
         if (code<='z') { /* ASCII comes first */
             if (code>='a' && code<='z') {
                 if (flags&ONIGENC_CASE_UPCASE) {
diff --git a/string.c b/string.c
--- a/string.c
+++ b/string.c
@@ -5785,6 +5785,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
     mapping_buffer pre_buffer, /* only next pointer used */
                   *current_buffer = &pre_buffer;
     int buffer_count = 0;
+    int buffer_length_or_invalid;
     if (RSTRING_LEN(source) == 0) return rb_str_dup(source);
@@ -5799,12 +5800,23 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
         current_buffer = current_buffer->next;
         current_buffer->next = NULL;
         current_buffer->capa = capa;
-        target_length += current_buffer->used
-            = onigenc_unicode_case_map(flags,
-                    (const OnigUChar**)&source_current, source_end,
-                    current_buffer->space,
-                    current_buffer->space+current_buffer->capa,
-                    enc);
+        buffer_length_or_invalid = onigenc_unicode_case_map(flags,
+                                   (const OnigUChar**)&source_current, source_end,
+                                   current_buffer->space,
+                                   current_buffer->space+current_buffer->capa,
+                                   enc);
+        if (buffer_length_or_invalid < 0) {
+            mapping_buffer *previous_buffer;
+
+            current_buffer = pre_buffer.next;
+            while (current_buffer) {
+                previous_buffer = current_buffer;
+                current_buffer = current_buffer->next;
+                xfree(previous_buffer);
+            }
+            rb_raise(rb_eArgError, "input string invalid");
+        }
+        target_length += current_buffer->used = buffer_length_or_invalid;
     }
     /* fprintf(stderr, "Buffer count is %d\n", buffer_count); *//* for tuning */
@@ -5819,7 +5831,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
             memcpy(target_current, current_buffer->space, current_buffer->used);
             target_current += current_buffer->used;
             previous_buffer = current_buffer;
-            current_buffer=current_buffer->next;
+            current_buffer = current_buffer->next;
             xfree(previous_buffer);
         }
     }
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
index 2ef3360314..ac86729886 100644
--- a/test/ruby/enc/test_case_mapping.rb
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -60,6 +60,13 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
     check_swapcase_properties 'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)', :lithuanian
   end
+  def test_invalid
+    assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".force_encoding('UTF-8').upcase }
+    assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".force_encoding('UTF-8').downcase }
+    assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".force_encoding('UTF-8').capitalize }
+    assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".force_encoding('UTF-8').swapcase }
+  end
+
   def test_general
     check_downcase_properties 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', :lithuanian
     check_upcase_properties 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', :lithuanian
diff --git a/test/ruby/test_m17n_comb.rb b/test/ruby/test_m17n_comb.rb
index 054287e2b5..99c162a92f 100644
--- a/test/ruby/test_m17n_comb.rb
+++ b/test/ruby/test_m17n_comb.rb
@@ -808,7 +808,7 @@ class TestM17NComb < Test::Unit::TestCase
   def test_str_downcase
     STRINGS.each {|s|
       if !s.valid_encoding?
-        assert_raise(ArgumentError) { s.downcase }
+        assert_raise(ArgumentError, "Offending string: #{s.inspect}, encoding: #{s.encoding}") { s.downcase }
         next
       end
       t = s.downcase