about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-06-02 01:24:52 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-06-02 01:24:52 +0000
commit: db30e7bb96265861a8e65d6faff724f4e3d27c3f (patch)
tree: afcc1e9b300fbe31a1b078c067ea5b85f01b2382
parent: bbe6fefbd24f49431c44ad556cca580e8644e561 (diff)
download: ruby-db30e7bb96265861a8e65d6faff724f4e3d27c3f.tar.gz
* string.c: Raise ArgumentError when invalid string is detected in
  case mapping methods.

* enc/unicode.c: Check for invalid string and signal with negative
  length value.

* test/ruby/enc/test_case_mapping.rb: Add tests for above.

* test/ruby/test_m17n_comb.rb: Add a message to clarify test failure.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@55253 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 12
-rw-r--r-- enc/unicode.c 8
-rw-r--r-- string.c 26
-rw-r--r-- test/ruby/enc/test_case_mapping.rb 7
-rw-r--r-- test/ruby/test_m17n_comb.rb 2
5 files changed, 46 insertions, 9 deletions
diff --git a/ChangeLog b/ChangeLog
index cd1a0f980b..9ef0400bc6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+Thu Jun 2 10:24:48 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * string.c: Raise ArgumentError when invalid string is detected in
+ case mapping methods.
+
+ * enc/unicode.c: Check for invalid string and signal with negative
+ length value.
+
+ * test/ruby/enc/test_case_mapping.rb: Add tests for above.
+
+ * test/ruby/test_m17n_comb.rb: Add a message to clarify test failure.
+
Wed Jun 1 21:41:05 2016 Kazuki Yamaguchi <k@rhe.jp>
* ext/openssl/extconf.rb: Check existence of ASN1_TIME_adj(). The old
diff --git a/enc/unicode.c b/enc/unicode.c
index 5a9aea0fa7..39d1e7a31f 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -669,14 +669,20 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
+ int codepoint_length;
+
to_end -= CASE_MAPPING_SLACK;
/* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
* ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
while (*pp<end && to<=to_end) {
+ codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+ if (codepoint_length < 0)
+ return codepoint_length; /* encoding invalid */
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
- *pp += enclen(enc, *pp, end);
+ *pp += codepoint_length;
+
if (code<='z') { /* ASCII comes first */
if (code>='a' && code<='z') {
if (flags&ONIGENC_CASE_UPCASE) {
diff --git a/string.c b/string.c
index ad4a115c42..44823520f0 100644
--- a/string.c
+++ b/string.c
@@ -5785,6 +5785,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
mapping_buffer pre_buffer, /* only next pointer used */
*current_buffer = &pre_buffer;
int buffer_count = 0;
+ int buffer_length_or_invalid;
if (RSTRING_LEN(source) == 0) return rb_str_dup(source);
@@ -5799,12 +5800,23 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
current_buffer = current_buffer->next;
current_buffer->next = NULL;
current_buffer->capa = capa;
- target_length += current_buffer->used
- = onigenc_unicode_case_map(flags,
- (const OnigUChar**)&source_current, source_end,
- current_buffer->space,
- current_buffer->space+current_buffer->capa,
- enc);
+ buffer_length_or_invalid = onigenc_unicode_case_map(flags,
+ (const OnigUChar**)&source_current, source_end,
+ current_buffer->space,
+ current_buffer->space+current_buffer->capa,
+ enc);
+ if (buffer_length_or_invalid < 0) {
+ mapping_buffer *previous_buffer;
+
+ current_buffer = pre_buffer.next;
+ while (current_buffer) {
+ previous_buffer = current_buffer;
+ current_buffer = current_buffer->next;
+ xfree(previous_buffer);
+ }
+ rb_raise(rb_eArgError, "input string invalid");
+ }
+ target_length += current_buffer->used = buffer_length_or_invalid;
}
/* fprintf(stderr, "Buffer count is %d\n", buffer_count); *//* for tuning */
@@ -5819,7 +5831,7 @@ rb_str_casemap(VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
memcpy(target_current, current_buffer->space, current_buffer->used);
target_current += current_buffer->used;
previous_buffer = current_buffer;
- current_buffer=current_buffer->next;
+ current_buffer = current_buffer->next;
xfree(previous_buffer);
}
}
diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb
index 2ef3360314..ac86729886 100644
--- a/test/ruby/enc/test_case_mapping.rb
+++ b/test/ruby/enc/test_case_mapping.rb
@@ -60,6 +60,13 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase
check_swapcase_properties 'yUKIHIRO matsumoto (MAtz)', 'Yukihiro MATSUMOTO (maTZ)', :lithuanian
end
+ def test_invalid
+ assert_raise(ArgumentError, "Should not be possible to upcase invalid string.") { "\xEB".force_encoding('UTF-8').upcase }
+ assert_raise(ArgumentError, "Should not be possible to downcase invalid string.") { "\xEB".force_encoding('UTF-8').downcase }
+ assert_raise(ArgumentError, "Should not be possible to capitalize invalid string.") { "\xEB".force_encoding('UTF-8').capitalize }
+ assert_raise(ArgumentError, "Should not be possible to swapcase invalid string.") { "\xEB".force_encoding('UTF-8').swapcase }
+ end
+
def test_general
check_downcase_properties 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', :lithuanian
check_upcase_properties 'RÉSUMÉ DÜRST ĬÑŦĖŘŊÃŢIJŇŐŃÆŁĨŻÀŤÏŌŅ', 'résumé dürst ĭñŧėřŋãţijňőńæłĩżàťïōņ', :lithuanian
diff --git a/test/ruby/test_m17n_comb.rb b/test/ruby/test_m17n_comb.rb
index 054287e2b5..99c162a92f 100644
--- a/test/ruby/test_m17n_comb.rb
+++ b/test/ruby/test_m17n_comb.rb
@@ -808,7 +808,7 @@ class TestM17NComb < Test::Unit::TestCase
def test_str_downcase
STRINGS.each {|s|
if !s.valid_encoding?
- assert_raise(ArgumentError) { s.downcase }
+ assert_raise(ArgumentError, "Offending string: #{s.inspect}, encoding: #{s.encoding}") { s.downcase }
next
end
t = s.downcase