diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-23 02:58:51 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2013-04-23 02:58:51 +0000 |
commit | 596ca948b1d6f2d22b39a4a9c042cc3d164eae13 (patch) | |
tree | c32a17e47f8b2842dd47999c9d761d64e002bc8d | |
parent | eae1366b3867fe304b54c2adcc87fdd583990077 (diff) | |
download | ruby-596ca948b1d6f2d22b39a4a9c042cc3d164eae13.tar.gz |
string.c: fix for UTF-32
* string.c (rb_str_scrub): fix for UTF-32. strlen() on strings
containing NUL returns the wrong result; use the sizeof operator instead.
[ruby-dev:45975] [Feature #6752]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@40417 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | string.c | 26 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 6 |
3 files changed, 24 insertions, 14 deletions
@@ -1,3 +1,9 @@ +Tue Apr 23 11:58:46 2013 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * string.c (rb_str_scrub): fix for UTF-32. strlen() on strings + contain NUL returns wrong result, use sizeof operator instead. + [ruby-dev:45975] [Feature #6752] + Tue Apr 23 10:26:50 2013 Akinori MUSHA <knu@iDaemons.org> * test/ruby/test_module.rb @@ -7805,6 +7805,11 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str) return rb_str_dup(str); } +#define DEFAULT_REPLACE_CHAR(str) do { \ + static const char replace[sizeof(str)-1] = str; \ + rep = replace; replen = (int)sizeof(replace); \ + } while (0) + if (rb_enc_asciicompat(enc)) { const char *p = RSTRING_PTR(str); const char *e = RSTRING_END(str); @@ -7824,13 +7829,11 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str) rep7bit_p = (ENC_CODERANGE(repl) == ENC_CODERANGE_7BIT); } else if (enc == rb_utf8_encoding()) { - rep = "\xEF\xBF\xBD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xEF\xBF\xBD"); rep7bit_p = FALSE; } else { - rep = "?"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("?"); rep7bit_p = TRUE; } cr = ENC_CODERANGE_7BIT; @@ -7938,24 +7941,19 @@ rb_str_scrub(int argc, VALUE *argv, VALUE str) replen = RSTRING_LEN(repl); } else if (enc == utf16be) { - rep = "\xFF\xFD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFF\xFD"); } else if (enc == utf16le) { - rep = "\xFD\xFF"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFD\xFF"); } else if (enc == utf32be) { - rep = "\x00\x00\xFF\xFD"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\x00\x00\xFF\xFD"); } else if (enc == utf32le) { - rep = "\xFD\xFF\x00\x00"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("\xFD\xFF\x00\x00"); } else { - rep = "?"; - replen = strlen(rep); + DEFAULT_REPLACE_CHAR("?"); } while (p < e) { diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 60834bb9c6..9d92fbc6a9 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1522,5 +1522,11 @@ class TestM17N < Test::Unit::TestCase 
assert_equal("\uFFFD\u3042".encode("UTF-16LE"), "\x00\xD8\x42\x30".force_encoding(Encoding::UTF_16LE). scrub) + assert_equal("\uFFFD".encode("UTF-32BE"), + "\xff".force_encoding(Encoding::UTF_32BE). + scrub) + assert_equal("\uFFFD".encode("UTF-32LE"), + "\xff".force_encoding(Encoding::UTF_32LE). + scrub) end end |