diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | re.c | 44 | ||||
-rw-r--r-- | string.c | 7 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 3 |
4 files changed, 47 insertions, 12 deletions
@@ -1,3 +1,8 @@ +Mon Aug 3 10:08:33 2015 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * re.c (rb_memsearch): should match only char boundaries in wide + character encodings. [ruby-core:70220] [Bug #11413] + Sun Aug 2 07:01:17 2015 Eric Wong <e@80x24.org> * ext/openssl/lib/openssl/buffering.rb (gets): @@ -221,6 +221,32 @@ rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, l return -1; } +static inline long +rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 2}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + +static inline long +rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n) +{ + const unsigned char *x = xs, x0 = *xs, *y = ys; + enum {char_size = 4}; + + for (n -= m; n > 0; n -= char_size, y += char_size) { + if (x0 == *y && memcmp(x+1, y+1, m-1) == 0) + return y - ys; + } + return -1; +} + long rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) { @@ -241,15 +267,21 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc) else return -1; } - else if (m <= SIZEOF_VALUE) { - return rb_memsearch_ss(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 1)) { + if (m <= SIZEOF_VALUE) { + return rb_memsearch_ss(x0, m, y0, n); + } + else if (enc == rb_utf8_encoding()){ + return rb_memsearch_qs_utf8(x0, m, y0, n); + } } - else if (enc == rb_utf8_encoding()){ - return rb_memsearch_qs_utf8(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 2)) { + return rb_memsearch_wchar(x0, m, y0, n); } - else { - return rb_memsearch_qs(x0, m, y0, n); + else if (LIKELY(rb_enc_mbminlen(enc) == 4)) { + return rb_memsearch_qchar(x0, m, y0, n); } + return rb_memsearch_qs(x0, m, y0, n); } #define REG_LITERAL FL_USER5 @@ -6544,15 +6544,10 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str) } enc = STR_ENC_GET(str); - if (NIL_P(spat)) { - if (!NIL_P(rb_fs)) { - spat = rb_fs; - goto fs_set; - } + if (NIL_P(spat) && NIL_P(spat = rb_fs)) { split_type = awk; } else { - fs_set: spat = get_pat_quoted(spat, 0); if (BUILTIN_TYPE(spat) == T_STRING) { rb_encoding *enc2 = STR_ENC_GET(spat); diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index d655dbf9e7..eb2063ebea 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -1236,6 +1236,9 @@ class TestM17N < Test::Unit::TestCase each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected| assert_equal(expected, str.split(sep, -1)) end + each_encoding("abc\0def", "\0", "abc", "def") do |str, sep, *expected| + assert_equal(expected, str.split(sep, -1)) + end end def test_nonascii_method_name |