diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-03-22 11:18:00 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-03-22 11:18:00 +0000 |
commit | c40df5a76941d3a8c2cff46432b23401b6ffffbc (patch) | |
tree | b726b9076de34830cfe728987b29a2a0bf13044a | |
parent | 06e42980992c3e231ba5a5c6cf9457980477d78b (diff) | |
download | ruby-c40df5a76941d3a8c2cff46432b23401b6ffffbc.tar.gz |
merge revision(s) 62892,62893: [Backport #14363]
Fix the size reported by the each_grapheme_cluster enumerator [Bug #14363]
From: Hugo Peixoto <hugo.peixoto@gmail.com>
Factor out get_reg_grapheme_cluster
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_2_5@62896 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | string.c | 64 | ||||
-rw-r--r-- | test/ruby/test_string.rb | 13 | ||||
-rw-r--r-- | version.h | 2 |
3 files changed, 59 insertions, 20 deletions
```diff
@@ -8309,20 +8309,12 @@ rb_str_codepoints(VALUE str)
     return rb_str_enumerate_codepoints(str, ary);
 }
 
-static VALUE
-rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
+static regex_t *
+get_reg_grapheme_cluster(rb_encoding *enc)
 {
-    VALUE orig = str;
+    int encidx = rb_enc_to_index(enc);
     regex_t *reg_grapheme_cluster = NULL;
     static regex_t *reg_grapheme_cluster_utf8 = NULL;
-    int encidx = ENCODING_GET(str);
-    rb_encoding *enc = rb_enc_from_index(encidx);
-    int unicode_p = rb_enc_unicode_p(enc);
-    const char *ptr, *end;
-
-    if (!unicode_p || single_byte_optimizable(str)) {
-        return rb_str_enumerate_chars(str, ary);
-    }
 
     /* synchronize */
     if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
@@ -8339,8 +8331,51 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
             reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
         }
     }
+    return reg_grapheme_cluster;
+}
+
+static VALUE
+rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
+{
+    size_t grapheme_cluster_count = 0;
+    regex_t *reg_grapheme_cluster = NULL;
+    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
+    const char *ptr, *end;
+
+    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+        return rb_str_length(str);
+    }
+
+    reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
+    ptr = RSTRING_PTR(str);
+    end = RSTRING_END(str);
+
+    while (ptr < end) {
+        OnigPosition len = onig_match(reg_grapheme_cluster,
+                                      (const OnigUChar *)ptr, (const OnigUChar *)end,
+                                      (const OnigUChar *)ptr, NULL, 0);
+        if (len <= 0) break;
+        grapheme_cluster_count++;
+        ptr += len;
+    }
+
+    return SIZET2NUM(grapheme_cluster_count);
+}
+
+static VALUE
+rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
+{
+    VALUE orig = str;
+    regex_t *reg_grapheme_cluster = NULL;
+    rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
+    const char *ptr, *end;
+
+    if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+        return rb_str_enumerate_chars(str, ary);
+    }
 
     if (!ary) str = rb_str_new_frozen(str);
+    reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
     ptr = RSTRING_PTR(str);
     end = RSTRING_END(str);
 
@@ -8348,10 +8383,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
         OnigPosition len = onig_match(reg_grapheme_cluster,
                                       (const OnigUChar *)ptr, (const OnigUChar *)end,
                                       (const OnigUChar *)ptr, NULL, 0);
-        if (len == 0) break;
-        if (len < 0) {
-            break;
-        }
+        if (len <= 0) break;
         ENUM_ELEM(ary, rb_enc_str_new(ptr, len, enc));
         ptr += len;
     }
@@ -8380,7 +8412,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
 static VALUE
 rb_str_each_grapheme_cluster(VALUE str)
 {
-    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_char_size);
+    RETURN_SIZED_ENUMERATOR(str, 0, 0, rb_str_each_grapheme_cluster_size);
     return rb_str_enumerate_grapheme_clusters(str, 0);
 }
 
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index dd3a0349b5..f91ec297e0 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -980,11 +980,18 @@ CODE
       "\u{1f469 200d 2764 fe0f 200d 1f469}",
     ].each do |g|
       assert_equal [g], g.each_grapheme_cluster.to_a
+      assert_equal 1, g.each_grapheme_cluster.size
+    end
+
+    [
+      ["\u{a 308}", ["\u000A", "\u0308"]],
+      ["\u{d 308}", ["\u000D", "\u0308"]],
+      ["abc", ["a", "b", "c"]],
+    ].each do |str, grapheme_clusters|
+      assert_equal grapheme_clusters, str.each_grapheme_cluster.to_a
+      assert_equal grapheme_clusters.size, str.each_grapheme_cluster.size
     end
-    assert_equal ["\u000A", "\u0308"], "\u{a 308}".each_grapheme_cluster.to_a
-    assert_equal ["\u000D", "\u0308"], "\u{d 308}".each_grapheme_cluster.to_a
-    assert_equal ["a", "b", "c"], "abc".b.each_grapheme_cluster.to_a
 
     s = ("x"+"\u{10ABCD}"*250000)
     assert_empty(s.each_grapheme_cluster {s.clear})
   end
@@ -1,6 +1,6 @@
 #define RUBY_VERSION "2.5.1"
 #define RUBY_RELEASE_DATE "2018-03-22"
-#define RUBY_PATCHLEVEL 49
+#define RUBY_PATCHLEVEL 50
 #define RUBY_RELEASE_YEAR 2018
 #define RUBY_RELEASE_MONTH 3
```