diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-03-22 07:58:39 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-03-22 07:58:39 +0000 |
commit | 42f1b589649b78d0a50effb8b16c7bfd5b90e518 (patch) | |
tree | 2f58f25fef269a59d6bf5e235063da7e49b3ab35 /string.c | |
parent | 41b2ef468597a120d52f3f73cda47cd284ab1f99 (diff) | |
download | ruby-42f1b589649b78d0a50effb8b16c7bfd5b90e518.tar.gz |
Factor out get_reg_grapheme_cluster
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62893 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 66 |
1 files changed, 24 insertions, 42 deletions
@@ -8355,20 +8355,12 @@ rb_str_codepoints(VALUE str) return rb_str_enumerate_codepoints(str, ary); } -static VALUE -rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) +static regex_t * +get_reg_grapheme_cluster(rb_encoding *enc) { - long grapheme_cluster_count = 0; + int encidx = rb_enc_to_index(enc); regex_t *reg_grapheme_cluster = NULL; static regex_t *reg_grapheme_cluster_utf8 = NULL; - int encidx = ENCODING_GET(str); - rb_encoding *enc = rb_enc_from_index(encidx); - int unicode_p = rb_enc_unicode_p(enc); - const char *ptr, *end; - - if (!unicode_p || single_byte_optimizable(str)) { - return rb_str_length(str); - } /* synchronize */ if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) { @@ -8385,7 +8377,22 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) reg_grapheme_cluster_utf8 = reg_grapheme_cluster; } } + return reg_grapheme_cluster; +} + +static VALUE +rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) +{ + size_t grapheme_cluster_count = 0; + regex_t *reg_grapheme_cluster = NULL; + rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); + const char *ptr, *end; + + if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { + return rb_str_length(str); + } + reg_grapheme_cluster = get_reg_grapheme_cluster(enc); ptr = RSTRING_PTR(str); end = RSTRING_END(str); @@ -8393,16 +8400,12 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj) OnigPosition len = onig_match(reg_grapheme_cluster, (const OnigUChar *)ptr, (const OnigUChar *)end, (const OnigUChar *)ptr, NULL, 0); - if (len == 0) break; - if (len < 0) { - break; - } + if (len <= 0) break; grapheme_cluster_count++; ptr += len; } - RB_GC_GUARD(str); - return LONG2NUM(grapheme_cluster_count); + return SIZET2NUM(grapheme_cluster_count); } static VALUE @@ -8410,33 +8413,15 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) { VALUE orig = str; regex_t *reg_grapheme_cluster = NULL; - static regex_t 
*reg_grapheme_cluster_utf8 = NULL; - int encidx = ENCODING_GET(str); - rb_encoding *enc = rb_enc_from_index(encidx); - int unicode_p = rb_enc_unicode_p(enc); + rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str)); const char *ptr, *end; - if (!unicode_p || single_byte_optimizable(str)) { + if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) { return rb_str_enumerate_chars(str, ary); } - /* synchronize */ - if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) { - reg_grapheme_cluster = reg_grapheme_cluster_utf8; - } - if (!reg_grapheme_cluster) { - const OnigUChar source[] = "\\X"; - int r = onig_new(&reg_grapheme_cluster, source, source + sizeof(source) - 1, - ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, NULL); - if (r) { - rb_bug("cannot compile grapheme cluster regexp"); - } - if (encidx == rb_utf8_encindex()) { - reg_grapheme_cluster_utf8 = reg_grapheme_cluster; - } - } - if (!ary) str = rb_str_new_frozen(str); + reg_grapheme_cluster = get_reg_grapheme_cluster(enc); ptr = RSTRING_PTR(str); end = RSTRING_END(str); @@ -8444,10 +8429,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary) OnigPosition len = onig_match(reg_grapheme_cluster, (const OnigUChar *)ptr, (const OnigUChar *)end, (const OnigUChar *)ptr, NULL, 0); - if (len == 0) break; - if (len < 0) { - break; - } + if (len <= 0) break; ENUM_ELEM(ary, rb_enc_str_new(ptr, len, enc)); ptr += len; } |