about | summary | refs | log | tree | commit | diff | stats
path: root/string.c
diff options
context:
space:
mode:
author: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-22 07:58:39 +0000
committer: naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-03-22 07:58:39 +0000
commit: 2097b1a082b8d01b7fcbe5f9830a2a01e147c1a3 (patch)
tree: 2f58f25fef269a59d6bf5e235063da7e49b3ab35 /string.c
parent: d3f1e33d4bd852e66d549e5a008677966e520e8c (diff)
download: ruby-2097b1a082b8d01b7fcbe5f9830a2a01e147c1a3.tar.gz
Factor out get_reg_grapheme_cluster
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62893 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 66
1 files changed, 24 insertions, 42 deletions
diff --git a/string.c b/string.c
index bd7079f05d..ca5bf58b7a 100644
--- a/string.c
+++ b/string.c
@@ -8355,20 +8355,12 @@ rb_str_codepoints(VALUE str)
return rb_str_enumerate_codepoints(str, ary);
}
-static VALUE
-rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
+static regex_t *
+get_reg_grapheme_cluster(rb_encoding *enc)
{
- long grapheme_cluster_count = 0;
+ int encidx = rb_enc_to_index(enc);
regex_t *reg_grapheme_cluster = NULL;
static regex_t *reg_grapheme_cluster_utf8 = NULL;
- int encidx = ENCODING_GET(str);
- rb_encoding *enc = rb_enc_from_index(encidx);
- int unicode_p = rb_enc_unicode_p(enc);
- const char *ptr, *end;
-
- if (!unicode_p || single_byte_optimizable(str)) {
- return rb_str_length(str);
- }
/* synchronize */
if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
@@ -8385,7 +8377,22 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
}
}
+ return reg_grapheme_cluster;
+}
+
+static VALUE
+rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
+{
+ size_t grapheme_cluster_count = 0;
+ regex_t *reg_grapheme_cluster = NULL;
+ rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
+ const char *ptr, *end;
+
+ if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
+ return rb_str_length(str);
+ }
+ reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
@@ -8393,16 +8400,12 @@ rb_str_each_grapheme_cluster_size(VALUE str, VALUE args, VALUE eobj)
OnigPosition len = onig_match(reg_grapheme_cluster,
(const OnigUChar *)ptr, (const OnigUChar *)end,
(const OnigUChar *)ptr, NULL, 0);
- if (len == 0) break;
- if (len < 0) {
- break;
- }
+ if (len <= 0) break;
grapheme_cluster_count++;
ptr += len;
}
- RB_GC_GUARD(str);
- return LONG2NUM(grapheme_cluster_count);
+ return SIZET2NUM(grapheme_cluster_count);
}
static VALUE
@@ -8410,33 +8413,15 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
{
VALUE orig = str;
regex_t *reg_grapheme_cluster = NULL;
- static regex_t *reg_grapheme_cluster_utf8 = NULL;
- int encidx = ENCODING_GET(str);
- rb_encoding *enc = rb_enc_from_index(encidx);
- int unicode_p = rb_enc_unicode_p(enc);
+ rb_encoding *enc = rb_enc_from_index(ENCODING_GET(str));
const char *ptr, *end;
- if (!unicode_p || single_byte_optimizable(str)) {
+ if (!rb_enc_unicode_p(enc) || single_byte_optimizable(str)) {
return rb_str_enumerate_chars(str, ary);
}
- /* synchronize */
- if (encidx == rb_utf8_encindex() && reg_grapheme_cluster_utf8) {
- reg_grapheme_cluster = reg_grapheme_cluster_utf8;
- }
- if (!reg_grapheme_cluster) {
- const OnigUChar source[] = "\\X";
- int r = onig_new(&reg_grapheme_cluster, source, source + sizeof(source) - 1,
- ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, NULL);
- if (r) {
- rb_bug("cannot compile grapheme cluster regexp");
- }
- if (encidx == rb_utf8_encindex()) {
- reg_grapheme_cluster_utf8 = reg_grapheme_cluster;
- }
- }
-
if (!ary) str = rb_str_new_frozen(str);
+ reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
ptr = RSTRING_PTR(str);
end = RSTRING_END(str);
@@ -8444,10 +8429,7 @@ rb_str_enumerate_grapheme_clusters(VALUE str, VALUE ary)
OnigPosition len = onig_match(reg_grapheme_cluster,
(const OnigUChar *)ptr, (const OnigUChar *)end,
(const OnigUChar *)ptr, NULL, 0);
- if (len == 0) break;
- if (len < 0) {
- break;
- }
+ if (len <= 0) break;
ENUM_ELEM(ary, rb_enc_str_new(ptr, len, enc));
ptr += len;
}