aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-08-24 18:32:46 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2023-08-26 08:58:02 +0900
commit 00ac3a64ba57ecd8f10bf54f03297cdec0c538d6 (patch)
tree db4c230a17629f0e8996ffa01c300088a8305efe
parent cfdbbd67268a77177e485263cdd8fb416315e9a6 (diff)
download ruby-00ac3a64ba57ecd8f10bf54f03297cdec0c538d6.tar.gz
Introduce `at_char_boundary` function
-rw-r--r-- file.c 2
-rw-r--r-- internal/string.h 6
-rw-r--r-- io.c 3
-rw-r--r-- string.c 9
4 files changed, 12 insertions, 8 deletions
diff --git a/file.c b/file.c
index e753e116de..30dd1cc73b 100644
--- a/file.c
+++ b/file.c
@@ -4610,7 +4610,7 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
if (l1 < l2) return l1;
s = p+l1-l2;
- if (rb_enc_left_char_head(p, s, p+l1, enc) != s) return 0;
+ if (!at_char_boundary(p, s, p+l1, enc)) return 0;
#if CASEFOLD_FILESYSTEM
#define fncomp strncasecmp
#else
diff --git a/internal/string.h b/internal/string.h
index 5f59d9621b..cfaf628e02 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -119,6 +119,12 @@ is_broken_string(VALUE str)
return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN;
}
+static inline bool
+at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+ return rb_enc_left_char_head(s, p, e, enc) == p;
+}
+
/* expect tail call optimization */
// YJIT needs this function to never allocate and never raise
static inline VALUE
diff --git a/io.c b/io.c
index 6e27ed6273..bd1db9aa5d 100644
--- a/io.c
+++ b/io.c
@@ -4144,8 +4144,7 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
s = RSTRING_PTR(str);
e = RSTRING_END(str);
p = e - rslen;
- pp = rb_enc_left_char_head(s, p, e, enc);
- if (pp != p) continue;
+ if (!at_char_boundary(s, p, e, enc)) continue;
if (!rspara) rscheck(rsptr, rslen, rs);
if (memcmp(p, rsptr, rslen) == 0) {
if (chomp) {
diff --git a/string.c b/string.c
index 0b9ede4e2c..b65868a226 100644
--- a/string.c
+++ b/string.c
@@ -3930,8 +3930,7 @@ str_ensure_byte_pos(VALUE str, long pos)
const char *s = RSTRING_PTR(str);
const char *e = RSTRING_END(str);
const char *p = s + pos;
- const char *pp = rb_enc_left_char_head(s, p, e, rb_enc_get(str));
- if (p != pp) {
+ if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
rb_raise(rb_eIndexError,
"offset %ld does not land on character boundary", pos);
}
@@ -9521,7 +9520,7 @@ chompped_length(VALUE str, VALUE rs)
if (p[len-1] == newline &&
(rslen <= 1 ||
memcmp(rsptr, pp, rslen) == 0)) {
- if (rb_enc_left_char_head(p, pp, e, enc) == pp)
+ if (at_char_boundary(p, pp, e, enc))
return len - rslen;
RB_GC_GUARD(rs);
}
@@ -10497,7 +10496,7 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
p = RSTRING_PTR(str);
e = p + slen;
s = e - tlen;
- if (rb_enc_left_char_head(p, s, e, enc) != s)
+ if (!at_char_boundary(p, s, e, enc))
continue;
if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
return Qtrue;
@@ -10605,7 +10604,7 @@ deleted_suffix_length(VALUE str, VALUE suffix)
suffixptr = RSTRING_PTR(suffix);
s = strptr + olen - suffixlen;
if (memcmp(s, suffixptr, suffixlen) != 0) return 0;
- if (rb_enc_left_char_head(strptr, s, strptr + olen, enc) != s) return 0;
+ if (!at_char_boundary(strptr, s, strptr + olen, enc)) return 0;
return suffixlen;
}