diff options
author | Nobuyoshi Nakada <nobu@ruby-lang.org> | 2023-08-24 18:32:46 +0900 |
---|---|---|
committer | Nobuyoshi Nakada <nobu@ruby-lang.org> | 2023-08-26 08:58:02 +0900 |
commit | 00ac3a64ba57ecd8f10bf54f03297cdec0c538d6 (patch) | |
tree | db4c230a17629f0e8996ffa01c300088a8305efe | |
parent | cfdbbd67268a77177e485263cdd8fb416315e9a6 (diff) | |
download | ruby-00ac3a64ba57ecd8f10bf54f03297cdec0c538d6.tar.gz |
Introduce `at_char_boundary` function
-rw-r--r-- | file.c | 2 | ||||
-rw-r--r-- | internal/string.h | 6 | ||||
-rw-r--r-- | io.c | 3 | ||||
-rw-r--r-- | string.c | 9 |
4 files changed, 12 insertions, 8 deletions
@@ -4610,7 +4610,7 @@ rmext(const char *p, long l0, long l1, const char *e, long l2, rb_encoding *enc)
     if (l1 < l2) return l1;
     s = p+l1-l2;
-    if (rb_enc_left_char_head(p, s, p+l1, enc) != s) return 0;
+    if (!at_char_boundary(p, s, p+l1, enc)) return 0;
 #if CASEFOLD_FILESYSTEM
 #define fncomp strncasecmp
 #else
diff --git a/internal/string.h b/internal/string.h
index 5f59d9621b..cfaf628e02 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -119,6 +119,12 @@ is_broken_string(VALUE str)
     return rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN;
 }
+static inline bool
+at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
+{
+    return rb_enc_left_char_head(s, p, e, enc) == p;
+}
+
 /* expect tail call optimization */
 // YJIT needs this function to never allocate and never raise
 static inline VALUE
@@ -4144,8 +4144,7 @@ rb_io_getline_0(VALUE rs, long limit, int chomp, rb_io_t *fptr)
     s = RSTRING_PTR(str);
     e = RSTRING_END(str);
     p = e - rslen;
-    pp = rb_enc_left_char_head(s, p, e, enc);
-    if (pp != p) continue;
+    if (!at_char_boundary(s, p, e, enc)) continue;
     if (!rspara) rscheck(rsptr, rslen, rs);
     if (memcmp(p, rsptr, rslen) == 0) {
         if (chomp) {
@@ -3930,8 +3930,7 @@ str_ensure_byte_pos(VALUE str, long pos)
     const char *s = RSTRING_PTR(str);
     const char *e = RSTRING_END(str);
     const char *p = s + pos;
-    const char *pp = rb_enc_left_char_head(s, p, e, rb_enc_get(str));
-    if (p != pp) {
+    if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
         rb_raise(rb_eIndexError,
                  "offset %ld does not land on character boundary", pos);
     }
@@ -9521,7 +9520,7 @@ chompped_length(VALUE str, VALUE rs)
     if (p[len-1] == newline &&
         (rslen <= 1 ||
          memcmp(rsptr, pp, rslen) == 0)) {
-        if (rb_enc_left_char_head(p, pp, e, enc) == pp)
+        if (at_char_boundary(p, pp, e, enc))
             return len - rslen;
         RB_GC_GUARD(rs);
     }
@@ -10497,7 +10496,7 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
     p = RSTRING_PTR(str);
     e = p + slen;
     s = e - tlen;
-    if (rb_enc_left_char_head(p, s, e, enc) != s)
+    if (!at_char_boundary(p, s, e, enc))
         continue;
     if (memcmp(s, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
         return Qtrue;
@@ -10605,7 +10604,7 @@ deleted_suffix_length(VALUE str, VALUE suffix)
     suffixptr = RSTRING_PTR(suffix);
     s = strptr + olen - suffixlen;
     if (memcmp(s, suffixptr, suffixlen) != 0) return 0;
-    if (rb_enc_left_char_head(strptr, s, strptr + olen, enc) != s) return 0;
+    if (!at_char_boundary(strptr, s, strptr + olen, enc)) return 0;
     return suffixlen;
 }