aboutsummaryrefslogtreecommitdiffstats
path: root/internal
diff options
context:
space:
mode:
author: John Hawthorn <john@hawthorn.email> 2023-08-31 15:12:47 -0700
committer: John Hawthorn <john@hawthorn.email> 2023-09-01 16:23:28 -0700
commit: d89b15cdce8a2fa36fc2a150551f0dd8e58814d7 (patch)
tree: 789da818c90a706c659de182207e02a9cc5b1e40 /internal
parent: 2ca0f01015d076d966ab1b0f28700a4424b86da6 (diff)
download: ruby-d89b15cdce8a2fa36fc2a150551f0dd8e58814d7.tar.gz
Use end of char boundary in start_with?
Previously we used the next character following the found prefix to determine if the match ended on a broken character. This caused surprising behaviour when a valid character was followed by a UTF-8 continuation byte. This commit changes the behaviour to instead look for the end of the last character in the prefix.

[Bug #19784]

Co-authored-by: ywenc <ywenc@github.com>
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
Diffstat (limited to 'internal')
-rw-r--r-- internal/string.h 9
1 files changed, 9 insertions, 0 deletions
diff --git a/internal/string.h b/internal/string.h
index cfaf628e02..abb0a536ad 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -125,6 +125,15 @@ at_char_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
return rb_enc_left_char_head(s, p, e, enc) == p;
}
+static inline bool
+at_char_right_boundary(const char *s, const char *p, const char *e, rb_encoding *enc)
+{ /* Reports whether rb_enc_right_char_head(s, p, e, enc) returns p itself; s and e are presumably the buffer start and end (confirm with callers). */
+ RUBY_ASSERT(s <= p); /* p must not precede s */
+ RUBY_ASSERT(p <= e); /* p must not lie beyond e */
+ /* Right-adjusting counterpart of at_char_boundary(), which compares p against rb_enc_left_char_head() instead. */
+ return rb_enc_right_char_head(s, p, e, enc) == p;
+}
+
/* expect tail call optimization */
// YJIT needs this function to never allocate and never raise
static inline VALUE