aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYusuke Endoh <mame@ruby-lang.org>2022-10-25 15:45:40 +0900
committerYusuke Endoh <mame@ruby-lang.org>2022-10-25 17:02:43 +0900
commit1d2d25dcadda0764f303183ac091d0c87b432566 (patch)
treeb9e42a7d3aa1b036b646883825d7a6e26fbe8a5c
parent114e71d06280f9c57b9859ee4405ae89a989ddb6 (diff)
downloadruby-1d2d25dcadda0764f303183ac091d0c87b432566.tar.gz
Prevent potential buffer overrun in onigmo
The code pattern `p + enclen(enc, p, pend)` may lead to a buffer overrun if an incomplete UTF-8 character (a truncated byte sequence) is placed at the end of a string. Because this pattern is used in several places in onigmo, this change fixes the issue on the `enclen` side: the function must never return a number larger than `pend - p`. Co-Authored-By: Nobuyoshi Nakada <nobu@ruby-lang.org>
-rw-r--r--include/ruby/onigmo.h4
-rw-r--r--regenc.c15
-rw-r--r--regparse.c1
3 files changed, 17 insertions, 3 deletions
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
index a7ef59c7c8..d71dfb80fb 100644
--- a/include/ruby/onigmo.h
+++ b/include/ruby/onigmo.h
@@ -356,9 +356,9 @@ int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, c
#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
ONIG_EXTERN
-int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
+int onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
-#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
+#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen(p,e,enc)
#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
diff --git a/regenc.c b/regenc.c
index 16d62fdf40..fc131d2533 100644
--- a/regenc.c
+++ b/regenc.c
@@ -52,6 +52,21 @@ onigenc_set_default_encoding(OnigEncoding enc)
}
extern int
+onigenc_mbclen(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
+{
+ int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
+ if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) {
+ ret = ONIGENC_MBCLEN_CHARFOUND_LEN(ret);
+ if (ret > (int)(e - p)) ret = (int)(e - p); // just for case
+ return ret;
+ }
+ else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) {
+ return (int)(e - p);
+ }
+ return p < e ? 1 : 0;
+}
+
+extern int
onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc)
{
int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e);
diff --git a/regparse.c b/regparse.c
index 513e0a8c7a..4ebd5f1c46 100644
--- a/regparse.c
+++ b/regparse.c
@@ -3799,7 +3799,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else { /* string */
p = tok->backp + enclen(enc, tok->backp, end);
- if (p > end) return ONIGERR_END_PATTERN_AT_ESCAPE;
}
}
break;