From 5fade63482d39d9c550a0ce8a555dc8e3c0aedec Mon Sep 17 00:00:00 2001
From: nobu
Date: Sun, 11 Mar 2018 00:05:12 +0000
Subject: re.c: fixed escaped multibyte char

* re.c (unescape_nonascii): escaped multibyte character should be
  copied as-is, just with checking if the encoding matches.
  https://twitter.com/sakuro/status/972014409986883584

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@62718 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 re.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 're.c')

diff --git a/re.c b/re.c
index 0508e5f322..84c2e060dd 100644
--- a/re.c
+++ b/re.c
@@ -2537,11 +2537,13 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
     while (p < end) {
         int chlen = rb_enc_precise_mbclen(p, end, enc);
         if (!MBCLEN_CHARFOUND_P(chlen)) {
+          invalid_multibyte:
             errcpy(err, "invalid multibyte character");
             return -1;
         }
         chlen = MBCLEN_CHARFOUND_LEN(chlen);
         if (1 < chlen || (*p & 0x80)) {
+          multibyte:
             rb_str_buf_cat(buf, p, chlen);
             p += chlen;
             if (*encp == 0)
@@ -2559,6 +2561,16 @@ unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
             errcpy(err, "too short escape sequence");
             return -1;
         }
+        chlen = rb_enc_precise_mbclen(p, end, enc);
+        if (!MBCLEN_CHARFOUND_P(chlen)) {
+            goto invalid_multibyte;
+        }
+        if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
+            /* include the previous backslash */
+            --p;
+            ++chlen;
+            goto multibyte;
+        }
         switch (c = *p++) {
           case '1': case '2': case '3':
           case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
-- 
cgit v1.2.3