From ed37bfcc9d4306e47f4c73e0eeca202cb1e00d9d Mon Sep 17 00:00:00 2001 From: naruse Date: Fri, 5 Jul 2013 00:54:11 +0000 Subject: broken utf-8 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@41786 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- enc/utf_8.c | 4 ++-- include/ruby/oniguruma.h | 9 ++++++--- regenc.c | 8 +++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/enc/utf_8.c b/enc/utf_8.c index dae1f3a1bc..46ef6d6a97 100644 --- a/enc/utf_8.c +++ b/enc/utf_8.c @@ -233,12 +233,12 @@ mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED) if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-2); s = trans[s][*p++]; if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) : - ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + ONIGENC_CONSTRUCT_MBCLEN_INVALID2(2); if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_UTF8[firstbyte]-3); s = trans[s][*p++]; return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) : - ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + ONIGENC_CONSTRUCT_MBCLEN_INVALID2(3); } static int diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 6a26ee4aaa..b4b6743adb 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -246,11 +246,14 @@ ONIG_EXTERN OnigEncodingType OnigEncodingASCII; #define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r)) #define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r) -#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1) -#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1) +#define ONIGENC_INVALID_NUM 0x10000 +#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1-ONIGENC_INVALID_NUM) +#define ONIGENC_CONSTRUCT_MBCLEN_INVALID2(n) (-(n)-ONIGENC_INVALID_NUM) +#define ONIGENC_MBCLEN_INVALID_P(r) ((r) < -ONIGENC_INVALID_NUM) +#define ONIGENC_MBCLEN_INVALID_LEN(r) (-(r)-ONIGENC_INVALID_NUM) #define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n)) -#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1) +#define ONIGENC_MBCLEN_NEEDMORE_P(r) (-ONIGENC_INVALID_NUM <= (r) && (r) < -1) #define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r)) #define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) diff --git a/regenc.c b/regenc.c index 288eac433d..fc039154b4 100644 --- a/regenc.c +++ b/regenc.c @@ -55,10 +55,12 @@ extern int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, struct OnigEncodingTypeST* enc) { int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e); - if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) - return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); - else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) + if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) return (int)(e-p)+ONIGENC_MBCLEN_NEEDMORE_LEN(ret); + else if (ONIGENC_MBCLEN_INVALID_P(ret)) + return (int)(e-p)+ONIGENC_MBCLEN_INVALID_LEN(ret); + else if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) + return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); return 1; } -- cgit v1.2.3