From 86a9215bbfa659c3f663c5a61f855567c15c5437 Mon Sep 17 00:00:00 2001
From: akr
Date: Tue, 29 Jan 2008 08:38:21 +0000
Subject: * enc/gb18030.c (gb18030_mbc_enc_len): validation implemented.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15313 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 enc/gb18030.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 97 insertions(+), 9 deletions(-)

(limited to 'enc')

diff --git a/enc/gb18030.c b/enc/gb18030.c
index 65666ec3af..ebea708173 100644
--- a/enc/gb18030.c
+++ b/enc/gb18030.c
@@ -62,17 +62,105 @@ static const char GB18030_MAP[] = {
   CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, CM, C1
 };
 
+typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1 } state_t;
+#define A ACCEPT
+#define F FAILURE
+static const signed char trans[][0x100] = {
+  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* 9 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F
+  },
+  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F, F, F, F, F, F,
+    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F,
+    /* 8 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* 9 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
+    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F
+  },
+  { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* 9 */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* a */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* b */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* c */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* d */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* e */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    /* f */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, F
+  },
+  { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
+    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 3 */ A, A, A, A, A, A, A, A, A, A, F, F, F, F, F, F,
+    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
+    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
+  }
+};
+#undef A
+#undef F
+
 static int
-gb18030_mbc_enc_len(const UChar* p, const UChar* end ARG_UNUSED, OnigEncoding enc ARG_UNUSED)
+gb18030_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
 {
-  if (GB18030_MAP[*p] != CM)
-    return 1;
-  p++;
-  if (GB18030_MAP[*p] == C4)
-    return 4;
-  if (GB18030_MAP[*p] == C1)
-    return 1; /* illegal sequence */
-  return 2;
+  int firstbyte = *p++;
+  state_t s = trans[0][firstbyte];
+#define RETURN(n) \
+    return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) : \
+                         ONIGENC_CONSTRUCT_MBCLEN_INVALID()
+  if (s < 0) RETURN(1);
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2-1);
+  s = trans[s][*p++];
+  if (s < 0) RETURN(2);
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-2);
+  s = trans[s][*p++];
+  if (s < 0) RETURN(3);
+  if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(4-3);
+  s = trans[s][*p++];
+  RETURN(4);
+#undef RETURN
 }
 
 static OnigCodePoint
-- 
cgit v1.2.3