diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
commit | 69406aad505414de34dc8b560ac1eadf147b0dbc (patch) | |
tree | 161d7248925c7bd2c99d3ed6a341e81ba76b40c4 /enc/euc_jp.c | |
parent | de4ec689910c07a48b81083adc3130b6b6023be3 (diff) | |
download | ruby-69406aad505414de34dc8b560ac1eadf147b0dbc.tar.gz |
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
validation.
* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
(MBCLEN_CHARFOUND): new macro.
(MBCLEN_INVALID): new macro.
(MBCLEN_NEEDMORE): new macro.
* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
by precise_mbc_enc_len.
(ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
(ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
(ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
(ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
(ONIGENC_MBCLEN_CHARFOUND): new macro.
(ONIGENC_MBCLEN_INVALID): new macro.
(ONIGENC_MBCLEN_NEEDMORE): new macro.
(ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.
* enc/euc_jp.c: validation implemented.
* enc/sjis.c: ditto.
* enc/utf8.c: ditto.
* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
encoding.
(rb_str_valid_encoding_p): new method String#valid_encoding?.
* io.c (rb_io_getc): use rb_enc_precise_mbclen.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/euc_jp.c')
-rw-r--r-- | enc/euc_jp.c | 77 |
1 files changed, 76 insertions, 1 deletions
diff --git a/enc/euc_jp.c b/enc/euc_jp.c index f48c904945..ea2a8e0726 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -50,10 +50,85 @@ static const int EncLen_EUCJP[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 }; +typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2 } state_t; +#define A ACCEPT +#define F FAILURE +static const signed char trans[][0x100] = { + { /* S0 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, 1, 2, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + { /* S1 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, + /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, F + }, + { /* S2 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, + /* a */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, F + }, + +}; +#undef A +#undef F + static int mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc) { - return EncLen_EUCJP[*p]; + int firstbyte = *p++; + state_t s; + s = trans[0][firstbyte]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-1); + s = trans[s][*p++]; + if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); + if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EUCJP[firstbyte]-2); + s = trans[s][*p++]; + return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) : + ONIGENC_CONSTRUCT_MBCLEN_INVALID(); } static OnigCodePoint |