diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-30 03:49:54 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-01-30 03:49:54 +0000 |
commit | 44cfd58dc5f3949ff5cbcf7350a3e76d6ff1c49c (patch) | |
tree | 667613666ee8844335d91fc2dd952f2fe0faa6dd /enc/utf_16be.c | |
parent | 8e36fa69fd901a012ecdb056d3aedd97f9124879 (diff) | |
download | ruby-44cfd58dc5f3949ff5cbcf7350a3e76d6ff1c49c.tar.gz |
* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
(UTF16_IS_SURROGATE_SECOND): ditto.
(UTF16_IS_SURROGATE): defined.
(utf16be_mbc_enc_len): validation implemented.
* enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
(UTF16_IS_SURROGATE_SECOND): ditto.
(UTF16_IS_SURROGATE): defined.
(utf16le_mbc_enc_len): validation implemented.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/utf_16be.c')
-rw-r--r-- | enc/utf_16be.c | 28 |
1 files changed, 25 insertions, 3 deletions
```diff
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index ab5ee1d4a3..5311ba3641 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -29,8 +29,9 @@
 
 #include "regenc.h"
 
-#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
-#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
+#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
+#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
+#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
 
 static const int EncLen_UTF16[] = {
   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -55,7 +56,28 @@ static int
 utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
                     OnigEncoding enc ARG_UNUSED)
 {
-  return EncLen_UTF16[*p];
+  int byte = p[0];
+  if (!UTF16_IS_SURROGATE(byte)) {
+    if (2 <= e-p)
+      return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
+    else
+      return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+  }
+  if (UTF16_IS_SURROGATE_FIRST(byte)) {
+    switch (e-p) {
+      case 1: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(3);
+      case 2: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2);
+      case 3:
+        if (UTF16_IS_SURROGATE_SECOND(p[2]))
+          return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+        break;
+      default:
+        if (UTF16_IS_SURROGATE_SECOND(p[2]))
+          return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+        break;
+    }
+  }
+  return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
 }
 
 static int
```