about | summary | refs | log | tree | commit | diff | stats
path: root/enc/utf_16be.c
diff options
context:
space:
mode:
author akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-30 03:49:54 +0000
committer akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2008-01-30 03:49:54 +0000
commit 44cfd58dc5f3949ff5cbcf7350a3e76d6ff1c49c (patch)
tree 667613666ee8844335d91fc2dd952f2fe0faa6dd /enc/utf_16be.c
parent 8e36fa69fd901a012ecdb056d3aedd97f9124879 (diff)
download ruby-44cfd58dc5f3949ff5cbcf7350a3e76d6ff1c49c.tar.gz
* enc/utf_16be.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
  (UTF16_IS_SURROGATE_SECOND): ditto.
  (UTF16_IS_SURROGATE): defined.
  (utf16be_mbc_enc_len): validation implemented.
* enc/utf_16le.c (UTF16_IS_SURROGATE_FIRST): avoid branch.
  (UTF16_IS_SURROGATE_SECOND): ditto.
  (UTF16_IS_SURROGATE): defined.
  (utf16le_mbc_enc_len): validation implemented.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15338 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/utf_16be.c')
-rw-r--r-- enc/utf_16be.c 28
1 files changed, 25 insertions, 3 deletions
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index ab5ee1d4a3..5311ba3641 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -29,8 +29,9 @@
#include "regenc.h"
-#define UTF16_IS_SURROGATE_FIRST(c) (c >= 0xd8 && c <= 0xdb)
-#define UTF16_IS_SURROGATE_SECOND(c) (c >= 0xdc && c <= 0xdf)
+#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
+#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
+#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -55,7 +56,28 @@ static int
utf16be_mbc_enc_len(const UChar* p, const OnigUChar* e ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
- return EncLen_UTF16[*p];
+ int byte = p[0];
+ if (!UTF16_IS_SURROGATE(byte)) {
+ if (2 <= e-p)
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2);
+ else
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+ }
+ if (UTF16_IS_SURROGATE_FIRST(byte)) {
+ switch (e-p) {
+ case 1: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(3);
+ case 2: return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(2);
+ case 3:
+ if (UTF16_IS_SURROGATE_SECOND(p[2]))
+ return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1);
+ break;
+ default:
+ if (UTF16_IS_SURROGATE_SECOND(p[2]))
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4);
+ break;
+ }
+ }
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
}
static int