about summary refs log tree commit diff stats
path: root/enc/utf_8.c
diff options
context:
space:
mode:
Diffstat (limited to 'enc/utf_8.c')
-rw-r--r-- enc/utf_8.c 21
1 files changed, 3 insertions, 18 deletions
diff --git a/enc/utf_8.c b/enc/utf_8.c
index b8f38e9d58..8ab8792a6d 100644
--- a/enc/utf_8.c
+++ b/enc/utf_8.c
@@ -35,8 +35,8 @@
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
#define INVALID_CODE_FE 0xfffffffe
#define INVALID_CODE_FF 0xffffffff
-#define VALID_CODE_LIMIT 0x7fffffff
#endif
+#define VALID_CODE_LIMIT 0x0010ffff
#define utf8_islead(c) ((UChar )((c) & 0xc0) != 0x80)
@@ -297,9 +297,7 @@ code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
if ((code & 0xffffff80) == 0) return 1;
else if ((code & 0xfffff800) == 0) return 2;
else if ((code & 0xffff0000) == 0) return 3;
- else if ((code & 0xffe00000) == 0) return 4;
- else if ((code & 0xfc000000) == 0) return 5;
- else if ((code & 0x80000000) == 0) return 6;
+ else if (code <= VALID_CODE_LIMIT) return 4;
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) return 1;
else if (code == INVALID_CODE_FF) return 1;
@@ -328,24 +326,11 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
*p++ = (UChar )(((code>>12) & 0x0f) | 0xe0);
*p++ = UTF8_TRAILS(code, 6);
}
- else if ((code & 0xffe00000) == 0) {
+ else if (code <= VALID_CODE_LIMIT) {
*p++ = (UChar )(((code>>18) & 0x07) | 0xf0);
*p++ = UTF8_TRAILS(code, 12);
*p++ = UTF8_TRAILS(code, 6);
}
- else if ((code & 0xfc000000) == 0) {
- *p++ = (UChar )(((code>>24) & 0x03) | 0xf8);
- *p++ = UTF8_TRAILS(code, 18);
- *p++ = UTF8_TRAILS(code, 12);
- *p++ = UTF8_TRAILS(code, 6);
- }
- else if ((code & 0x80000000) == 0) {
- *p++ = (UChar )(((code>>30) & 0x01) | 0xfc);
- *p++ = UTF8_TRAILS(code, 24);
- *p++ = UTF8_TRAILS(code, 18);
- *p++ = UTF8_TRAILS(code, 12);
- *p++ = UTF8_TRAILS(code, 6);
- }
#ifdef USE_INVALID_CODE_SCHEME
else if (code == INVALID_CODE_FE) {
*p = 0xfe;