From 866c79e2de4567d71f432652c58b48fe50916f37 Mon Sep 17 00:00:00 2001 From: naruse Date: Thu, 8 Oct 2009 02:49:11 +0000 Subject: * tool/enc-unicode.rb: parse range notation of UnicodeData.txt. * enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt, enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src: follow above change. [ruby-dev:39444] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 8 ++ enc/unicode/name2ctype.h | 193 ++++++++++++++----------------------------- enc/unicode/name2ctype.h.blt | 193 ++++++++++++++----------------------------- enc/unicode/name2ctype.kwd | 193 ++++++++++++++----------------------------- enc/unicode/name2ctype.src | 193 ++++++++++++++----------------------------- test/ruby/test_regexp.rb | 1 + tool/enc-unicode.rb | 20 +++-- 7 files changed, 272 insertions(+), 529 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7143bb582b..2154e62c8a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +Thu Oct 8 05:45:14 2009 NARUSE, Yui + + * tool/enc-unicode.rb: parse range notation of UnicodeData.txt. + + * enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt, + enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src: + follow above change. [ruby-dev:39444] + Thu Oct 8 02:46:24 2009 Nobuyoshi Nakada * string.c (rb_str_inspect): copy by chunks. diff --git a/enc/unicode/name2ctype.h b/enc/unicode/name2ctype.h index e0926f8969..bbc19caf4d 100644 --- a/enc/unicode/name2ctype.h +++ b/enc/unicode/name2ctype.h @@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = { /* 'Alpha': [[:Alpha:]] */ static const OnigCodePoint CR_Alpha[] = { - 482, + 477, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = { 0xaadb, 0xaadd, 0xabc0, 0xabea, 0xabec, 0xabed, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alpha */ @@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = { /* 'Graph': [[:Graph:]] */ static const OnigCodePoint CR_Graph[] = { - 492, + 487, 0x0021, 0x007e, 0x00a1, 0x00ac, 0x00ae, 0x0377, @@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Graph */ @@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = { /* 'Print': [[:Print:]] */ static const OnigCodePoint CR_Print[] = { - 494, + 489, 0x0009, 0x000d, 0x0020, 0x007e, 0x0085, 0x0085, @@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Print */ @@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = { /* 'Word': [[:Word:]] */ static const OnigCodePoint CR_Word[] = { - 508, + 503, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Word */ /* 'Alnum': [[:Alnum:]] */ static const OnigCodePoint CR_Alnum[] = { - 502, + 497, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alnum */ @@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = { /* 'Assigned': - */ static const OnigCodePoint CR_Assigned[] = { - 495, + 484, 0x0000, 0x0377, 0x037a, 0x037e, 0x0384, 0x038a, @@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xfa2d, + 0xd800, 0xfa2d, 0xfa30, 0xfa6d, 0xfa70, 0xfad9, 0xfb00, 0xfb06, @@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, + 0xf0000, 0xffffd, }; /* CR_Assigned */ /* 'C': Major Category */ static const OnigCodePoint CR_C[] = { - 26, + 20, 0x0000, 0x001f, 0x007f, 0x009f, 0x00ad, 0x00ad, @@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = { 0x202a, 0x202e, 0x2060, 0x2064, 0x206a, 0x206f, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xf8ff, + 0xd800, 0xf8ff, 0xfeff, 0xfeff, 0xfff9, 0xfffb, 0x110bd, 0x110bd, 0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10ffff, + 0xf0000, 0xffffd, + 0x100000, 0x10ffff, }; /* CR_C */ /* 'Cc': General Category */ @@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = { /* 'Cn': General Category */ static const OnigCodePoint CR_Cn[] = { - 495, + 484, 0x0378, 0x0379, 0x037f, 0x0383, 0x038b, 0x038b, @@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = { 0x31e4, 0x31ef, 0x321f, 0x321f, 0x32ff, 0x32ff, - 0x3401, 0x4db4, 0x4db6, 0x4dbf, - 0x4e01, 0x9fca, 0x9fcc, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, @@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = { 0xaae0, 0xabbf, 0xabee, 0xabef, 0xabfa, 0xabff, - 0xac01, 0xd7a2, 0xd7a4, 0xd7af, 0xd7c7, 0xd7ca, 0xd7fc, 0xd7ff, - 0xd801, 0xdb7e, - 0xdb81, 0xdbfe, - 0xdc01, 0xdffe, - 0xe001, 0xf8fe, 0xfa2e, 0xfa2f, 0xfa6e, 0xfa6f, 0xfada, 0xfaff, @@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = { 0x1f201, 0x1f20f, 0x1f232, 0x1f23f, 0x1f249, 0x1ffff, - 0x20001, 0x2a6d5, 0x2a6d7, 0x2a6ff, - 0x2a701, 0x2b733, 0x2b735, 0x2f7ff, 0x2fa1e, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, - 0xf0001, 0xffffc, - 0xffffe, 0xfffff, - 0x100001, 0x10ffff, + 0xffffe, 0x10ffff, }; /* CR_Cn */ /* 'Co': General Category */ static const OnigCodePoint CR_Co[] = { - 6, - 0xe000, 0xe000, - 0xf8ff, 0xf8ff, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10fffd, + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, }; /* CR_Co */ /* 'Cs': General Category */ static const OnigCodePoint CR_Cs[] = { - 4, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xdfff, + 1, + 0xd800, 0xdfff, }; /* CR_Cs */ /* 'L': Major Category */ static const OnigCodePoint CR_L[] = { - 427, + 422, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadd, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_L */ @@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = { /* 'Lo': General Category */ static const OnigCodePoint CR_Lo[] = { - 316, + 311, 0x01bb, 0x01bb, 0x01c0, 0x01c3, 0x0294, 0x0294, @@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa014, 0xa016, 0xa48c, 0xa4d0, 0xa4f7, @@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadc, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = { 0x11083, 0x110af, 0x12000, 0x1236e, 0x13000, 0x1342e, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_Lo */ diff --git a/enc/unicode/name2ctype.h.blt b/enc/unicode/name2ctype.h.blt index e0926f8969..bbc19caf4d 100644 --- a/enc/unicode/name2ctype.h.blt +++ b/enc/unicode/name2ctype.h.blt @@ -43,7 +43,7 @@ static const OnigCodePoint CR_NEWLINE[] = { /* 'Alpha': [[:Alpha:]] */ static const OnigCodePoint CR_Alpha[] = { - 482, + 477, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -377,10 +377,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -413,8 +411,7 @@ static const OnigCodePoint CR_Alpha[] = { 0xaadb, 0xaadd, 0xabc0, 0xabea, 0xabec, 0xabed, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -520,10 +517,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alpha */ @@ -593,7 +588,7 @@ static const OnigCodePoint CR_Digit[] = { /* 'Graph': [[:Graph:]] */ static const OnigCodePoint CR_Graph[] = { - 492, + 487, 0x0021, 0x007e, 0x00a1, 0x00ac, 0x00ae, 0x0377, @@ -914,10 +909,8 @@ static const OnigCodePoint CR_Graph[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -945,8 +938,7 @@ static const OnigCodePoint CR_Graph[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -1080,10 +1072,8 @@ static const OnigCodePoint CR_Graph[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Graph */ @@ -1694,7 +1684,7 @@ static const OnigCodePoint CR_Lower[] = { /* 'Print': [[:Print:]] */ static const OnigCodePoint CR_Print[] = { - 494, + 489, 0x0009, 0x000d, 0x0020, 0x007e, 0x0085, 0x0085, @@ -2017,10 +2007,8 @@ static const OnigCodePoint CR_Print[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -2048,8 +2036,7 @@ static const OnigCodePoint CR_Print[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -2183,10 +2170,8 @@ static const OnigCodePoint CR_Print[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Print */ @@ -2950,7 +2935,7 @@ static const OnigCodePoint CR_XDigit[] = { /* 'Word': [[:Word:]] */ static const OnigCodePoint CR_Word[] = { - 508, + 503, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -3302,10 +3287,8 @@ static const OnigCodePoint CR_Word[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3340,8 +3323,7 @@ static const OnigCodePoint CR_Word[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3453,17 +3435,15 @@ static const OnigCodePoint CR_Word[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Word */ /* 'Alnum': [[:Alnum:]] */ static const OnigCodePoint CR_Alnum[] = { - 502, + 497, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -3812,10 +3792,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3850,8 +3828,7 @@ static const OnigCodePoint CR_Alnum[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3960,10 +3937,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alnum */ @@ -3984,7 +3959,7 @@ static const OnigCodePoint CR_Any[] = { /* 'Assigned': - */ static const OnigCodePoint CR_Assigned[] = { - 495, + 484, 0x0000, 0x0377, 0x037a, 0x037e, 0x0384, 0x038a, @@ -4300,10 +4275,8 @@ static const OnigCodePoint CR_Assigned[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -4331,15 +4304,10 @@ static const OnigCodePoint CR_Assigned[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xfa2d, + 0xd800, 0xfa2d, 0xfa30, 0xfa6d, 0xfa70, 0xfad9, 0xfb00, 0xfb06, @@ -4469,22 +4437,18 @@ static const OnigCodePoint CR_Assigned[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, + 0xf0000, 0xffffd, }; /* CR_Assigned */ /* 'C': Major Category */ static const OnigCodePoint CR_C[] = { - 26, + 20, 0x0000, 0x001f, 0x007f, 0x009f, 0x00ad, 0x00ad, @@ -4496,21 +4460,15 @@ static const OnigCodePoint CR_C[] = { 0x202a, 0x202e, 0x2060, 0x2064, 0x206a, 0x206f, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xf8ff, + 0xd800, 0xf8ff, 0xfeff, 0xfeff, 0xfff9, 0xfffb, 0x110bd, 0x110bd, 0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10ffff, + 0xf0000, 0xffffd, + 0x100000, 0x10ffff, }; /* CR_C */ /* 'Cc': General Category */ @@ -4542,7 +4500,7 @@ static const OnigCodePoint CR_Cf[] = { /* 'Cn': General Category */ static const OnigCodePoint CR_Cn[] = { - 495, + 484, 0x0378, 0x0379, 0x037f, 0x0383, 0x038b, 0x038b, @@ -4858,9 +4816,7 @@ static const OnigCodePoint CR_Cn[] = { 0x31e4, 0x31ef, 0x321f, 0x321f, 0x32ff, 0x32ff, - 0x3401, 0x4db4, 0x4db6, 0x4dbf, - 0x4e01, 0x9fca, 0x9fcc, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, @@ -4889,14 +4845,9 @@ static const OnigCodePoint CR_Cn[] = { 0xaae0, 0xabbf, 0xabee, 0xabef, 0xabfa, 0xabff, - 0xac01, 0xd7a2, 0xd7a4, 0xd7af, 0xd7c7, 0xd7ca, 0xd7fc, 0xd7ff, - 0xd801, 0xdb7e, - 0xdb81, 0xdbfe, - 0xdc01, 0xdffe, - 0xe001, 0xf8fe, 0xfa2e, 0xfa2f, 0xfa6e, 0xfa6f, 0xfada, 0xfaff, @@ -5027,42 +4978,32 @@ static const OnigCodePoint CR_Cn[] = { 0x1f201, 0x1f20f, 0x1f232, 0x1f23f, 0x1f249, 0x1ffff, - 0x20001, 0x2a6d5, 0x2a6d7, 0x2a6ff, - 0x2a701, 0x2b733, 0x2b735, 0x2f7ff, 0x2fa1e, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, - 0xf0001, 0xffffc, - 0xffffe, 0xfffff, - 0x100001, 0x10ffff, + 0xffffe, 0x10ffff, }; /* CR_Cn */ /* 'Co': General Category */ static const OnigCodePoint CR_Co[] = { - 6, - 0xe000, 0xe000, - 0xf8ff, 0xf8ff, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10fffd, + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, }; /* CR_Co */ /* 'Cs': General Category */ static const OnigCodePoint CR_Cs[] = { - 4, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xdfff, + 1, + 0xd800, 0xdfff, }; /* CR_Cs */ /* 'L': Major Category */ static const OnigCodePoint CR_L[] = { - 427, + 422, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -5347,10 +5288,8 @@ static const OnigCodePoint CR_L[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -5389,8 +5328,7 @@ static const OnigCodePoint CR_L[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadd, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -5485,10 +5423,8 @@ static const OnigCodePoint CR_L[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_L */ @@ -6152,7 +6088,7 @@ static const OnigCodePoint CR_Lm[] = { /* 'Lo': General Category */ static const OnigCodePoint CR_Lo[] = { - 316, + 311, 0x01bb, 0x01bb, 0x01c0, 0x01c3, 0x0294, 0x0294, @@ -6362,10 +6298,8 @@ static const OnigCodePoint CR_Lo[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa014, 0xa016, 0xa48c, 0xa4d0, 0xa4f7, @@ -6400,8 +6334,7 @@ static const OnigCodePoint CR_Lo[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadc, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -6464,10 +6397,8 @@ static const OnigCodePoint CR_Lo[] = { 0x11083, 0x110af, 0x12000, 0x1236e, 0x13000, 0x1342e, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_Lo */ diff --git a/enc/unicode/name2ctype.kwd b/enc/unicode/name2ctype.kwd index d1304671f0..46058a8341 100644 --- a/enc/unicode/name2ctype.kwd +++ b/enc/unicode/name2ctype.kwd @@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = { /* 'Alpha': [[:Alpha:]] */ static const OnigCodePoint CR_Alpha[] = { - 482, + 477, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = { 0xaadb, 0xaadd, 0xabc0, 0xabea, 0xabec, 0xabed, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alpha */ @@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = { /* 'Graph': [[:Graph:]] */ static const OnigCodePoint CR_Graph[] = { - 492, + 487, 0x0021, 0x007e, 0x00a1, 0x00ac, 0x00ae, 0x0377, @@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Graph */ @@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = { /* 'Print': [[:Print:]] */ static const OnigCodePoint CR_Print[] = { - 494, + 489, 0x0009, 0x000d, 0x0020, 0x007e, 0x0085, 0x0085, @@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Print */ @@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = { /* 'Word': [[:Word:]] */ static const OnigCodePoint CR_Word[] = { - 508, + 503, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Word */ /* 'Alnum': [[:Alnum:]] */ static const OnigCodePoint CR_Alnum[] = { - 502, + 497, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alnum */ @@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = { /* 'Assigned': - */ static const OnigCodePoint CR_Assigned[] = { - 495, + 484, 0x0000, 0x0377, 0x037a, 0x037e, 0x0384, 0x038a, @@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xfa2d, + 0xd800, 0xfa2d, 0xfa30, 0xfa6d, 0xfa70, 0xfad9, 0xfb00, 0xfb06, @@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, + 0xf0000, 0xffffd, }; /* CR_Assigned */ /* 'C': Major Category */ static const OnigCodePoint CR_C[] = { - 26, + 20, 0x0000, 0x001f, 0x007f, 0x009f, 0x00ad, 0x00ad, @@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = { 0x202a, 0x202e, 0x2060, 0x2064, 0x206a, 0x206f, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xf8ff, + 0xd800, 0xf8ff, 0xfeff, 0xfeff, 0xfff9, 0xfffb, 0x110bd, 0x110bd, 0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10ffff, + 0xf0000, 0xffffd, + 0x100000, 0x10ffff, }; /* CR_C */ /* 'Cc': General Category */ @@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = { /* 'Cn': General Category */ static const OnigCodePoint CR_Cn[] = { - 495, + 484, 0x0378, 0x0379, 0x037f, 0x0383, 0x038b, 0x038b, @@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = { 0x31e4, 0x31ef, 0x321f, 0x321f, 0x32ff, 0x32ff, - 0x3401, 0x4db4, 0x4db6, 0x4dbf, - 0x4e01, 0x9fca, 0x9fcc, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, @@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = { 0xaae0, 0xabbf, 0xabee, 0xabef, 0xabfa, 0xabff, - 0xac01, 0xd7a2, 0xd7a4, 0xd7af, 0xd7c7, 0xd7ca, 0xd7fc, 0xd7ff, - 0xd801, 0xdb7e, - 0xdb81, 0xdbfe, - 0xdc01, 0xdffe, - 0xe001, 0xf8fe, 0xfa2e, 0xfa2f, 0xfa6e, 0xfa6f, 0xfada, 0xfaff, @@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = { 0x1f201, 0x1f20f, 0x1f232, 0x1f23f, 0x1f249, 0x1ffff, - 0x20001, 0x2a6d5, 0x2a6d7, 0x2a6ff, - 0x2a701, 0x2b733, 0x2b735, 0x2f7ff, 0x2fa1e, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, - 0xf0001, 0xffffc, - 0xffffe, 0xfffff, - 0x100001, 0x10ffff, + 0xffffe, 0x10ffff, }; /* CR_Cn */ /* 'Co': General Category */ static const OnigCodePoint CR_Co[] = { - 6, - 0xe000, 0xe000, - 0xf8ff, 0xf8ff, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10fffd, + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, }; /* CR_Co */ /* 'Cs': General Category */ static const OnigCodePoint CR_Cs[] = { - 4, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xdfff, + 1, + 0xd800, 0xdfff, }; /* CR_Cs */ /* 'L': Major Category */ static const OnigCodePoint CR_L[] = { - 427, + 422, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadd, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_L */ @@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = { /* 'Lo': General Category */ static const OnigCodePoint CR_Lo[] = { - 316, + 311, 0x01bb, 0x01bb, 0x01c0, 0x01c3, 0x0294, 0x0294, @@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa014, 0xa016, 0xa48c, 0xa4d0, 0xa4f7, @@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadc, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = { 0x11083, 0x110af, 0x12000, 0x1236e, 0x13000, 0x1342e, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_Lo */ diff --git a/enc/unicode/name2ctype.src b/enc/unicode/name2ctype.src index d1304671f0..46058a8341 100644 --- a/enc/unicode/name2ctype.src +++ b/enc/unicode/name2ctype.src @@ -8,7 +8,7 @@ static const OnigCodePoint CR_NEWLINE[] = { /* 'Alpha': [[:Alpha:]] */ static const OnigCodePoint CR_Alpha[] = { - 482, + 477, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -342,10 +342,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -378,8 +376,7 @@ static const OnigCodePoint CR_Alpha[] = { 0xaadb, 0xaadd, 0xabc0, 0xabea, 0xabec, 0xabed, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -485,10 +482,8 @@ static const OnigCodePoint CR_Alpha[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alpha */ @@ -558,7 +553,7 @@ static const OnigCodePoint CR_Digit[] = { /* 'Graph': [[:Graph:]] */ static const OnigCodePoint CR_Graph[] = { - 492, + 487, 0x0021, 0x007e, 0x00a1, 0x00ac, 0x00ae, 0x0377, @@ -879,10 +874,8 @@ static const OnigCodePoint CR_Graph[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -910,8 +903,7 @@ static const OnigCodePoint CR_Graph[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -1045,10 +1037,8 @@ static const OnigCodePoint CR_Graph[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Graph */ @@ -1659,7 +1649,7 @@ static const OnigCodePoint CR_Lower[] = { /* 'Print': [[:Print:]] */ static const OnigCodePoint CR_Print[] = { - 494, + 489, 0x0009, 0x000d, 0x0020, 0x007e, 0x0085, 0x0085, @@ -1982,10 +1972,8 @@ static const OnigCodePoint CR_Print[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -2013,8 +2001,7 @@ static const OnigCodePoint CR_Print[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -2148,10 +2135,8 @@ static const OnigCodePoint CR_Print[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Print */ @@ -2915,7 +2900,7 @@ static const OnigCodePoint CR_XDigit[] = { /* 'Word': [[:Word:]] */ static const OnigCodePoint CR_Word[] = { - 508, + 503, 0x0030, 0x0039, 0x0041, 0x005a, 0x005f, 0x005f, @@ -3267,10 +3252,8 @@ static const OnigCodePoint CR_Word[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3305,8 +3288,7 @@ static const OnigCodePoint CR_Word[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3418,17 +3400,15 @@ static const OnigCodePoint CR_Word[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Word */ /* 'Alnum': [[:Alnum:]] */ static const OnigCodePoint CR_Alnum[] = { - 502, + 497, 0x0030, 0x0039, 0x0041, 0x005a, 0x0061, 0x007a, @@ -3777,10 +3757,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -3815,8 +3793,7 @@ static const OnigCodePoint CR_Alnum[] = { 0xabc0, 0xabea, 0xabec, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -3925,10 +3902,8 @@ static const OnigCodePoint CR_Alnum[] = { 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, 0x1d7ce, 0x1d7ff, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0100, 0xe01ef, }; /* CR_Alnum */ @@ -3948,7 +3923,7 @@ static const OnigCodePoint CR_Any[] = { /* 'Assigned': - */ static const OnigCodePoint CR_Assigned[] = { - 495, + 484, 0x0000, 0x0377, 0x037a, 0x037e, 0x0384, 0x038a, @@ -4264,10 +4239,8 @@ static const OnigCodePoint CR_Assigned[] = { 0x31c0, 0x31e3, 0x31f0, 0x321e, 0x3220, 0x32fe, - 0x3300, 0x3400, - 0x4db5, 0x4db5, - 0x4dc0, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3300, 0x4db5, + 0x4dc0, 0x9fcb, 0xa000, 0xa48c, 0xa490, 0xa4c6, 0xa4d0, 0xa62b, @@ -4295,15 +4268,10 @@ static const OnigCodePoint CR_Assigned[] = { 0xaadb, 0xaadf, 0xabc0, 0xabed, 0xabf0, 0xabf9, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xfa2d, + 0xd800, 0xfa2d, 0xfa30, 0xfa6d, 0xfa70, 0xfad9, 0xfb00, 0xfb06, @@ -4433,22 +4401,18 @@ static const OnigCodePoint CR_Assigned[] = { 0x1f200, 0x1f200, 0x1f210, 0x1f231, 0x1f240, 0x1f248, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, 0xe0001, 0xe0001, 0xe0020, 0xe007f, 0xe0100, 0xe01ef, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, + 0xf0000, 0xffffd, }; /* CR_Assigned */ /* 'C': Major Category */ static const OnigCodePoint CR_C[] = { - 26, + 20, 0x0000, 0x001f, 0x007f, 0x009f, 0x00ad, 0x00ad, @@ -4460,21 +4424,15 @@ static const OnigCodePoint CR_C[] = { 0x202a, 0x202e, 0x2060, 0x2064, 0x206a, 0x206f, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xe000, - 0xf8ff, 0xf8ff, + 0xd800, 0xf8ff, 0xfeff, 0xfeff, 0xfff9, 0xfffb, 0x110bd, 0x110bd, 0x1d173, 0x1d17a, 0xe0001, 0xe0001, 0xe0020, 0xe007f, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10ffff, + 0xf0000, 0xffffd, + 0x100000, 0x10ffff, }; /* CR_C */ /* 'Cc': General Category */ @@ -4506,7 +4464,7 @@ static const OnigCodePoint CR_Cf[] = { /* 'Cn': General Category */ static const OnigCodePoint CR_Cn[] = { - 495, + 484, 0x0378, 0x0379, 0x037f, 0x0383, 0x038b, 0x038b, @@ -4822,9 +4780,7 @@ static const OnigCodePoint CR_Cn[] = { 0x31e4, 0x31ef, 0x321f, 0x321f, 0x32ff, 0x32ff, - 0x3401, 0x4db4, 0x4db6, 0x4dbf, - 0x4e01, 0x9fca, 0x9fcc, 0x9fff, 0xa48d, 0xa48f, 0xa4c7, 0xa4cf, @@ -4853,14 +4809,9 @@ static const OnigCodePoint CR_Cn[] = { 0xaae0, 0xabbf, 0xabee, 0xabef, 0xabfa, 0xabff, - 0xac01, 0xd7a2, 0xd7a4, 0xd7af, 0xd7c7, 0xd7ca, 0xd7fc, 0xd7ff, - 0xd801, 0xdb7e, - 0xdb81, 0xdbfe, - 0xdc01, 0xdffe, - 0xe001, 0xf8fe, 0xfa2e, 0xfa2f, 0xfa6e, 0xfa6f, 0xfada, 0xfaff, @@ -4991,42 +4942,32 @@ static const OnigCodePoint CR_Cn[] = { 0x1f201, 0x1f20f, 0x1f232, 0x1f23f, 0x1f249, 0x1ffff, - 0x20001, 0x2a6d5, 0x2a6d7, 0x2a6ff, - 0x2a701, 0x2b733, 0x2b735, 0x2f7ff, 0x2fa1e, 0xe0000, 0xe0002, 0xe001f, 0xe0080, 0xe00ff, 0xe01f0, 0xeffff, - 0xf0001, 0xffffc, - 0xffffe, 0xfffff, - 0x100001, 0x10ffff, + 0xffffe, 0x10ffff, }; /* CR_Cn */ /* 'Co': General Category */ static const OnigCodePoint CR_Co[] = { - 6, - 0xe000, 0xe000, - 0xf8ff, 0xf8ff, - 0xf0000, 0xf0000, - 0xffffd, 0xffffd, - 0x100000, 0x100000, - 0x10fffd, 0x10fffd, + 3, + 0xe000, 0xf8ff, + 0xf0000, 0xffffd, + 0x100000, 0x10fffd, }; /* CR_Co */ /* 'Cs': General Category */ static const OnigCodePoint CR_Cs[] = { - 4, - 0xd800, 0xd800, - 0xdb7f, 0xdb80, - 0xdbff, 0xdc00, - 0xdfff, 0xdfff, + 1, + 0xd800, 0xdfff, }; /* CR_Cs */ /* 'L': Major Category */ static const OnigCodePoint CR_L[] = { - 427, + 422, 0x0041, 0x005a, 0x0061, 0x007a, 0x00aa, 0x00aa, @@ -5311,10 +5252,8 @@ static const OnigCodePoint CR_L[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa48c, 0xa4d0, 0xa4fd, 0xa500, 0xa60c, @@ -5353,8 +5292,7 @@ static const OnigCodePoint CR_L[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadd, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -5449,10 +5387,8 @@ static const OnigCodePoint CR_L[] = { 0x1d78a, 0x1d7a8, 0x1d7aa, 0x1d7c2, 0x1d7c4, 0x1d7cb, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_L */ @@ -6116,7 +6052,7 @@ static const OnigCodePoint CR_Lm[] = { /* 'Lo': General Category */ static const OnigCodePoint CR_Lo[] = { - 316, + 311, 0x01bb, 0x01bb, 0x01c0, 0x01c3, 0x0294, 0x0294, @@ -6326,10 +6262,8 @@ static const OnigCodePoint CR_Lo[] = { 0x3131, 0x318e, 0x31a0, 0x31b7, 0x31f0, 0x31ff, - 0x3400, 0x3400, - 0x4db5, 0x4db5, - 0x4e00, 0x4e00, - 0x9fcb, 0x9fcb, + 0x3400, 0x4db5, + 0x4e00, 0x9fcb, 0xa000, 0xa014, 0xa016, 0xa48c, 0xa4d0, 0xa4f7, @@ -6364,8 +6298,7 @@ static const OnigCodePoint CR_Lo[] = { 0xaac2, 0xaac2, 0xaadb, 0xaadc, 0xabc0, 0xabe2, - 0xac00, 0xac00, - 0xd7a3, 0xd7a3, + 0xac00, 0xd7a3, 0xd7b0, 0xd7c6, 0xd7cb, 0xd7fb, 0xf900, 0xfa2d, @@ -6428,10 +6361,8 @@ static const OnigCodePoint CR_Lo[] = { 0x11083, 0x110af, 0x12000, 0x1236e, 0x13000, 0x1342e, - 0x20000, 0x20000, - 0x2a6d6, 0x2a6d6, - 0x2a700, 0x2a700, - 0x2b734, 0x2b734, + 0x20000, 0x2a6d6, + 0x2a700, 0x2b734, 0x2f800, 0x2fa1d, }; /* CR_Lo */ diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index cdbe277ba8..1ad202652d 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -745,6 +745,7 @@ class TestRegexp < Test::Unit::TestCase assert_match(/^\u3042{0}\p{Any}$/, "a") assert_match(/^\u3042{0}\p{Any}$/, "\u3041") assert_match(/^\u3042{0}\p{Any}$/, "\0") + assert_match(/^\p{Lo}{4}$/u, "\u3401\u4E01\u{20001}\u{2A701}") assert_no_match(/^\u3042{0}\p{Any}$/, "\0\0") assert_no_match(/^\u3042{0}\p{Any}$/, "") assert_raise(SyntaxError) { eval('/^\u3042{0}\p{' + "\u3042" + '}$/') } diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb index 8429bcc178..57edb3b3e5 100755 --- a/tool/enc-unicode.rb +++ b/tool/enc-unicode.rb @@ -40,26 +40,36 @@ end def parse_unicode_data(file) last_cp = 0 data = {'Cn' => []} + beg_cp = nil IO.foreach(file) do |line| fields = line.split(';') cp = fields[0].to_i(16) + case fields[1] + when /\A<(.*),\s*First>\z/ + beg_cp = cp + next + when /\A<(.*),\s*Last>\z/ + cps = (beg_cp..cp).to_a + else + beg_cp = cp + cps = [cp] + end + # The Cn category represents unassigned characters. These are not listed in # UnicodeData.txt so we must derive them by looking for 'holes' in the range # of listed codepoints. We increment the last codepoint seen and compare it # with the current codepoint. If the current codepoint is less than # last_cp.next we have found a hole, so we add the missing codepoint to the # Cn category. - while ((last_cp = last_cp.next) < cp) - data['Cn'] << last_cp - end + data['Cn'].concat((last_cp.next...beg_cp).to_a) # The third field denotes the 'General' category, e.g. Lu - (data[fields[2]] ||= []) << cp + (data[fields[2]] ||= []).concat(cps) # The 'Major' category is the first letter of the 'General' category, e.g. # 'Lu' -> 'L' - (data[fields[2][0,1]] ||= []) << cp + (data[fields[2][0,1]] ||= []).concat(cps) last_cp = cp end -- cgit v1.2.3