From 4d0396ec962c2271385745b48e802be272b56575 Mon Sep 17 00:00:00 2001 From: nobu Date: Mon, 15 Oct 2018 13:48:20 +0000 Subject: unicode.c: moved additional GCB ranges * enc/unicode.c: moved additional Grapheme Cluster Break ranges which depend on the Unicode version. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@65087 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- enc/unicode.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ regparse.c | 52 ++++++++-------------------------------------------- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/enc/unicode.c b/enc/unicode.c index c909022989..c44451f7b4 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -786,6 +786,57 @@ SpecialsCopy: return (int )(to - to_start); } +const OnigCodePoint +onigenc_unicode_GCB_ranges_GAZ[] = { +#if ONIG_UNICODE_VERSION_MAJOR < 10 + 13, + 0x1F308, 0x1F308, + 0x1F33E, 0x1F33E, + 0x1F373, 0x1F373, + 0x1F393, 0x1F393, + 0x1F3A4, 0x1F3A4, + 0x1F3A8, 0x1F3A8, + 0x1F3EB, 0x1F3EB, + 0x1F3ED, 0x1F3ED, + 0x1F4BB, 0x1F4BC, + 0x1F527, 0x1F527, + 0x1F52C, 0x1F52C, + 0x1F680, 0x1F680, + 0x1F692, 0x1F692, +#else + 0, +#endif +}; + +const OnigCodePoint +onigenc_unicode_GCB_ranges_E_Base[] = { +#if ONIG_UNICODE_VERSION_MAJOR < 10 + 8, + 0x1F3C2, 0x1F3C2, + 0x1F3C7, 0x1F3C7, + 0x1F3CC, 0x1F3CC, +#else + 3, +#endif + 0x1F3F3, 0x1F3F3, + 0x1F441, 0x1F441, + 0x1F46F, 0x1F46F, +#if ONIG_UNICODE_VERSION_MAJOR < 10 + 0x1F574, 0x1F574, + 0x1F6CC, 0x1F6CC, +#endif +}; + +const OnigCodePoint +onigenc_unicode_GCB_ranges_Emoji[] = { + 4, + 0x2640, 0x2640, + 0x2642, 0x2642, + 0x2695, 0x2696, + 0x2708, 0x2708, +}; + +#if 0 const char onigenc_unicode_version_string[] = #ifdef ONIG_UNICODE_VERSION_STRING ONIG_UNICODE_VERSION_STRING @@ -801,3 +852,4 @@ const int onigenc_unicode_version_number[3] = { 0 #endif }; +#endif diff --git a/regparse.c b/regparse.c index c46ddbcd91..a296bf1108 100644 --- a/regparse.c +++ b/regparse.c @@ -35,7 +35,6 @@ #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS -extern 
const int onigenc_unicode_version_number[3]; const OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | @@ -5707,6 +5706,10 @@ propname2ctype(ScanEnv* env, const char* propname) return ctype; } +extern const OnigCodePoint onigenc_unicode_GCB_ranges_GAZ[]; +extern const OnigCodePoint onigenc_unicode_GCB_ranges_E_Base[]; +extern const OnigCodePoint onigenc_unicode_GCB_ranges_Emoji[]; + static int node_extended_grapheme_cluster(Node** np, ScanEnv* env) { @@ -6068,23 +6071,8 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) np1 = node_new_cclass(); if (IS_NULL(np1)) goto err; cc = NCCLASS(np1); - if (onigenc_unicode_version_number[0] < 10) { - static const OnigCodePoint ranges[] = { - 13, - 0x1F308, 0x1F308, - 0x1F33E, 0x1F33E, - 0x1F373, 0x1F373, - 0x1F393, 0x1F393, - 0x1F3A4, 0x1F3A4, - 0x1F3A8, 0x1F3A8, - 0x1F3EB, 0x1F3EB, - 0x1F3ED, 0x1F3ED, - 0x1F4BB, 0x1F4BC, - 0x1F527, 0x1F527, - 0x1F52C, 0x1F52C, - 0x1F680, 0x1F680, - 0x1F692, 0x1F692, - }; + { + const OnigCodePoint *ranges = onigenc_unicode_GCB_ranges_GAZ; r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); if (r != 0) goto err; } @@ -6123,13 +6111,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) if (IS_NULL(np1)) goto err; cc = NCCLASS(np1); { - static const OnigCodePoint ranges[] = { - 4, - 0x2640, 0x2640, - 0x2642, 0x2642, - 0x2695, 0x2696, - 0x2708, 0x2708, - }; + const OnigCodePoint *ranges = onigenc_unicode_GCB_ranges_Emoji; r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); if (r != 0) goto err; } @@ -6210,25 +6192,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env) if (IS_NULL(np1)) goto err; cc = NCCLASS(np1); { - static const OnigCodePoint ranges9[] = { - 8, - 0x1F3C2, 0x1F3C2, - 0x1F3C7, 0x1F3C7, - 0x1F3CC, 0x1F3CC, - 0x1F3F3, 0x1F3F3, - 0x1F441, 0x1F441, - 0x1F46F, 0x1F46F, - 0x1F574, 0x1F574, - 0x1F6CC, 0x1F6CC, - }; - static const OnigCodePoint ranges10[] = { - 3, - 0x1F3F3, 0x1F3F3, - 0x1F441, 0x1F441, - 
0x1F46F, 0x1F46F, - }; - const OnigCodePoint *ranges = - (onigenc_unicode_version_number[0] < 10) ? ranges9 : ranges10; + const OnigCodePoint *ranges = onigenc_unicode_GCB_ranges_E_Base; r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); if (r != 0) goto err; } -- cgit v1.2.3