aboutsummaryrefslogtreecommitdiffstats
path: root/regparse.c
diff options
context:
space:
mode:
author duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-11-27 02:18:29 +0000
committer duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2018-11-27 02:18:29 +0000
commit 2e075759148689af6b771afb863e88ebbde89c2b (patch)
tree f1f8190bda3a99a306246cee53e62b2cd062855a /regparse.c
parent 7bbe9d419536fc54f7fae081139c554a0922df39 (diff)
download ruby-2e075759148689af6b771afb863e88ebbde89c2b.tar.gz
add some comments in function node_extended_grapheme_cluster() [ci skip]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@66014 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regparse.c')
-rw-r--r-- regparse.c 26
1 file changed, 24 insertions, 2 deletions
diff --git a/regparse.c b/regparse.c
index 3a8e3b5bc1..e4cf13796e 100644
--- a/regparse.c
+++ b/regparse.c
@@ -5760,10 +5760,16 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
int extend = propname2ctype(env, "Grapheme_Cluster_Break=Extend");
if (extend < 0) goto err;
- /* Prepend*
+ /* main comment: The order of the code is mostly in reverse of the order
+ * the various expressions appear in the grammar */
+ /* Unicode 10.0.0 */
+ /* CRLF
+ * | Prepend*
* ( RI-sequence | Hangul-Syllable | !Control )
- * ( Grapheme_Extend | SpacingMark )* */
+ * ( Grapheme_Extend | SpacingMark )*
+ * | . */
+ /* Unicode 10.0.0 */
/* ( Grapheme_Extend | SpacingMark )* */
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
@@ -5785,6 +5791,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
list = tmp;
np1 = NULL;
+ /* Unicode 10.0.0 */
/* ( RI-sequence | Hangul-Syllable | !Control ) */
/* !Control */
np1 = node_new_cclass();
@@ -5818,12 +5825,19 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt = tmp;
np1 = NULL;
+ /* Unicode 10.0.0 */
/* Hangul-Syllable
* := L* V+ T*
* | L* LV V* T*
* | L* LVT T*
* | L+
* | T+ */
+ /* Unicode 11.0.0 */
+ /* Hangul-Syllable
+ * := L* (V+ | LV V* | LVT) T*
+ * | L+
+ * | T+ */
+ /* these are equivalent, so we leave things as is for the moment */
/* T+ */
r = create_property_node(&np1, env, "Grapheme_Cluster_Break=T");
@@ -5990,7 +6004,9 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
if (IS_NULL(tmp)) goto err;
alt = tmp;
list2 = NULL;
+ /* end of Hangul-Syllable */
+ /* Unicode 10.0.0 */
/* Emoji sequence := (E_Base | EBG) Extend* E_Modifier?
* (ZWJ (Glue_After_Zwj | EBG Extend* E_Modifier?) )* */
@@ -6037,6 +6053,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt2 = tmp;
list2 = NULL;
+ /* Unicode 10.0.0 */
/* Glue_After_Zwj */
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
@@ -6075,6 +6092,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt2 = tmp;
list2 = NULL;
+ /* Unicode 10.0.0 */
/* Emoji variation sequence
* http://unicode.org/Public/emoji/4.0/emoji-zwj-sequences.txt
*/
@@ -6194,6 +6212,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt = tmp;
list2 = NULL;
+ /* Unicode 10.0.0 */
/* ZWJ (E_Base_GAZ | Glue_After_Zwj) E_Modifier? */
/* a sequence starting with ZWJ seems artificial, but GraphemeBreakTest
* has such examples.
@@ -6240,6 +6259,9 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt = tmp;
list2 = NULL;
+ /* Unicode 10.0.0/11.0.0 */
+ /* this is actually Regional_Indicator+ in Unicode 10.0.0,
+ * but it is Regional_Indicator{2} in Unicode 11.0.0, so no need to fix */
/* RI-Sequence := Regional_Indicator{2} */
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;