diff options
Diffstat (limited to 'tool/unicode_norm_gen.rb')
-rw-r--r-- | tool/unicode_norm_gen.rb | 61 |
1 files changed, 35 insertions, 26 deletions
diff --git a/tool/unicode_norm_gen.rb b/tool/unicode_norm_gen.rb index c90b16dac5..982f3f077c 100644 --- a/tool/unicode_norm_gen.rb +++ b/tool/unicode_norm_gen.rb @@ -46,7 +46,7 @@ class Array else first.to_UTF8 + '-' + last.to_UTF8 end - end.line_slice "\" +\n \"" + end.line_slice "\" \\\n \"" end end @@ -141,33 +141,42 @@ open("#{OuputDataDir}/normalize_tables.rb", "w").print <<MAPPING_TABLE_FILE_END # automatically generated by tool/unicode_norm_gen.rb module Normalize - ACCENTS = " - [#{accent_array.to_regexp_chars}] - " - REGEXP_D_STRING = " # composition starters and composition exclusions - [#{(composition_table.values+composition_exclusions).to_regexp_chars}]\#{ACCENTS}* - | # characters that can be the result of a composition, except composition starters - [#{(composition_starters-composition_table.values).to_regexp_chars}]?\#{ACCENTS}+ - | # precomposed Hangul syllables - [\\u{AC00}-\\u{D7A4}] - " - REGEXP_C_STRING = " # composition exclusions - [#{composition_exclusions.to_regexp_chars}]\#{ACCENTS}* - | # composition starters and characters that can be the result of a composition - [#{(composition_starters+composition_table.values).to_regexp_chars}]?\#{ACCENTS}+ - | # Hangul syllables with separate trailer - [#{hangul_no_trailing.to_regexp_chars}][\\u11A8-\\u11C2] - | # decomposed Hangul syllables - [\\u1100-\\u1112][\\u1161-\\u1175][\\u11A8-\\u11C2]? - " - REGEXP_K_STRING = " - [#{kompatible_table.keys.to_regexp_chars}] - " - - CLASS_TABLE = { + accents = "" \\ + "[#{accent_array.to_regexp_chars}]" \\ + "" + ACCENTS = accents + REGEXP_D_STRING = "\#{'' # composition starters and composition exclusions + }" \\ + "[#{(composition_table.values+composition_exclusions).to_regexp_chars}]\#{accents}*" \\ + "|\#{'' # characters that can be the result of a composition, except composition starters + }" \\ + "[#{(composition_starters-composition_table.values).to_regexp_chars}]?\#{accents}+" \\ + "|\#{'' # precomposed Hangul syllables + }" \\ + "[\\u{AC00}-\\u{D7A4}]" \\ + "" + REGEXP_C_STRING = "\#{'' # composition exclusions + }" \\ + "[#{composition_exclusions.to_regexp_chars}]\#{accents}*" \\ + "|\#{'' # composition starters and characters that can be the result of a composition + }" \\ + "[#{(composition_starters+composition_table.values).to_regexp_chars}]?\#{accents}+" \\ + "|\#{'' # Hangul syllables with separate trailer + }" \\ + "[#{hangul_no_trailing.to_regexp_chars}][\\u11A8-\\u11C2]" \\ + "|\#{'' # decomposed Hangul syllables + }" \\ + "[\\u1100-\\u1112][\\u1161-\\u1175][\\u11A8-\\u11C2]?" \\ + "" + REGEXP_K_STRING = "" \\ + "[#{kompatible_table.keys.to_regexp_chars}]" \\ + "" + + class_table = { #{class_table_str} } - CLASS_TABLE.default = 0 + class_table.default = 0 + CLASS_TABLE = class_table DECOMPOSITION_TABLE = { #{decomposition_table.to_hash_string} |