diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-10-08 02:49:11 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2009-10-08 02:49:11 +0000 |
commit | 866c79e2de4567d71f432652c58b48fe50916f37 (patch) | |
tree | 799d6e168abeaa5babf61a3e42ab5f0ab9b2094c /tool | |
parent | ec0e370eb5451a1e597bf528f8f9a2dcc46880f0 (diff) | |
download | ruby-866c79e2de4567d71f432652c58b48fe50916f37.tar.gz |
* tool/enc-unicode.rb: parse the range notation (`<..., First>` / `<..., Last>` entries) of UnicodeData.txt.
* enc/unicode/name2ctype.h, enc/unicode/name2ctype.h.blt,
enc/unicode/name2ctype.kwd, enc/unicode/name2ctype.src:
follow the above change. [ruby-dev:39444]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@25260 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'tool')
-rwxr-xr-x | tool/enc-unicode.rb | 20 |
1 file changed, 15 insertions, 5 deletions
```diff
diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index 8429bcc178..57edb3b3e5 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -40,26 +40,36 @@ end
 def parse_unicode_data(file)
   last_cp = 0
   data = {'Cn' => []}
+  beg_cp = nil
   IO.foreach(file) do |line|
     fields = line.split(';')
     cp = fields[0].to_i(16)
 
+    case fields[1]
+    when /\A<(.*),\s*First>\z/
+      beg_cp = cp
+      next
+    when /\A<(.*),\s*Last>\z/
+      cps = (beg_cp..cp).to_a
+    else
+      beg_cp = cp
+      cps = [cp]
+    end
+
     # The Cn category represents unassigned characters. These are not listed in
     # UnicodeData.txt so we must derive them by looking for 'holes' in the range
     # of listed codepoints. We increment the last codepoint seen and compare it
     # with the current codepoint. If the current codepoint is less than
     # last_cp.next we have found a hole, so we add the missing codepoint to the
     # Cn category.
-    while ((last_cp = last_cp.next) < cp)
-      data['Cn'] << last_cp
-    end
+    data['Cn'].concat((last_cp.next...beg_cp).to_a)
 
     # The third field denotes the 'General' category, e.g. Lu
-    (data[fields[2]] ||= []) << cp
+    (data[fields[2]] ||= []).concat(cps)
 
     # The 'Major' category is the first letter of the 'General' category, e.g.
     # 'Lu' -> 'L'
-    (data[fields[2][0,1]] ||= []) << cp
+    (data[fields[2][0,1]] ||= []).concat(cps)
     last_cp = cp
   end
 
```