aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-03-15 12:25:20 +0000
committer: akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2010-03-15 12:25:20 +0000
commit: a73374bb5716287b82879e6ced116315aac77f7d (patch)
tree: 0c6b7c81955a031af3714ecb042768444c6132ed
parent: 21c7d36573736e6e3937c24dfa1cfccf6aa907ad (diff)
download: ruby-a73374bb5716287b82879e6ced116315aac77f7d.tar.gz
* tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
  optional argument.

* enc/trans/single_byte.trans use valid_encoding argument for
  transcode_tblgen.

* enc/trans/chinese.trans: ditto.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@26941 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 10
-rw-r--r-- enc/trans/chinese.trans 10
-rw-r--r-- enc/trans/single_byte.trans 3
-rwxr-xr-x tool/transcode-tblgen.rb 30
4 files changed, 34 insertions, 19 deletions
diff --git a/ChangeLog b/ChangeLog
index 5d60415fd4..1ab1b159bb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Mon Mar 15 21:22:49 2010 Tanaka Akira <akr@fsij.org>
+
+ * tool/transcode-tblgen.rb (transcode_tblgen): add valid_encoding
+ optional argument.
+
+ * enc/trans/single_byte.trans use valid_encoding argument for
+ transcode_tblgen.
+
+ * enc/trans/chinese.trans: ditto.
+
Mon Mar 15 18:33:36 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
* random.c (default_rand): removed initial buffer.
diff --git a/enc/trans/chinese.trans b/enc/trans/chinese.trans
index 1db6565254..3689696018 100644
--- a/enc/trans/chinese.trans
+++ b/enc/trans/chinese.trans
@@ -1,16 +1,18 @@
#include "transcode_data.h"
<%
- set_valid_byte_pattern 'GB2312', 'EUC-KR'
- set_valid_byte_pattern 'GB12345', 'EUC-KR'
+ gb2312_valid_byte_pattern = ValidEncoding['EUC-KR']
+ gb12345_valid_byte_pattern = ValidEncoding['EUC-KR']
transcode_tblgen "GB2312", "UTF-8",
[["{00-7f}", :nomap]] +
- citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS")
+ citrus_decode_mapsrc("euc", 0x8080, "GB2312/UCS"),
+ gb2312_valid_byte_pattern
transcode_tblgen "GB12345", "UTF-8",
[["{00-7f}", :nomap]] +
- citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS")
+ citrus_decode_mapsrc("euc", 0x8080, "GB12345/UCS"),
+ gb12345_valid_byte_pattern
transcode_tblgen "UTF-8", "GB2312",
[["{00-7f}", :nomap]] +
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index db47074570..55d8430932 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -22,9 +22,8 @@
require(name.downcase + "-tbl")
control1_if_needed = (name =~ /^ISO-8859/) ? CONTROL1_TO_UCS_TBL : []
tbl_to_ucs = control1_if_needed + eval(name.gsub(/-/, '_') + "_TO_UCS_TBL")
- set_valid_byte_pattern(name, '1byte')
code = ''
- code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }])
+ code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs.reject {|a, b| a.length != 2 }], '{00-ff}')
code << "\n"
code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }])
code
diff --git a/tool/transcode-tblgen.rb b/tool/transcode-tblgen.rb
index 48407ac3df..34d5e8e77b 100755
--- a/tool/transcode-tblgen.rb
+++ b/tool/transcode-tblgen.rb
@@ -165,7 +165,7 @@ class ActionMap
end
def self.build_tree(rects)
- expand("", rects) {|actions|
+ expand("", rects) {|prefix, actions|
unambiguous_action(actions)
}
end
@@ -186,12 +186,12 @@ class ActionMap
all_rects.concat rects.map {|min, max, action| [min, max, [i, action]] }
}
- tree = expand("", all_rects) {|actions|
+ tree = expand("", all_rects) {|prefix, actions|
args = Array.new(rects_list.length) { [] }
actions.each {|i, action|
args[i] << action
}
- yield(args)
+ yield(prefix, *args)
}
self.new("", tree)
@@ -213,7 +213,7 @@ class ActionMap
end
if has_empty
actions = rects.map {|min, max, action| action }.uniq
- act = block.call(actions)
+ act = block.call(prefix, actions)
tree = Action.new(act)
else
tree = []
@@ -649,18 +649,22 @@ def encode_utf8(map)
r
end
-def transcode_compile_tree(name, from, map)
+def transcode_compile_tree(name, from, map, valid_encoding=nil)
map = encode_utf8(map)
h = {}
map.each {|k, v|
h[k] = v unless h[k] # use first mapping
}
- if valid_encoding = ValidEncoding[from]
+ valid_encoding = ValidEncoding[from] if valid_encoding == nil
+ if valid_encoding
rects = ActionMap.parse_to_rects(h)
undef_rects = ActionMap.parse_to_rects(valid_encoding => :undef)
- am = ActionMap.merge(rects, undef_rects) {|a1, a2|
- a1 = a1.empty? ? nil : ActionMap.unambiguous_action(a1)
- a2 = a2.empty? ? nil : ActionMap.unambiguous_action(a2)
+ am = ActionMap.merge(rects, undef_rects) {|prefix, as1, as2|
+ a1 = as1.empty? ? nil : ActionMap.unambiguous_action(as1)
+ a2 = as2.empty? ? nil : ActionMap.unambiguous_action(as2)
+ if !a2
+ raise "invalid mapping: #{prefix}"
+ end
a1 || a2
}
else
@@ -675,7 +679,7 @@ end
TRANSCODERS = []
TRANSCODE_GENERATED_TRANSCODER_CODE = ''
-def transcode_tbl_only(from, to, map)
+def transcode_tbl_only(from, to, map, valid_encoding=nil)
if VERBOSE_MODE
if from.empty? || to.empty?
STDERR.puts "converter for #{from.empty? ? to : from}"
@@ -692,12 +696,12 @@ def transcode_tbl_only(from, to, map)
else
tree_name = "from_#{id_from}_to_#{id_to}"
end
- real_tree_name, max_input = transcode_compile_tree(tree_name, from, map)
+ real_tree_name, max_input = transcode_compile_tree(tree_name, from, map, valid_encoding)
return map, tree_name, real_tree_name, max_input
end
-def transcode_tblgen(from, to, map)
- map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map)
+def transcode_tblgen(from, to, map, valid_encoding=nil)
+ map, tree_name, real_tree_name, max_input = transcode_tbl_only(from, to, map, valid_encoding)
transcoder_name = "rb_#{tree_name}"
TRANSCODERS << transcoder_name
input_unit_length = UnitLength[from]