aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-31 09:07:21 +0000
committerduerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-10-31 09:07:21 +0000
commitd37df9fb13bd2cf46ddd0cafb3bfe1885ac809e9 (patch)
treeaacc314aa5c3107b1bb16ad87bd4ffa593aecde7
parentec280ecbe658035d8c9affa6ca46e13a255c0cbe (diff)
downloadruby-d37df9fb13bd2cf46ddd0cafb3bfe1885ac809e9.tar.gz
* enc/trans/single_byte.trans, maccroatian-tbl.rb,
maccyrillic-tbl.rb, maciceland-tbl.rb: new single-byte transcodings (from Yoshihiro Kambayashi) * test/ruby/test_transcode.rb: added tests for the above (from Yoshihiro Kambayashi) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@20075 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog9
-rw-r--r--enc/trans/maccroatian-tbl.rb129
-rw-r--r--enc/trans/maccyrillic-tbl.rb130
-rw-r--r--enc/trans/maciceland-tbl.rb129
-rw-r--r--enc/trans/single_byte.trans3
-rw-r--r--test/ruby/test_transcode.rb73
6 files changed, 473 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 00d246b69e..ff9dd73735 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Fri Oct 31 18:05:05 2008 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * enc/trans/single_byte.trans, maccroatian-tbl.rb,
+ maccyrillic-tbl.rb, maciceland-tbl.rb: new single-byte
+ transcodings (from Yoshihiro Kambayashi)
+
+ * test/ruby/test_transcode.rb: added tests for the above
+ (from Yoshihiro Kambayashi)
+
Fri Oct 31 12:51:25 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
* dir.c (dir_globs): need taint check. reported by steve
diff --git a/enc/trans/maccroatian-tbl.rb b/enc/trans/maccroatian-tbl.rb
new file mode 100644
index 0000000000..359878ec1f
--- /dev/null
+++ b/enc/trans/maccroatian-tbl.rb
@@ -0,0 +1,129 @@
+MACCROATIAN_TO_UCS_TBL = [ # MacCroatian table: each entry is [byte as two-digit hex string, Unicode code point]; only bytes in 80..FF are listed, sorted by ascending code point
+ ["CA",0xA0],
+ ["C1",0xA1],
+ ["A2",0xA2],
+ ["A3",0xA3],
+ ["DB",0xA4],
+ ["A4",0xA7],
+ ["AC",0xA8],
+ ["D9",0xA9],
+ ["BB",0xAA],
+ ["C7",0xAB],
+ ["C2",0xAC],
+ ["A8",0xAE],
+ ["F8",0xAF],
+ ["A1",0xB0],
+ ["B1",0xB1],
+ ["AB",0xB4],
+ ["B5",0xB5],
+ ["A6",0xB6],
+ ["E1",0xB7],
+ ["FC",0xB8],
+ ["BC",0xBA],
+ ["DF",0xBB],
+ ["C0",0xBF],
+ ["CB",0xC0],
+ ["E7",0xC1],
+ ["E5",0xC2],
+ ["CC",0xC3],
+ ["80",0xC4],
+ ["81",0xC5],
+ ["DE",0xC6],
+ ["82",0xC7],
+ ["E9",0xC8],
+ ["83",0xC9],
+ ["FD",0xCA],
+ ["FA",0xCB],
+ ["ED",0xCC],
+ ["EA",0xCD],
+ ["EB",0xCE],
+ ["EC",0xCF],
+ ["84",0xD1],
+ ["F1",0xD2],
+ ["EE",0xD3],
+ ["EF",0xD4],
+ ["CD",0xD5],
+ ["85",0xD6],
+ ["AF",0xD8],
+ ["F4",0xD9],
+ ["F2",0xDA],
+ ["F3",0xDB],
+ ["86",0xDC],
+ ["A7",0xDF],
+ ["88",0xE0],
+ ["87",0xE1],
+ ["89",0xE2],
+ ["8B",0xE3],
+ ["8A",0xE4],
+ ["8C",0xE5],
+ ["FE",0xE6],
+ ["8D",0xE7],
+ ["8F",0xE8],
+ ["8E",0xE9],
+ ["90",0xEA],
+ ["91",0xEB],
+ ["93",0xEC],
+ ["92",0xED],
+ ["94",0xEE],
+ ["95",0xEF],
+ ["96",0xF1],
+ ["98",0xF2],
+ ["97",0xF3],
+ ["99",0xF4],
+ ["9B",0xF5],
+ ["9A",0xF6],
+ ["D6",0xF7],
+ ["BF",0xF8],
+ ["9D",0xF9],
+ ["9C",0xFA],
+ ["9E",0xFB],
+ ["9F",0xFC],
+ ["C6",0x106],
+ ["E6",0x107],
+ ["C8",0x10C],
+ ["E8",0x10D],
+ ["D0",0x110],
+ ["F0",0x111],
+ ["F5",0x131],
+ ["CE",0x152],
+ ["CF",0x153],
+ ["A9",0x160],
+ ["B9",0x161],
+ ["AE",0x17D],
+ ["BE",0x17E],
+ ["C4",0x192],
+ ["F6",0x2C6],
+ ["FF",0x2C7],
+ ["FB",0x2DA],
+ ["F7",0x2DC],
+ ["F9",0x3C0],
+ ["E0",0x2013],
+ ["D1",0x2014],
+ ["D4",0x2018],
+ ["D5",0x2019],
+ ["E2",0x201A],
+ ["D2",0x201C],
+ ["D3",0x201D],
+ ["E3",0x201E],
+ ["A0",0x2020],
+ ["A5",0x2022],
+ ["C9",0x2026],
+ ["E4",0x2030],
+ ["DC",0x2039],
+ ["DD",0x203A],
+ ["DA",0x2044],
+ ["AA",0x2122],
+ ["BD",0x2126],
+ ["B6",0x2202],
+ ["B4",0x2206],
+ ["B8",0x220F],
+ ["B7",0x2211],
+ ["C3",0x221A],
+ ["B0",0x221E],
+ ["BA",0x222B],
+ ["C5",0x2248],
+ ["AD",0x2260],
+ ["B2",0x2264],
+ ["B3",0x2265],
+ ["D7",0x25CA],
+] \ No newline at end of file
diff --git a/enc/trans/maccyrillic-tbl.rb b/enc/trans/maccyrillic-tbl.rb
new file mode 100644
index 0000000000..378aa8c3bc
--- /dev/null
+++ b/enc/trans/maccyrillic-tbl.rb
@@ -0,0 +1,130 @@
+MACCYRILLIC_TO_UCS_TBL = [ # MacCyrillic table: each entry is [byte as two-digit hex string, Unicode code point]; only bytes in 80..FF are listed, sorted by ascending code point
+ ["CA",0xA0],
+ ["A2",0xA2],
+ ["A3",0xA3],
+ ["FF",0xA4],
+ ["A4",0xA7],
+ ["A9",0xA9],
+ ["C7",0xAB],
+ ["C2",0xAC],
+ ["A8",0xAE],
+ ["A1",0xB0],
+ ["B1",0xB1],
+ ["B5",0xB5],
+ ["A6",0xB6],
+ ["C8",0xBB],
+ ["D6",0xF7],
+ ["C4",0x192],
+ ["DD",0x401],
+ ["AB",0x402],
+ ["AE",0x403],
+ ["B8",0x404],
+ ["C1",0x405],
+ ["A7",0x406],
+ ["BA",0x407],
+ ["B7",0x408],
+ ["BC",0x409],
+ ["BE",0x40A],
+ ["CB",0x40B],
+ ["CD",0x40C],
+ ["D8",0x40E],
+ ["DA",0x40F],
+ ["80",0x410],
+ ["81",0x411],
+ ["82",0x412],
+ ["83",0x413],
+ ["84",0x414],
+ ["85",0x415],
+ ["86",0x416],
+ ["87",0x417],
+ ["88",0x418],
+ ["89",0x419],
+ ["8A",0x41A],
+ ["8B",0x41B],
+ ["8C",0x41C],
+ ["8D",0x41D],
+ ["8E",0x41E],
+ ["8F",0x41F],
+ ["90",0x420],
+ ["91",0x421],
+ ["92",0x422],
+ ["93",0x423],
+ ["94",0x424],
+ ["95",0x425],
+ ["96",0x426],
+ ["97",0x427],
+ ["98",0x428],
+ ["99",0x429],
+ ["9A",0x42A],
+ ["9B",0x42B],
+ ["9C",0x42C],
+ ["9D",0x42D],
+ ["9E",0x42E],
+ ["9F",0x42F],
+ ["E0",0x430],
+ ["E1",0x431],
+ ["E2",0x432],
+ ["E3",0x433],
+ ["E4",0x434],
+ ["E5",0x435],
+ ["E6",0x436],
+ ["E7",0x437],
+ ["E8",0x438],
+ ["E9",0x439],
+ ["EA",0x43A],
+ ["EB",0x43B],
+ ["EC",0x43C],
+ ["ED",0x43D],
+ ["EE",0x43E],
+ ["EF",0x43F],
+ ["F0",0x440],
+ ["F1",0x441],
+ ["F2",0x442],
+ ["F3",0x443],
+ ["F4",0x444],
+ ["F5",0x445],
+ ["F6",0x446],
+ ["F7",0x447],
+ ["F8",0x448],
+ ["F9",0x449],
+ ["FA",0x44A],
+ ["FB",0x44B],
+ ["FC",0x44C],
+ ["FD",0x44D],
+ ["FE",0x44E],
+ ["DF",0x44F],
+ ["DE",0x451],
+ ["AC",0x452],
+ ["AF",0x453],
+ ["B9",0x454],
+ ["CF",0x455],
+ ["B4",0x456],
+ ["BB",0x457],
+ ["C0",0x458],
+ ["BD",0x459],
+ ["BF",0x45A],
+ ["CC",0x45B],
+ ["CE",0x45C],
+ ["D9",0x45E],
+ ["DB",0x45F],
+ ["D0",0x2013],
+ ["D1",0x2014],
+ ["D4",0x2018],
+ ["D5",0x2019],
+ ["D2",0x201C],
+ ["D3",0x201D],
+ ["D7",0x201E],
+ ["A0",0x2020],
+ ["A5",0x2022],
+ ["C9",0x2026],
+ ["DC",0x2116],
+ ["AA",0x2122],
+ ["B6",0x2202],
+ ["C6",0x2206],
+ ["C3",0x221A],
+ ["B0",0x221E],
+ ["C5",0x2248],
+ ["AD",0x2260],
+ ["B2",0x2264],
+ ["B3",0x2265],
+] \ No newline at end of file
diff --git a/enc/trans/maciceland-tbl.rb b/enc/trans/maciceland-tbl.rb
new file mode 100644
index 0000000000..818d992274
--- /dev/null
+++ b/enc/trans/maciceland-tbl.rb
@@ -0,0 +1,129 @@
+MACICELAND_TO_UCS_TBL = [ # MacIceland table: each entry is [byte as two-digit hex string, Unicode code point]; only bytes in 80..FF are listed (F0 has no entry), sorted by ascending code point
+ ["CA",0xA0],
+ ["C1",0xA1],
+ ["A2",0xA2],
+ ["A3",0xA3],
+ ["DB",0xA4],
+ ["B4",0xA5],
+ ["A4",0xA7],
+ ["AC",0xA8],
+ ["A9",0xA9],
+ ["BB",0xAA],
+ ["C7",0xAB],
+ ["C2",0xAC],
+ ["A8",0xAE],
+ ["F8",0xAF],
+ ["A1",0xB0],
+ ["B1",0xB1],
+ ["AB",0xB4],
+ ["B5",0xB5],
+ ["A6",0xB6],
+ ["E1",0xB7],
+ ["FC",0xB8],
+ ["BC",0xBA],
+ ["C8",0xBB],
+ ["C0",0xBF],
+ ["CB",0xC0],
+ ["E7",0xC1],
+ ["E5",0xC2],
+ ["CC",0xC3],
+ ["80",0xC4],
+ ["81",0xC5],
+ ["AE",0xC6],
+ ["82",0xC7],
+ ["E9",0xC8],
+ ["83",0xC9],
+ ["E6",0xCA],
+ ["E8",0xCB],
+ ["ED",0xCC],
+ ["EA",0xCD],
+ ["EB",0xCE],
+ ["EC",0xCF],
+ ["DC",0xD0],
+ ["84",0xD1],
+ ["F1",0xD2],
+ ["EE",0xD3],
+ ["EF",0xD4],
+ ["CD",0xD5],
+ ["85",0xD6],
+ ["AF",0xD8],
+ ["F4",0xD9],
+ ["F2",0xDA],
+ ["F3",0xDB],
+ ["86",0xDC],
+ ["A0",0xDD],
+ ["DE",0xDE],
+ ["A7",0xDF],
+ ["88",0xE0],
+ ["87",0xE1],
+ ["89",0xE2],
+ ["8B",0xE3],
+ ["8A",0xE4],
+ ["8C",0xE5],
+ ["BE",0xE6],
+ ["8D",0xE7],
+ ["8F",0xE8],
+ ["8E",0xE9],
+ ["90",0xEA],
+ ["91",0xEB],
+ ["93",0xEC],
+ ["92",0xED],
+ ["94",0xEE],
+ ["95",0xEF],
+ ["DD",0xF0],
+ ["96",0xF1],
+ ["98",0xF2],
+ ["97",0xF3],
+ ["99",0xF4],
+ ["9B",0xF5],
+ ["9A",0xF6],
+ ["D6",0xF7],
+ ["BF",0xF8],
+ ["9D",0xF9],
+ ["9C",0xFA],
+ ["9E",0xFB],
+ ["9F",0xFC],
+ ["E0",0xFD],
+ ["DF",0xFE],
+ ["D8",0xFF],
+ ["F5",0x131],
+ ["CE",0x152],
+ ["CF",0x153],
+ ["D9",0x178],
+ ["C4",0x192],
+ ["F6",0x2C6],
+ ["FF",0x2C7],
+ ["F9",0x2D8],
+ ["FA",0x2D9],
+ ["FB",0x2DA],
+ ["FE",0x2DB],
+ ["F7",0x2DC],
+ ["FD",0x2DD],
+ ["B9",0x3C0],
+ ["D0",0x2013],
+ ["D1",0x2014],
+ ["D4",0x2018],
+ ["D5",0x2019],
+ ["E2",0x201A],
+ ["D2",0x201C],
+ ["D3",0x201D],
+ ["E3",0x201E],
+ ["A5",0x2022],
+ ["C9",0x2026],
+ ["E4",0x2030],
+ ["DA",0x2044],
+ ["AA",0x2122],
+ ["BD",0x2126],
+ ["B6",0x2202],
+ ["C6",0x2206],
+ ["B8",0x220F],
+ ["B7",0x2211],
+ ["C3",0x221A],
+ ["B0",0x221E],
+ ["BA",0x222B],
+ ["C5",0x2248],
+ ["AD",0x2260],
+ ["B2",0x2264],
+ ["B3",0x2265],
+ ["D7",0x25CA],
+] \ No newline at end of file
diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans
index b49bc779a1..f46fd1055b 100644
--- a/enc/trans/single_byte.trans
+++ b/enc/trans/single_byte.trans
@@ -53,6 +53,9 @@
transcode_tblgen_singlebyte "WINDOWS-1255"
transcode_tblgen_singlebyte "WINDOWS-1256"
transcode_tblgen_singlebyte "WINDOWS-1257"
+ transcode_tblgen_singlebyte "MACCROATIAN"
+ transcode_tblgen_singlebyte "MACCYRILLIC"
+ transcode_tblgen_singlebyte "MACICELAND"
%>
<%= transcode_generated_code %>
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 5280bd41dc..abd337e339 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -417,6 +417,79 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u02D9", "\xFF", 'windows-1257') # ˙
end
+ def test_macCroatian # Spot-checks macCroatian byte/character pairs; check_both_ways is defined outside this view and presumably verifies the conversion in both directions
+ check_both_ways("\u00C4", "\x80", 'macCroatian') # Ä
+ check_both_ways("\u00E8", "\x8F", 'macCroatian') # è
+ check_both_ways("\u00EA", "\x90", 'macCroatian') # ê
+ check_both_ways("\u00FC", "\x9F", 'macCroatian') # ü
+ check_both_ways("\u2020", "\xA0", 'macCroatian') # †
+ check_both_ways("\u00D8", "\xAF", 'macCroatian') # Ø
+ check_both_ways("\u221E", "\xB0", 'macCroatian') # ∞
+ check_both_ways("\u00F8", "\xBF", 'macCroatian') # ø
+ check_both_ways("\u00BF", "\xC0", 'macCroatian') # ¿
+ check_both_ways("\u0153", "\xCF", 'macCroatian') # œ
+ check_both_ways("\u0110", "\xD0", 'macCroatian') # Đ
+ check_both_ways("\u00A9", "\xD9", 'macCroatian') # ©
+ check_both_ways("\u2044", "\xDA", 'macCroatian') # ⁄
+ check_both_ways("\u203A", "\xDD", 'macCroatian') # ›
+ check_both_ways("\u00C6", "\xDE", 'macCroatian') # Æ
+ check_both_ways("\u00BB", "\xDF", 'macCroatian') # »
+ check_both_ways("\u2013", "\xE0", 'macCroatian') # –
+ check_both_ways("\u00B7", "\xE1", 'macCroatian') # ·
+ check_both_ways("\u00C2", "\xE5", 'macCroatian') # Â
+ check_both_ways("\u0107", "\xE6", 'macCroatian') # ć
+ check_both_ways("\u00C1", "\xE7", 'macCroatian') # Á
+ check_both_ways("\u010D", "\xE8", 'macCroatian') # č
+ check_both_ways("\u00C8", "\xE9", 'macCroatian') # È
+ check_both_ways("\u00D4", "\xEF", 'macCroatian') # Ô
+ check_both_ways("\u0111", "\xF0", 'macCroatian') # đ
+ check_both_ways("\u00D2", "\xF1", 'macCroatian') # Ò
+ check_both_ways("\u00AF", "\xF8", 'macCroatian') # ¯
+ check_both_ways("\u03C0", "\xF9", 'macCroatian') # π
+ check_both_ways("\u00CB", "\xFA", 'macCroatian') # Ë
+ check_both_ways("\u00CA", "\xFD", 'macCroatian') # Ê
+ check_both_ways("\u00E6", "\xFE", 'macCroatian') # æ
+ check_both_ways("\u02C7", "\xFF", 'macCroatian') # ˇ
+ end
+
+ def test_macCyrillic # Spot-checks the first and last byte of each 16-byte row from \x80 to \xFF; check_both_ways is defined outside this view and presumably verifies the conversion in both directions
+ check_both_ways("\u0410", "\x80", 'macCyrillic') # А
+ check_both_ways("\u041F", "\x8F", 'macCyrillic') # П
+ check_both_ways("\u0420", "\x90", 'macCyrillic') # Р
+ check_both_ways("\u042F", "\x9F", 'macCyrillic') # Я
+ check_both_ways("\u2020", "\xA0", 'macCyrillic') # †
+ check_both_ways("\u0453", "\xAF", 'macCyrillic') # ѓ
+ check_both_ways("\u221E", "\xB0", 'macCyrillic') # ∞
+ check_both_ways("\u045A", "\xBF", 'macCyrillic') # њ
+ check_both_ways("\u0458", "\xC0", 'macCyrillic') # ј
+ check_both_ways("\u0455", "\xCF", 'macCyrillic') # ѕ
+ check_both_ways("\u2013", "\xD0", 'macCyrillic') # –
+ check_both_ways("\u044F", "\xDF", 'macCyrillic') # я
+ check_both_ways("\u0430", "\xE0", 'macCyrillic') # а
+ check_both_ways("\u043F", "\xEF", 'macCyrillic') # п
+ check_both_ways("\u0440", "\xF0", 'macCyrillic') # р
+ check_both_ways("\u00A4", "\xFF", 'macCyrillic') # ¤
+ end
+
+ def test_macIceland # Spot-checks the first and last byte of each 16-byte row from \x80 to \xFF (except \xF0, see below); check_both_ways is defined outside this view and presumably verifies the conversion in both directions
+ check_both_ways("\u00C4", "\x80", 'macIceland') # Ä
+ check_both_ways("\u00E8", "\x8F", 'macIceland') # è
+ check_both_ways("\u00EA", "\x90", 'macIceland') # ê
+ check_both_ways("\u00FC", "\x9F", 'macIceland') # ü
+ check_both_ways("\u00DD", "\xA0", 'macIceland') # Ý
+ check_both_ways("\u00D8", "\xAF", 'macIceland') # Ø
+ check_both_ways("\u221E", "\xB0", 'macIceland') # ∞
+ check_both_ways("\u00F8", "\xBF", 'macIceland') # ø
+ check_both_ways("\u00BF", "\xC0", 'macIceland') # ¿
+ check_both_ways("\u0153", "\xCF", 'macIceland') # œ
+ check_both_ways("\u2013", "\xD0", 'macIceland') # –
+ check_both_ways("\u00FE", "\xDF", 'macIceland') # þ
+ check_both_ways("\u00FD", "\xE0", 'macIceland') # ý
+ check_both_ways("\u00D4", "\xEF", 'macIceland') # Ô
+ #check_both_ways("\uF8FF", "\xF0", 'macIceland') # Apple logo; left disabled: MACICELAND_TO_UCS_TBL has no entry for byte F0
+ check_both_ways("\u02C7", "\xFF", 'macIceland') # ˇ
+ end
+
def check_utf_16_both_ways(utf8, raw)
copy = raw.dup
0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] }