diff options
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | enc/trans/maccroatian-tbl.rb | 129 | ||||
-rw-r--r-- | enc/trans/maccyrillic-tbl.rb | 130 | ||||
-rw-r--r-- | enc/trans/maciceland-tbl.rb | 129 | ||||
-rw-r--r-- | enc/trans/single_byte.trans | 3 | ||||
-rw-r--r-- | test/ruby/test_transcode.rb | 73 |
6 files changed, 473 insertions, 0 deletions
@@ -1,3 +1,12 @@ +Fri Oct 31 18:05:05 2008 Martin Duerst <duerst@it.aoyama.ac.jp> + + * enc/trans/single_byte.trans, maccroatian-tbl.rb, + maccyrillic-tbl.rb, maciceland-tbl.rb: new single-byte + transcodings (from Yoshihiro Kambayashi) + + * test/ruby/test_transcode.rb: added tests for the above + (from Yoshihiro Kambayashi) + Fri Oct 31 12:51:25 2008 Yukihiro Matsumoto <matz@ruby-lang.org> * dir.c (dir_globs): need taint check. reported by steve diff --git a/enc/trans/maccroatian-tbl.rb b/enc/trans/maccroatian-tbl.rb new file mode 100644 index 0000000000..359878ec1f --- /dev/null +++ b/enc/trans/maccroatian-tbl.rb @@ -0,0 +1,129 @@ +MACCROATIAN_TO_UCS_TBL = [ + ["CA",0xA0], + ["C1",0xA1], + ["A2",0xA2], + ["A3",0xA3], + ["DB",0xA4], + ["A4",0xA7], + ["AC",0xA8], + ["D9",0xA9], + ["BB",0xAA], + ["C7",0xAB], + ["C2",0xAC], + ["A8",0xAE], + ["F8",0xAF], + ["A1",0xB0], + ["B1",0xB1], + ["AB",0xB4], + ["B5",0xB5], + ["A6",0xB6], + ["E1",0xB7], + ["FC",0xB8], + ["BC",0xBA], + ["DF",0xBB], + ["C0",0xBF], + ["CB",0xC0], + ["E7",0xC1], + ["E5",0xC2], + ["CC",0xC3], + ["80",0xC4], + ["81",0xC5], + ["DE",0xC6], + ["82",0xC7], + ["E9",0xC8], + ["83",0xC9], + ["FD",0xCA], + ["FA",0xCB], + ["ED",0xCC], + ["EA",0xCD], + ["EB",0xCE], + ["EC",0xCF], + ["84",0xD1], + ["F1",0xD2], + ["EE",0xD3], + ["EF",0xD4], + ["CD",0xD5], + ["85",0xD6], + ["AF",0xD8], + ["F4",0xD9], + ["F2",0xDA], + ["F3",0xDB], + ["86",0xDC], + ["A7",0xDF], + ["88",0xE0], + ["87",0xE1], + ["89",0xE2], + ["8B",0xE3], + ["8A",0xE4], + ["8C",0xE5], + ["FE",0xE6], + ["8D",0xE7], + ["8F",0xE8], + ["8E",0xE9], + ["90",0xEA], + ["91",0xEB], + ["93",0xEC], + ["92",0xED], + ["94",0xEE], + ["95",0xEF], + ["96",0xF1], + ["98",0xF2], + ["97",0xF3], + ["99",0xF4], + ["9B",0xF5], + ["9A",0xF6], + ["D6",0xF7], + ["BF",0xF8], + ["9D",0xF9], + ["9C",0xFA], + ["9E",0xFB], + ["9F",0xFC], + ["C6",0x106], + ["E6",0x107], + ["C8",0x10C], + ["E8",0x10D], + ["D0",0x110], + ["F0",0x111], + ["F5",0x131], + ["CE",0x152], + ["CF",0x153], 
+ ["A9",0x160], + ["B9",0x161], + ["AE",0x17D], + ["BE",0x17E], + ["C4",0x192], + ["F6",0x2C6], + ["FF",0x2C7], + ["FB",0x2DA], + ["F7",0x2DC], + ["F9",0x3C0], + ["E0",0x2013], + ["D1",0x2014], + ["D4",0x2018], + ["D5",0x2019], + ["E2",0x201A], + ["D2",0x201C], + ["D3",0x201D], + ["E3",0x201E], + ["A0",0x2020], + ["A5",0x2022], + ["C9",0x2026], + ["E4",0x2030], + ["DC",0x2039], + ["DD",0x203A], + ["DA",0x2044], + ["AA",0x2122], + ["BD",0x2126], + ["B6",0x2202], + ["B4",0x2206], + ["B8",0x220F], + ["B7",0x2211], + ["C3",0x221A], + ["B0",0x221E], + ["BA",0x222B], + ["C5",0x2248], + ["AD",0x2260], + ["B2",0x2264], + ["B3",0x2265], + ["D7",0x25CA], +]
\ No newline at end of file diff --git a/enc/trans/maccyrillic-tbl.rb b/enc/trans/maccyrillic-tbl.rb new file mode 100644 index 0000000000..378aa8c3bc --- /dev/null +++ b/enc/trans/maccyrillic-tbl.rb @@ -0,0 +1,130 @@ +MACCYRILLIC_TO_UCS_TBL = [ + ["CA",0xA0], + ["A2",0xA2], + ["A3",0xA3], + ["FF",0xA4], + ["A4",0xA7], + ["A9",0xA9], + ["C7",0xAB], + ["C2",0xAC], + ["A8",0xAE], + ["A1",0xB0], + ["B1",0xB1], + ["B5",0xB5], + ["A6",0xB6], + ["C8",0xBB], + ["D6",0xF7], + ["C4",0x192], + ["DD",0x401], + ["AB",0x402], + ["AE",0x403], + ["B8",0x404], + ["C1",0x405], + ["A7",0x406], + ["BA",0x407], + ["B7",0x408], + ["BC",0x409], + ["BE",0x40A], + ["CB",0x40B], + ["CD",0x40C], + ["D8",0x40E], + ["DA",0x40F], + ["80",0x410], + ["81",0x411], + ["82",0x412], + ["83",0x413], + ["84",0x414], + ["85",0x415], + ["86",0x416], + ["87",0x417], + ["88",0x418], + ["89",0x419], + ["8A",0x41A], + ["8B",0x41B], + ["8C",0x41C], + ["8D",0x41D], + ["8E",0x41E], + ["8F",0x41F], + ["90",0x420], + ["91",0x421], + ["92",0x422], + ["93",0x423], + ["94",0x424], + ["95",0x425], + ["96",0x426], + ["97",0x427], + ["98",0x428], + ["99",0x429], + ["9A",0x42A], + ["9B",0x42B], + ["9C",0x42C], + ["9D",0x42D], + ["9E",0x42E], + ["9F",0x42F], + ["E0",0x430], + ["E1",0x431], + ["E2",0x432], + ["E3",0x433], + ["E4",0x434], + ["E5",0x435], + ["E6",0x436], + ["E7",0x437], + ["E8",0x438], + ["E9",0x439], + ["EA",0x43A], + ["EB",0x43B], + ["EC",0x43C], + ["ED",0x43D], + ["EE",0x43E], + ["EF",0x43F], + ["F0",0x440], + ["F1",0x441], + ["F2",0x442], + ["F3",0x443], + ["F4",0x444], + ["F5",0x445], + ["F6",0x446], + ["F7",0x447], + ["F8",0x448], + ["F9",0x449], + ["FA",0x44A], + ["FB",0x44B], + ["FC",0x44C], + ["FD",0x44D], + ["FE",0x44E], + ["DF",0x44F], + ["DE",0x451], + ["AC",0x452], + ["AF",0x453], + ["B9",0x454], + ["CF",0x455], + ["B4",0x456], + ["BB",0x457], + ["C0",0x458], + ["BD",0x459], + ["BF",0x45A], + ["CC",0x45B], + ["CE",0x45C], + ["D9",0x45E], + ["DB",0x45F], + ["D0",0x2013], + ["D1",0x2014], + 
["D4",0x2018], + ["D5",0x2019], + ["D2",0x201C], + ["D3",0x201D], + ["D7",0x201E], + ["A0",0x2020], + ["A5",0x2022], + ["C9",0x2026], + ["DC",0x2116], + ["AA",0x2122], + ["B6",0x2202], + ["C6",0x2206], + ["C3",0x221A], + ["B0",0x221E], + ["C5",0x2248], + ["AD",0x2260], + ["B2",0x2264], + ["B3",0x2265], +]
\ No newline at end of file diff --git a/enc/trans/maciceland-tbl.rb b/enc/trans/maciceland-tbl.rb new file mode 100644 index 0000000000..818d992274 --- /dev/null +++ b/enc/trans/maciceland-tbl.rb @@ -0,0 +1,129 @@ +MACICELAND_TO_UCS_TBL = [ + ["CA",0xA0], + ["C1",0xA1], + ["A2",0xA2], + ["A3",0xA3], + ["DB",0xA4], + ["B4",0xA5], + ["A4",0xA7], + ["AC",0xA8], + ["A9",0xA9], + ["BB",0xAA], + ["C7",0xAB], + ["C2",0xAC], + ["A8",0xAE], + ["F8",0xAF], + ["A1",0xB0], + ["B1",0xB1], + ["AB",0xB4], + ["B5",0xB5], + ["A6",0xB6], + ["E1",0xB7], + ["FC",0xB8], + ["BC",0xBA], + ["C8",0xBB], + ["C0",0xBF], + ["CB",0xC0], + ["E7",0xC1], + ["E5",0xC2], + ["CC",0xC3], + ["80",0xC4], + ["81",0xC5], + ["AE",0xC6], + ["82",0xC7], + ["E9",0xC8], + ["83",0xC9], + ["E6",0xCA], + ["E8",0xCB], + ["ED",0xCC], + ["EA",0xCD], + ["EB",0xCE], + ["EC",0xCF], + ["DC",0xD0], + ["84",0xD1], + ["F1",0xD2], + ["EE",0xD3], + ["EF",0xD4], + ["CD",0xD5], + ["85",0xD6], + ["AF",0xD8], + ["F4",0xD9], + ["F2",0xDA], + ["F3",0xDB], + ["86",0xDC], + ["A0",0xDD], + ["DE",0xDE], + ["A7",0xDF], + ["88",0xE0], + ["87",0xE1], + ["89",0xE2], + ["8B",0xE3], + ["8A",0xE4], + ["8C",0xE5], + ["BE",0xE6], + ["8D",0xE7], + ["8F",0xE8], + ["8E",0xE9], + ["90",0xEA], + ["91",0xEB], + ["93",0xEC], + ["92",0xED], + ["94",0xEE], + ["95",0xEF], + ["DD",0xF0], + ["96",0xF1], + ["98",0xF2], + ["97",0xF3], + ["99",0xF4], + ["9B",0xF5], + ["9A",0xF6], + ["D6",0xF7], + ["BF",0xF8], + ["9D",0xF9], + ["9C",0xFA], + ["9E",0xFB], + ["9F",0xFC], + ["E0",0xFD], + ["DF",0xFE], + ["D8",0xFF], + ["F5",0x131], + ["CE",0x152], + ["CF",0x153], + ["D9",0x178], + ["C4",0x192], + ["F6",0x2C6], + ["FF",0x2C7], + ["F9",0x2D8], + ["FA",0x2D9], + ["FB",0x2DA], + ["FE",0x2DB], + ["F7",0x2DC], + ["FD",0x2DD], + ["B9",0x3C0], + ["D0",0x2013], + ["D1",0x2014], + ["D4",0x2018], + ["D5",0x2019], + ["E2",0x201A], + ["D2",0x201C], + ["D3",0x201D], + ["E3",0x201E], + ["A5",0x2022], + ["C9",0x2026], + ["E4",0x2030], + ["DA",0x2044], + ["AA",0x2122], + 
["BD",0x2126], + ["B6",0x2202], + ["C6",0x2206], + ["B8",0x220F], + ["B7",0x2211], + ["C3",0x221A], + ["B0",0x221E], + ["BA",0x222B], + ["C5",0x2248], + ["AD",0x2260], + ["B2",0x2264], + ["B3",0x2265], + ["D7",0x25CA], +]
\ No newline at end of file diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans index b49bc779a1..f46fd1055b 100644 --- a/enc/trans/single_byte.trans +++ b/enc/trans/single_byte.trans @@ -53,6 +53,9 @@ transcode_tblgen_singlebyte "WINDOWS-1255" transcode_tblgen_singlebyte "WINDOWS-1256" transcode_tblgen_singlebyte "WINDOWS-1257" + transcode_tblgen_singlebyte "MACCROATIAN" + transcode_tblgen_singlebyte "MACCYRILLIC" + transcode_tblgen_singlebyte "MACICELAND" %> <%= transcode_generated_code %> diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 5280bd41dc..abd337e339 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -417,6 +417,79 @@ class TestTranscode < Test::Unit::TestCase check_both_ways("\u02D9", "\xFF", 'windows-1257') # ˙ end + def test_macCroatian + check_both_ways("\u00C4", "\x80", 'macCroatian') # Ä + check_both_ways("\u00E8", "\x8F", 'macCroatian') # è + check_both_ways("\u00EA", "\x90", 'macCroatian') # ê + check_both_ways("\u00FC", "\x9F", 'macCroatian') # ü + check_both_ways("\u2020", "\xA0", 'macCroatian') # † + check_both_ways("\u00D8", "\xAF", 'macCroatian') # Ø + check_both_ways("\u221E", "\xB0", 'macCroatian') # ∞ + check_both_ways("\u00F8", "\xBF", 'macCroatian') # ø + check_both_ways("\u00BF", "\xC0", 'macCroatian') # ¿ + check_both_ways("\u0153", "\xCF", 'macCroatian') # œ + check_both_ways("\u0110", "\xD0", 'macCroatian') # Đ + check_both_ways("\u00A9", "\xD9", 'macCroatian') # © + check_both_ways("\u2044", "\xDA", 'macCroatian') # ⁄ + check_both_ways("\u203A", "\xDD", 'macCroatian') # › + check_both_ways("\u00C6", "\xDE", 'macCroatian') # Æ + check_both_ways("\u00BB", "\xDF", 'macCroatian') # » + check_both_ways("\u2013", "\xE0", 'macCroatian') # – + check_both_ways("\u00B7", "\xE1", 'macCroatian') # · + check_both_ways("\u00C2", "\xE5", 'macCroatian') #  + check_both_ways("\u0107", "\xE6", 'macCroatian') # ć + check_both_ways("\u00C1", "\xE7", 'macCroatian') # Á + 
check_both_ways("\u010D", "\xE8", 'macCroatian') # č + check_both_ways("\u00C8", "\xE9", 'macCroatian') # È + check_both_ways("\u00D4", "\xEF", 'macCroatian') # Ô + check_both_ways("\u0111", "\xF0", 'macCroatian') # đ + check_both_ways("\u00D2", "\xF1", 'macCroatian') # Ò + check_both_ways("\u00AF", "\xF8", 'macCroatian') # ¯ + check_both_ways("\u03C0", "\xF9", 'macCroatian') # π + check_both_ways("\u00CB", "\xFA", 'macCroatian') # Ë + check_both_ways("\u00CA", "\xFD", 'macCroatian') # Ê + check_both_ways("\u00E6", "\xFE", 'macCroatian') # æ + check_both_ways("\u02C7", "\xFF", 'macCroatian') # ˇ + end + + def test_macCyrillic + check_both_ways("\u0410", "\x80", 'macCyrillic') # А + check_both_ways("\u041F", "\x8F", 'macCyrillic') # П + check_both_ways("\u0420", "\x90", 'macCyrillic') # Р + check_both_ways("\u042F", "\x9F", 'macCyrillic') # Я + check_both_ways("\u2020", "\xA0", 'macCyrillic') # † + check_both_ways("\u0453", "\xAF", 'macCyrillic') # ѓ + check_both_ways("\u221E", "\xB0", 'macCyrillic') # ∞ + check_both_ways("\u045A", "\xBF", 'macCyrillic') # њ + check_both_ways("\u0458", "\xC0", 'macCyrillic') # ј + check_both_ways("\u0455", "\xCF", 'macCyrillic') # ѕ + check_both_ways("\u2013", "\xD0", 'macCyrillic') # – + check_both_ways("\u044F", "\xDF", 'macCyrillic') # я + check_both_ways("\u0430", "\xE0", 'macCyrillic') # а + check_both_ways("\u043F", "\xEF", 'macCyrillic') # п + check_both_ways("\u0440", "\xF0", 'macCyrillic') # р + check_both_ways("\u00A4", "\xFF", 'macCyrillic') # ¤ + end + + def test_macIceland + check_both_ways("\u00C4", "\x80", 'macIceland') # Ä + check_both_ways("\u00E8", "\x8F", 'macIceland') # è + check_both_ways("\u00EA", "\x90", 'macIceland') # ê + check_both_ways("\u00FC", "\x9F", 'macIceland') # ü + check_both_ways("\u00DD", "\xA0", 'macIceland') # Ý + check_both_ways("\u00D8", "\xAF", 'macIceland') # Ø + check_both_ways("\u221E", "\xB0", 'macIceland') # ∞ + check_both_ways("\u00F8", "\xBF", 'macIceland') # ø + 
check_both_ways("\u00BF", "\xC0", 'macIceland') # ¿ + check_both_ways("\u0153", "\xCF", 'macIceland') # œ + check_both_ways("\u2013", "\xD0", 'macIceland') # – + check_both_ways("\u00FE", "\xDF", 'macIceland') # þ + check_both_ways("\u00FD", "\xE0", 'macIceland') # ý + check_both_ways("\u00D4", "\xEF", 'macIceland') # Ô + #check_both_ways("\uF8FF", "\xF0", 'macIceland') # Apple logo + check_both_ways("\u02C7", "\xFF", 'macIceland') # ˇ + end + def check_utf_16_both_ways(utf8, raw) copy = raw.dup 0.step(copy.length-1, 2) { |i| copy[i+1], copy[i] = copy[i], copy[i+1] } |