diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-28 09:26:55 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-28 09:26:55 +0000 |
commit | 793e9423cd4c4c679959b7dd011040b5aba2d169 (patch) | |
tree | 034b079d1e8a3b856e8cc5cc96dc3fd185f5d4a8 /enc/trans | |
parent | 48af602e38293f53e7c9afa7b41ba778714220f4 (diff) | |
download | ruby-793e9423cd4c4c679959b7dd011040b5aba2d169.tar.gz |
Fri Dec 28 01:55:04 2007 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_dispatch): reverted some of the changes
in r14746.
* transcode.c, enc/trans/single_byte.c: Added conversions to/from
US-ASCII and ASCII-8BIT (using data tables).
* enc/trans/single_byte.c: Some spacing/ordering changes due to
automatic data file generation.
* transcode_data.h, transcode.c: Preliminary code for using
micro-conversion functions.
* test/ruby/test_transcode.rb: Added some tests for US-ASCII and
ASCII-8BIT conversions.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc/trans')
-rw-r--r-- | enc/trans/single_byte.c | 117 |
1 file changed, 103 insertions, 14 deletions
diff --git a/enc/trans/single_byte.c b/enc/trans/single_byte.c index bbfb3f5f9a..baa523447e 100644 --- a/enc/trans/single_byte.c +++ b/enc/trans/single_byte.c @@ -1,6 +1,63 @@ #include "transcode_data.h" static const unsigned char +from_US_ASCII_offsets[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +}; +static const struct byte_lookup* const +from_US_ASCII_infos[2] = { + NOMAP, UNDEF, +}; +static const BYTE_LOOKUP +from_US_ASCII = { + /* used from from_US_ASCII */ + /* used from to_US_ASCII */ + /* used from to_ASCII_8BIT */ + /* used from from_ASCII_8BIT */ + from_US_ASCII_offsets, + from_US_ASCII_infos +}; + +static rb_transcoder +rb_from_US_ASCII = { + "US-ASCII", "UTF-8", &from_US_ASCII, 1, 0, + NULL, NULL, +}; + +static rb_transcoder +rb_to_US_ASCII = { + "UTF-8", "US-ASCII", &from_US_ASCII, 1, 1, + NULL, NULL, +}; + +static rb_transcoder +rb_from_ASCII_8BIT = { + "ASCII-8BIT", "UTF-8", &from_US_ASCII, 1, 0, + NULL, NULL, +}; + +static rb_transcoder +rb_to_ASCII_8BIT = { + "UTF-8", "ASCII-8BIT", &from_US_ASCII, 1, 1, + NULL, NULL, +}; + +static const unsigned char from_ISO_8859_1_offsets[256] = { /* used from from_ISO_8859_1 */ /* used from from_ISO_8859_2 */ @@ -69,6 +126,7 @@ from_ISO_8859_1 = { 
from_ISO_8859_1_offsets, from_ISO_8859_1_infos }; + static rb_transcoder rb_from_ISO_8859_1 = { "ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0, @@ -167,6 +225,7 @@ to_ISO_8859_1 = { to_ISO_8859_1_offsets, to_ISO_8859_1_infos }; + static rb_transcoder rb_to_ISO_8859_1 = { "UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1, @@ -214,6 +273,7 @@ from_ISO_8859_2 = { from_ISO_8859_1_offsets, from_ISO_8859_2_infos }; + static rb_transcoder rb_from_ISO_8859_2 = { "ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0, @@ -370,6 +430,7 @@ to_ISO_8859_2 = { to_ISO_8859_2_offsets, to_ISO_8859_2_infos }; + static rb_transcoder rb_to_ISO_8859_2 = { "UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1, @@ -434,6 +495,7 @@ from_ISO_8859_3 = { from_ISO_8859_3_offsets, from_ISO_8859_3_infos }; + static rb_transcoder rb_from_ISO_8859_3 = { "ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0, @@ -565,6 +627,7 @@ to_ISO_8859_3 = { to_ISO_8859_2_offsets, to_ISO_8859_3_infos }; + static rb_transcoder rb_to_ISO_8859_3 = { "UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1, @@ -612,6 +675,7 @@ from_ISO_8859_4 = { from_ISO_8859_1_offsets, from_ISO_8859_4_infos }; + static rb_transcoder rb_from_ISO_8859_4 = { "ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0, @@ -747,6 +811,7 @@ to_ISO_8859_4 = { to_ISO_8859_2_offsets, to_ISO_8859_4_infos }; + static rb_transcoder rb_to_ISO_8859_4 = { "UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1, @@ -826,6 +891,7 @@ from_ISO_8859_5 = { from_ISO_8859_1_offsets, from_ISO_8859_5_infos }; + static rb_transcoder rb_from_ISO_8859_5 = { "ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0, @@ -977,6 +1043,7 @@ to_ISO_8859_5 = { to_ISO_8859_5_offsets, to_ISO_8859_5_infos }; + static rb_transcoder rb_to_ISO_8859_5 = { "UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1, @@ -1032,6 +1099,7 @@ from_ISO_8859_6 = { from_ISO_8859_6_offsets, from_ISO_8859_6_infos }; + static rb_transcoder rb_from_ISO_8859_6 = { "ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0, @@ -1138,6 +1206,7 @@ to_ISO_8859_6 = { to_ISO_8859_6_offsets, 
to_ISO_8859_6_infos }; + static rb_transcoder rb_to_ISO_8859_6 = { "UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1, @@ -1235,6 +1304,7 @@ from_ISO_8859_7 = { from_ISO_8859_7_offsets, from_ISO_8859_7_infos }; + static rb_transcoder rb_from_ISO_8859_7 = { "ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0, @@ -1421,6 +1491,7 @@ to_ISO_8859_7 = { to_ISO_8859_7_offsets, to_ISO_8859_7_infos }; + static rb_transcoder rb_to_ISO_8859_7 = { "UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1, @@ -1501,6 +1572,7 @@ from_ISO_8859_8 = { from_ISO_8859_8_offsets, from_ISO_8859_8_infos }; + static rb_transcoder rb_from_ISO_8859_8 = { "ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0, @@ -1646,6 +1718,7 @@ to_ISO_8859_8 = { to_ISO_8859_8_offsets, to_ISO_8859_8_infos }; + static rb_transcoder rb_to_ISO_8859_8 = { "UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1, @@ -1693,6 +1766,7 @@ from_ISO_8859_9 = { from_ISO_8859_1_offsets, from_ISO_8859_9_infos }; + static rb_transcoder rb_from_ISO_8859_9 = { "ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0, @@ -1795,6 +1869,7 @@ to_ISO_8859_9 = { to_ISO_8859_9_offsets, to_ISO_8859_9_infos }; + static rb_transcoder rb_to_ISO_8859_9 = { "UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1, @@ -1874,6 +1949,7 @@ from_ISO_8859_10 = { from_ISO_8859_1_offsets, from_ISO_8859_10_infos }; + static rb_transcoder rb_from_ISO_8859_10 = { "ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0, @@ -2031,6 +2107,7 @@ to_ISO_8859_10 = { to_ISO_8859_10_offsets, to_ISO_8859_10_infos }; + static rb_transcoder rb_to_ISO_8859_10 = { "UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1, @@ -2125,6 +2202,7 @@ from_ISO_8859_11 = { from_ISO_8859_11_offsets, from_ISO_8859_11_infos }; + static rb_transcoder rb_from_ISO_8859_11 = { "ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0, @@ -2258,6 +2336,7 @@ to_ISO_8859_11 = { to_ISO_8859_11_offsets, to_ISO_8859_11_infos }; + static rb_transcoder rb_to_ISO_8859_11 = { "UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1, @@ -2337,6 +2416,7 @@ from_ISO_8859_13 = { 
from_ISO_8859_1_offsets, from_ISO_8859_13_infos }; + static rb_transcoder rb_from_ISO_8859_13 = { "ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0, @@ -2481,6 +2561,7 @@ to_ISO_8859_13 = { to_ISO_8859_10_offsets, to_ISO_8859_13_infos }; + static rb_transcoder rb_to_ISO_8859_13 = { "UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1, @@ -2560,6 +2641,7 @@ from_ISO_8859_14 = { from_ISO_8859_1_offsets, from_ISO_8859_14_infos }; + static rb_transcoder rb_from_ISO_8859_14 = { "ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0, @@ -2781,6 +2863,7 @@ to_ISO_8859_14 = { to_ISO_8859_14_offsets, to_ISO_8859_14_infos }; + static rb_transcoder rb_to_ISO_8859_14 = { "UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1, @@ -2860,6 +2943,7 @@ from_ISO_8859_15 = { from_ISO_8859_1_offsets, from_ISO_8859_15_infos }; + static rb_transcoder rb_from_ISO_8859_15 = { "ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0, @@ -2979,6 +3063,7 @@ to_ISO_8859_15 = { to_ISO_8859_15_offsets, to_ISO_8859_15_infos }; + static rb_transcoder rb_to_ISO_8859_15 = { "UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1, @@ -2988,33 +3073,37 @@ rb_to_ISO_8859_15 = { void Init_single_byte(void) { + rb_register_transcoder(&rb_from_US_ASCII); + rb_register_transcoder(&rb_to_US_ASCII); + rb_register_transcoder(&rb_from_ASCII_8BIT); + rb_register_transcoder(&rb_to_ASCII_8BIT); rb_register_transcoder(&rb_from_ISO_8859_1); - rb_register_transcoder(&rb_from_ISO_8859_2); - rb_register_transcoder(&rb_from_ISO_8859_3); - rb_register_transcoder(&rb_from_ISO_8859_4); - rb_register_transcoder(&rb_from_ISO_8859_5); - rb_register_transcoder(&rb_from_ISO_8859_6); - rb_register_transcoder(&rb_from_ISO_8859_7); - rb_register_transcoder(&rb_from_ISO_8859_8); - rb_register_transcoder(&rb_from_ISO_8859_9); - rb_register_transcoder(&rb_from_ISO_8859_10); - rb_register_transcoder(&rb_from_ISO_8859_11); - rb_register_transcoder(&rb_from_ISO_8859_13); - rb_register_transcoder(&rb_from_ISO_8859_14); - rb_register_transcoder(&rb_from_ISO_8859_15); 
rb_register_transcoder(&rb_to_ISO_8859_1); + rb_register_transcoder(&rb_from_ISO_8859_2); rb_register_transcoder(&rb_to_ISO_8859_2); + rb_register_transcoder(&rb_from_ISO_8859_3); rb_register_transcoder(&rb_to_ISO_8859_3); + rb_register_transcoder(&rb_from_ISO_8859_4); rb_register_transcoder(&rb_to_ISO_8859_4); + rb_register_transcoder(&rb_from_ISO_8859_5); rb_register_transcoder(&rb_to_ISO_8859_5); + rb_register_transcoder(&rb_from_ISO_8859_6); rb_register_transcoder(&rb_to_ISO_8859_6); + rb_register_transcoder(&rb_from_ISO_8859_7); rb_register_transcoder(&rb_to_ISO_8859_7); + rb_register_transcoder(&rb_from_ISO_8859_8); rb_register_transcoder(&rb_to_ISO_8859_8); + rb_register_transcoder(&rb_from_ISO_8859_9); rb_register_transcoder(&rb_to_ISO_8859_9); + rb_register_transcoder(&rb_from_ISO_8859_10); rb_register_transcoder(&rb_to_ISO_8859_10); + rb_register_transcoder(&rb_from_ISO_8859_11); rb_register_transcoder(&rb_to_ISO_8859_11); + rb_register_transcoder(&rb_from_ISO_8859_13); rb_register_transcoder(&rb_to_ISO_8859_13); + rb_register_transcoder(&rb_from_ISO_8859_14); rb_register_transcoder(&rb_to_ISO_8859_14); + rb_register_transcoder(&rb_from_ISO_8859_15); rb_register_transcoder(&rb_to_ISO_8859_15); } -/* Footprint (bytes): gross: 26788, saved: 3728, net: 23060 */ +/* Footprint (bytes): gross: 27876, saved: 4544, net: 23332 */ |