From 2897632f163143e03f83ba47d2611c3357974c36 Mon Sep 17 00:00:00 2001 From: akr Date: Mon, 11 Aug 2008 07:39:52 +0000 Subject: * enc/trans/iso2022.trans: renamed from iso2022.erb.c. * enc/trans/single_byte.trans: ditto. * enc/trans/utf_16_32.trans: ditto. * enc/trans/korean.trans: ditto. * enc/trans/japanese.trans: ditto. * enc/depend: follow the renaming. * tool/build-transcode: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18488 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- enc/depend | 8 +- enc/trans/iso2022.erb.c | 148 ------------------- enc/trans/iso2022.trans | 148 +++++++++++++++++++ enc/trans/japanese.erb.c | 24 --- enc/trans/japanese.trans | 24 +++ enc/trans/korean.erb.c | 17 --- enc/trans/korean.trans | 17 +++ enc/trans/single_byte.erb.c | 62 -------- enc/trans/single_byte.trans | 62 ++++++++ enc/trans/utf_16_32.erb.c | 351 -------------------------------------------- enc/trans/utf_16_32.trans | 351 ++++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 607 insertions(+), 605 deletions(-) delete mode 100644 enc/trans/iso2022.erb.c create mode 100644 enc/trans/iso2022.trans delete mode 100644 enc/trans/japanese.erb.c create mode 100644 enc/trans/japanese.trans delete mode 100644 enc/trans/korean.erb.c create mode 100644 enc/trans/korean.trans delete mode 100644 enc/trans/single_byte.erb.c create mode 100644 enc/trans/single_byte.trans delete mode 100644 enc/trans/utf_16_32.erb.c create mode 100644 enc/trans/utf_16_32.trans (limited to 'enc') diff --git a/enc/depend b/enc/depend index d413898689..2eeb058d91 100644 --- a/enc/depend +++ b/enc/depend @@ -7,8 +7,10 @@ % atrans = [] % trans = Dir.open($srcdir+"/trans") {|d| % d.select {|e| -% if e.chomp!('.c') -% atrans << e if e.chomp!(".erb") +% if e.chomp!('.trans') +% atrans << e +% true +% elsif e.chomp!('.c') % true % end % } @@ -72,7 +74,7 @@ $(ENCOBJS): regenc.h oniguruma.h config.h defines.h $(TRANSOBJS): ruby.h intern.h config.h defines.h missing.h encoding.h oniguruma.h st.h transcode_data.h % end % atrans.each do |e| -% src = "#{e}.erb.c" +% src = "#{e}.trans" % src = [src, *IO.read(File.join($srcdir, "trans", src)).scan(/^\s*require\s+[\'\"]([^\'\"]*)/).flatten.map{|c|c+".rb"}] <%=rule_subst % "enc/trans/#{e}.c"%>: <%= src.map {|e| rule_subst % "enc/trans/#{e}"}.join(" ")%> $(srcdir)/tool/transcode-tblgen.rb diff --git a/enc/trans/iso2022.erb.c b/enc/trans/iso2022.erb.c deleted file mode 100644 index 3209fad163..0000000000 --- a/enc/trans/iso2022.erb.c +++ /dev/null @@ -1,148 +0,0 @@ -#include "transcode_data.h" - -<% - map = {} - map["1b2842"] = :func_so # designate US-ASCII to G0. "ESC ( B" - map["1b284a"] = :func_so # designate JIS X 0201 latin to G0. "ESC ( J" - map["1b2440"] = :func_so # designate JIS X 0208 1978 to G0. "ESC $ @" - map["1b2442"] = :func_so # designate JIS X 0208 1983 to G0. "ESC $ B" - map["{00-0d,10-1a,1c-7f}"] = :func_si - - map_jisx0208_rest = {} - map_jisx0208_rest["{21-7e}"] = :func_so -%> - -<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %> -<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %> - -static VALUE -fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) -{ - if (t->stateful[0] == 0) - return (VALUE)NOMAP; - else if (0x21 <= s[0] && s[0] <= 0x7e) - return (VALUE)&iso2022jp_to_eucjp_jisx0208_rest; - else - return (VALUE)INVALID; -} - -static int -fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (s[0] == 0x1b) { - if (s[1] == '(') { - switch (s[l-1]) { - case 'B': - case 'J': - t->stateful[0] = 0; - break; - } - } - else { - switch (s[l-1]) { - case '@': - case 'B': - t->stateful[0] = 1; - break; - } - } - return 0; - } - else { - o[0] = s[0] | 0x80; - o[1] = s[1] | 0x80; - return 2; - } -} - -static const rb_transcoder -rb_ISO_2022_JP_to_EUC_JP = { - "ISO-2022-JP", "EUC-JP", &iso2022jp_to_eucjp, - 1, /* input_unit_length */ - 3, /* max_input */ - 3, /* max_output */ - NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp -}; - -<% - map_eucjp = { - "{0e,0f,1b}" => :undef, - "{00-0d,10-1a,1c-7f}" => :func_so, - "{a1-fe}{a1-fe}" => :func_so, - "8e{a1-fe}" => :undef, - "8f{a1-fe}{a1-fe}" => :undef, - } -%> - -<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> - -static int -fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) -{ - unsigned char *output0 = o; - - if (t->stateful[0] == 0) { - t->stateful[0] = 1; /* initialized flag */ - t->stateful[1] = 1; /* ASCII mode */ - } - - if (l != t->stateful[1]) { - if (l == 1) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - t->stateful[1] = 1; - } - else { - *o++ = 0x1b; - *o++ = '$'; - *o++ = 'B'; - t->stateful[1] = 2; - } - } - - if (l == 1) { - *o++ = s[0] & 0x7f; - } - else { - *o++ = s[0] & 0x7f; - *o++ = s[1] & 0x7f; - } - - return o - output0; -} - -static int -finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o) -{ - unsigned char *output0 = o; - - if (t->stateful[0] == 0) - return 0; - - if (t->stateful[1] != 1) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - t->stateful[1] = 1; - } - - return o - output0; -} - -static const rb_transcoder -rb_EUC_JP_to_ISO_2022_JP = { - "EUC-JP", "ISO-2022-JP", &eucjp_to_iso2022jp, - 1, /* input_unit_length */ - 3, /* max_input */ - 5, /* max_output */ - NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp -}; - -void -Init_iso2022(void) -{ - rb_register_transcoder(&rb_ISO_2022_JP_to_EUC_JP); - rb_register_transcoder(&rb_EUC_JP_to_ISO_2022_JP); -} - diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans new file mode 100644 index 0000000000..3209fad163 --- /dev/null +++ b/enc/trans/iso2022.trans @@ -0,0 +1,148 @@ +#include "transcode_data.h" + +<% + map = {} + map["1b2842"] = :func_so # designate US-ASCII to G0. "ESC ( B" + map["1b284a"] = :func_so # designate JIS X 0201 latin to G0. "ESC ( J" + map["1b2440"] = :func_so # designate JIS X 0208 1978 to G0. "ESC $ @" + map["1b2442"] = :func_so # designate JIS X 0208 1983 to G0. "ESC $ B" + map["{00-0d,10-1a,1c-7f}"] = :func_si + + map_jisx0208_rest = {} + map_jisx0208_rest["{21-7e}"] = :func_so +%> + +<%= transcode_generate_node(ActionMap.parse(map), "iso2022jp_to_eucjp") %> +<%= transcode_generate_node(ActionMap.parse(map_jisx0208_rest), "iso2022jp_to_eucjp_jisx0208_rest") %> + +static VALUE +fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) +{ + if (t->stateful[0] == 0) + return (VALUE)NOMAP; + else if (0x21 <= s[0] && s[0] <= 0x7e) + return (VALUE)&iso2022jp_to_eucjp_jisx0208_rest; + else + return (VALUE)INVALID; +} + +static int +fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (s[0] == 0x1b) { + if (s[1] == '(') { + switch (s[l-1]) { + case 'B': + case 'J': + t->stateful[0] = 0; + break; + } + } + else { + switch (s[l-1]) { + case '@': + case 'B': + t->stateful[0] = 1; + break; + } + } + return 0; + } + else { + o[0] = s[0] | 0x80; + o[1] = s[1] | 0x80; + return 2; + } +} + +static const rb_transcoder +rb_ISO_2022_JP_to_EUC_JP = { + "ISO-2022-JP", "EUC-JP", &iso2022jp_to_eucjp, + 1, /* input_unit_length */ + 3, /* max_input */ + 3, /* max_output */ + NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp +}; + +<% + map_eucjp = { + "{0e,0f,1b}" => :undef, + "{00-0d,10-1a,1c-7f}" => :func_so, + "{a1-fe}{a1-fe}" => :func_so, + "8e{a1-fe}" => :undef, + "8f{a1-fe}{a1-fe}" => :undef, + } +%> + +<%= transcode_generate_node(ActionMap.parse(map_eucjp), "eucjp_to_iso2022jp") %> + +static int +fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) +{ + unsigned char *output0 = o; + + if (t->stateful[0] == 0) { + t->stateful[0] = 1; /* initialized flag */ + t->stateful[1] = 1; /* ASCII mode */ + } + + if (l != t->stateful[1]) { + if (l == 1) { + *o++ = 0x1b; + *o++ = '('; + *o++ = 'B'; + t->stateful[1] = 1; + } + else { + *o++ = 0x1b; + *o++ = '$'; + *o++ = 'B'; + t->stateful[1] = 2; + } + } + + if (l == 1) { + *o++ = s[0] & 0x7f; + } + else { + *o++ = s[0] & 0x7f; + *o++ = s[1] & 0x7f; + } + + return o - output0; +} + +static int +finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o) +{ + unsigned char *output0 = o; + + if (t->stateful[0] == 0) + return 0; + + if (t->stateful[1] != 1) { + *o++ = 0x1b; + *o++ = '('; + *o++ = 'B'; + t->stateful[1] = 1; + } + + return o - output0; +} + +static const rb_transcoder +rb_EUC_JP_to_ISO_2022_JP = { + "EUC-JP", "ISO-2022-JP", &eucjp_to_iso2022jp, + 1, /* input_unit_length */ + 3, /* max_input */ + 5, /* max_output */ + NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp +}; + +void +Init_iso2022(void) +{ + rb_register_transcoder(&rb_ISO_2022_JP_to_EUC_JP); + rb_register_transcoder(&rb_EUC_JP_to_ISO_2022_JP); +} + diff --git a/enc/trans/japanese.erb.c b/enc/trans/japanese.erb.c deleted file mode 100644 index dae3bf1e03..0000000000 --- a/enc/trans/japanese.erb.c +++ /dev/null @@ -1,24 +0,0 @@ -#include "transcode_data.h" - -<% - require 'sjis-tbl' - require 'eucjp-tbl' -%> - -<%= transcode_tblgen "Shift_JIS", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %> -<%= transcode_tblgen "Windows-31J", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %> - -<%= transcode_tblgen "UTF-8", "Shift_JIS", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %> -<%= transcode_tblgen "UTF-8", "Windows-31J", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %> - -<%= transcode_tblgen "EUC-JP", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %> -<%= transcode_tblgen "CP51932", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %> - -<%= transcode_tblgen "UTF-8", "EUC-JP", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %> -<%= transcode_tblgen "UTF-8", "CP51932", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %> - -void -Init_japanese(void) -{ -<%= transcode_register_code %> -} diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans new file mode 100644 index 0000000000..dae3bf1e03 --- /dev/null +++ b/enc/trans/japanese.trans @@ -0,0 +1,24 @@ +#include "transcode_data.h" + +<% + require 'sjis-tbl' + require 'eucjp-tbl' +%> + +<%= transcode_tblgen "Shift_JIS", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %> +<%= transcode_tblgen "Windows-31J", "UTF-8", [["{00-7f}", :nomap], *SJIS_TO_UCS_TBL] %> + +<%= transcode_tblgen "UTF-8", "Shift_JIS", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %> +<%= transcode_tblgen "UTF-8", "Windows-31J", [["{00-7f}", :nomap], *UCS_TO_SJIS_TBL] %> + +<%= transcode_tblgen "EUC-JP", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %> +<%= transcode_tblgen "CP51932", "UTF-8", [["{00-7f}", :nomap], *EUCJP_TO_UCS_TBL] %> + +<%= transcode_tblgen "UTF-8", "EUC-JP", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %> +<%= transcode_tblgen "UTF-8", "CP51932", [["{00-7f}", :nomap], *UCS_TO_EUCJP_TBL] %> + +void +Init_japanese(void) +{ +<%= transcode_register_code %> +} diff --git a/enc/trans/korean.erb.c b/enc/trans/korean.erb.c deleted file mode 100644 index f04fa15613..0000000000 --- a/enc/trans/korean.erb.c +++ /dev/null @@ -1,17 +0,0 @@ -#include "transcode_data.h" - -<% - require "euckr-tbl" - require "cp949-tbl" -%> - -<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %> -<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %> -<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %> -<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> - -void -Init_korean(void) -{ -<%= transcode_register_code %> -} diff --git a/enc/trans/korean.trans b/enc/trans/korean.trans new file mode 100644 index 0000000000..f04fa15613 --- /dev/null +++ b/enc/trans/korean.trans @@ -0,0 +1,17 @@ +#include "transcode_data.h" + +<% + require "euckr-tbl" + require "cp949-tbl" +%> + +<%= transcode_tblgen "UTF-8", "EUC-KR", [["{00-7f}", :nomap], *UCS_TO_EUCKR_TBL] %> +<%= transcode_tblgen "EUC-KR", "UTF-8", [["{00-7f}", :nomap], *EUCKR_TO_UCS_TBL] %> +<%= transcode_tblgen "UTF-8", "CP949", [["{00-7f}", :nomap], *UCS_TO_CP949_TBL] %> +<%= transcode_tblgen "CP949", "UTF-8", [["{00-7f}", :nomap], *CP949_TO_UCS_TBL] %> + +void +Init_korean(void) +{ +<%= transcode_register_code %> +} diff --git a/enc/trans/single_byte.erb.c b/enc/trans/single_byte.erb.c deleted file mode 100644 index 8cbf474eb5..0000000000 --- a/enc/trans/single_byte.erb.c +++ /dev/null @@ -1,62 +0,0 @@ -#include "transcode_data.h" - -<% - us_ascii_map = [["{00-7f}", :nomap], ["{80-ff}", :undef]] - - ISO_8859_1_TO_UCS_TBL = (0x80..0xff).map {|c| ["%02X" % c, c] } - CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] } - - require 'iso-8859-2-tbl' - require 'iso-8859-3-tbl' - require 'iso-8859-4-tbl' - require 'iso-8859-5-tbl' - require 'iso-8859-6-tbl' - require 'iso-8859-7-tbl' - require 'iso-8859-8-tbl' - require 'iso-8859-9-tbl' - require 'iso-8859-10-tbl' - require 'iso-8859-11-tbl' - require 'iso-8859-13-tbl' - require 'iso-8859-14-tbl' - require 'iso-8859-15-tbl' - -%> - -<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %> -<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %> -<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %> - -<% - def transcode_tblgen_iso8859(name, tbl_to_ucs) - tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs - name_ident = name.tr('-','_') - code = '' - code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs]) - code << "\n" - code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) - code - end -%> - -<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %> -<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> - -void -Init_single_byte(void) -{ -<%= transcode_register_code %> -} - diff --git a/enc/trans/single_byte.trans b/enc/trans/single_byte.trans new file mode 100644 index 0000000000..8cbf474eb5 --- /dev/null +++ b/enc/trans/single_byte.trans @@ -0,0 +1,62 @@ +#include "transcode_data.h" + +<% + us_ascii_map = [["{00-7f}", :nomap], ["{80-ff}", :undef]] + + ISO_8859_1_TO_UCS_TBL = (0x80..0xff).map {|c| ["%02X" % c, c] } + CONTROL1_TO_UCS_TBL = (0x80..0x9f).map {|c| ["%02X" % c, c] } + + require 'iso-8859-2-tbl' + require 'iso-8859-3-tbl' + require 'iso-8859-4-tbl' + require 'iso-8859-5-tbl' + require 'iso-8859-6-tbl' + require 'iso-8859-7-tbl' + require 'iso-8859-8-tbl' + require 'iso-8859-9-tbl' + require 'iso-8859-10-tbl' + require 'iso-8859-11-tbl' + require 'iso-8859-13-tbl' + require 'iso-8859-14-tbl' + require 'iso-8859-15-tbl' + +%> + +<%= transcode_tblgen "US-ASCII", "UTF-8", us_ascii_map %> +<%= transcode_tblgen "UTF-8", "US-ASCII", us_ascii_map %> +<%= transcode_tblgen "ASCII-8BIT", "UTF-8", us_ascii_map %> +<%= transcode_tblgen "UTF-8", "ASCII-8BIT", us_ascii_map %> + +<% + def transcode_tblgen_iso8859(name, tbl_to_ucs) + tbl_to_ucs = CONTROL1_TO_UCS_TBL + tbl_to_ucs + name_ident = name.tr('-','_') + code = '' + code << transcode_tblgen(name, "UTF-8", [["{00-7f}", :nomap], *tbl_to_ucs]) + code << "\n" + code << transcode_tblgen("UTF-8", name, [["{00-7f}", :nomap], *tbl_to_ucs.map {|a,b| [b,a] }]) + code + end +%> + +<%= transcode_tblgen_iso8859("ISO-8859-1", ISO_8859_1_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-2", ISO_8859_2_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-3", ISO_8859_3_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-4", ISO_8859_4_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-5", ISO_8859_5_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-6", ISO_8859_6_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-7", ISO_8859_7_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-8", ISO_8859_8_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-9", ISO_8859_9_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-10", ISO_8859_10_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-11", ISO_8859_11_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-13", ISO_8859_13_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-14", ISO_8859_14_TO_UCS_TBL) %> +<%= transcode_tblgen_iso8859("ISO-8859-15", ISO_8859_15_TO_UCS_TBL) %> + +void +Init_single_byte(void) +{ +<%= transcode_register_code %> +} + diff --git a/enc/trans/utf_16_32.erb.c b/enc/trans/utf_16_32.erb.c deleted file mode 100644 index aea2ab50a8..0000000000 --- a/enc/trans/utf_16_32.erb.c +++ /dev/null @@ -1,351 +0,0 @@ -#include "transcode_data.h" - -static int -fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!s[0] && s[1]<0x80) { - o[0] = s[1]; - return 1; - } - else if (s[0]<0x08) { - o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); - o[1] = 0x80 | (s[1]&0x3F); - return 2; - } - else if ((s[0]&0xF8)!=0xD8) { - o[0] = 0xE0 | (s[0]>>4); - o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); - o[2] = 0x80 | (s[1]&0x3F); - return 3; - } - else { - unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; - o[0] = 0xF0 | (u>>2); - o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F); - o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); - o[3] = 0x80 | (s[3]&0x3F); - return 4; - } -} - -static int -fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!(s[0]&0x80)) { - o[0] = 0x00; - o[1] = s[0]; - return 2; - } - else if ((s[0]&0xE0)==0xC0) { - o[0] = (s[0]>>2)&0x07; - o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F); - return 2; - } - else if ((s[0]&0xF0)==0xE0) { - o[0] = (s[0]<<4) | ((s[1]>>2)^0x20); - o[1] = (s[1]<<6) | (s[2]^0x80); - return 2; - } - else { - int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; - o[0] = 0xD8 | (w>>2); - o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); - o[2] = 0xDC | ((s[2]>>2)&0x03); - o[3] = (s[2]<<6) | (s[3]&~0x80); - return 4; - } -} - -static int -fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!s[1] && s[0]<0x80) { - o[0] = s[0]; - return 1; - } - else if (s[1]<0x08) { - o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); - o[1] = 0x80 | (s[0]&0x3F); - return 2; - } - else if ((s[1]&0xF8)!=0xD8) { - o[0] = 0xE0 | (s[1]>>4); - o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); - o[2] = 0x80 | (s[0]&0x3F); - return 3; - } - else { - unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; - o[0] = 0xF0 | u>>2; - o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F); - o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); - o[3] = 0x80 | (s[2]&0x3F); - return 4; - } -} - -static int -fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!(s[0]&0x80)) { - o[1] = 0x00; - o[0] = s[0]; - return 2; - } - else if ((s[0]&0xE0)==0xC0) { - o[1] = (s[0]>>2)&0x07; - o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); - return 2; - } - else if ((s[0]&0xF0)==0xE0) { - o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); - o[0] = (s[1]<<6) | (s[2]^0x80); - return 2; - } - else { - int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; - o[1] = 0xD8 | (w>>2); - o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); - o[3] = 0xDC | ((s[2]>>2)&0x03); - o[2] = (s[2]<<6) | (s[3]&~0x80); - return 4; - } -} - -static int -fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!s[1]) { - if (s[2]==0 && s[3]<0x80) { - o[0] = s[3]; - return 1; - } - else if (s[2]<0x08) { - o[0] = 0xC0 | (s[2]<<2) | (s[3]>>6); - o[1] = 0x80 | (s[3]&0x3F); - return 2; - } - else { - o[0] = 0xE0 | (s[2]>>4); - o[1] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); - o[2] = 0x80 | (s[3]&0x3F); - return 3; - } - } - else { - o[0] = 0xF0 | (s[1]>>2); - o[1] = 0x80 | ((s[1]&0x03)<<4) | (s[2]>>4); - o[2] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); - o[3] = 0x80 | (s[3]&0x3F); - return 4; - } -} - -static int -fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - o[0] = 0; - if (!(s[0]&0x80)) { - o[1] = o[2] = 0x00; - o[3] = s[0]; - } - else if ((s[0]&0xE0)==0xC0) { - o[1] = 0x00; - o[2] = (s[0]>>2)&0x07; - o[3] = ((s[0]&0x03)<<6) | (s[1]&0x3F); - } - else if ((s[0]&0xF0)==0xE0) { - o[1] = 0x00; - o[2] = (s[0]<<4) | ((s[1]>>2)^0x20); - o[3] = (s[1]<<6) | (s[2]^0x80); - } - else { - o[1] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); - o[2] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); - o[3] = ((s[2]&0x03)<<6) | (s[3]&0x3F); - } - return 4; -} - -static int -fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - if (!s[2]) { - if (s[1]==0 && s[0]<0x80) { - o[0] = s[0]; - return 1; - } - else if (s[1]<0x08) { - o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); - o[1] = 0x80 | (s[0]&0x3F); - return 2; - } - else { - o[0] = 0xE0 | (s[1]>>4); - o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); - o[2] = 0x80 | (s[0]&0x3F); - return 3; - } - } - else { - o[0] = 0xF0 | (s[2]>>2); - o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4); - o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); - o[3] = 0x80 | (s[0]&0x3F); - return 4; - } -} - -static int -fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) -{ - o[3] = 0; - if (!(s[0]&0x80)) { - o[2] = o[1] = 0x00; - o[0] = s[0]; - } - else if ((s[0]&0xE0)==0xC0) { - o[2] = 0x00; - o[1] = (s[0]>>2)&0x07; - o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); - } - else if ((s[0]&0xF0)==0xE0) { - o[2] = 0x00; - o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); - o[0] = (s[1]<<6) | (s[2]^0x80); - } - else { - o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); - o[1] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); - o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F); - } - return 4; -} - -<%= - map = {} - map["{00-d7,e0-ff}{00-ff}"] = :func_so - map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") -%> - -static const rb_transcoder -rb_from_UTF_16BE = { - "UTF-16BE", "UTF-8", &from_UTF_16BE, - 2, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_from_utf_16be -}; - -<%= - map = {} - map["{00-7f}"] = :func_so - map["{c2-df}{80-bf}"] = :func_so - map["e0{a0-bf}{80-bf}"] = :func_so - map["{e1-ec}{80-bf}{80-bf}"] = :func_so - map["ed{80-9f}{80-bf}"] = :func_so - map["{ee-ef}{80-bf}{80-bf}"] = :func_so - map["f0{90-bf}{80-bf}{80-bf}"] = :func_so - map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so - map["f4{80-8f}{80-bf}{80-bf}"] = :func_so - am = ActionMap.parse(map) - transcode_generate_node(am, "to_UTF_16BE") -%> - -static const rb_transcoder -rb_to_UTF_16BE = { - "UTF-8", "UTF-16BE", &to_UTF_16BE, - 1, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_to_utf_16be -}; - -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}"] = :func_so - map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") -%> - -static const rb_transcoder -rb_from_UTF_16LE = { - "UTF-16LE", "UTF-8", &from_UTF_16LE, - 2, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_from_utf_16le -}; - -static const rb_transcoder -rb_to_UTF_16LE = { - "UTF-8", "UTF-16LE", &to_UTF_16BE, - 1, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_to_utf_16le -}; - -<%= - map = {} - map["0000{00-d7,e0-ff}{00-ff}"] = :func_so - map["00{01-10}{00-ff}{00-ff}"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") -%> - -static const rb_transcoder -rb_from_UTF_32BE = { - "UTF-32BE", "UTF-8", &from_UTF_32BE, - 4, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_from_utf_32be -}; - -static const rb_transcoder -rb_to_UTF_32BE = { - "UTF-8", "UTF-32BE", &to_UTF_16BE, - 1, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_to_utf_32be -}; - -<%= - map = {} - map["{00-ff}{00-d7,e0-ff}0000"] = :func_so - map["{00-ff}{00-ff}{01-10}00"] = :func_so - transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") -%> - -static const rb_transcoder -rb_from_UTF_32LE = { - "UTF-32LE", "UTF-8", &from_UTF_32LE, - 4, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_from_utf_32le -}; - -static const rb_transcoder -rb_to_UTF_32LE = { - "UTF-8", "UTF-32LE", &to_UTF_16BE, - 1, /* input_unit_length */ - 4, /* max_input */ - 4, /* max_output */ - NULL, NULL, NULL, &fun_so_to_utf_32le -}; - -void -Init_utf_16_32(void) -{ - rb_register_transcoder(&rb_from_UTF_16BE); - rb_register_transcoder(&rb_to_UTF_16BE); - rb_register_transcoder(&rb_from_UTF_16LE); - rb_register_transcoder(&rb_to_UTF_16LE); - rb_register_transcoder(&rb_from_UTF_32BE); - rb_register_transcoder(&rb_to_UTF_32BE); - rb_register_transcoder(&rb_from_UTF_32LE); - rb_register_transcoder(&rb_to_UTF_32LE); -} diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans new file mode 100644 index 0000000000..aea2ab50a8 --- /dev/null +++ b/enc/trans/utf_16_32.trans @@ -0,0 +1,351 @@ +#include "transcode_data.h" + +static int +fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!s[0] && s[1]<0x80) { + o[0] = s[1]; + return 1; + } + else if (s[0]<0x08) { + o[0] = 0xC0 | (s[0]<<2) | (s[1]>>6); + o[1] = 0x80 | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF8)!=0xD8) { + o[0] = 0xE0 | (s[0]>>4); + o[1] = 0x80 | ((s[0]&0x0F)<<2) | (s[1]>>6); + o[2] = 0x80 | (s[1]&0x3F); + return 3; + } + else { + unsigned int u = (((s[0]&0x03)<<2)|(s[1]>>6)) + 1; + o[0] = 0xF0 | (u>>2); + o[1] = 0x80 | ((u&0x03)<<4) | ((s[1]>>2)&0x0F); + o[2] = 0x80 | ((s[1]&0x03)<<4) | ((s[2]&0x03)<<2) | (s[3]>>6); + o[3] = 0x80 | (s[3]&0x3F); + return 4; + } +} + +static int +fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!(s[0]&0x80)) { + o[0] = 0x00; + o[1] = s[0]; + return 2; + } + else if ((s[0]&0xE0)==0xC0) { + o[0] = (s[0]>>2)&0x07; + o[1] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF0)==0xE0) { + o[0] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[1] = (s[1]<<6) | (s[2]^0x80); + return 2; + } + else { + int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; + o[0] = 0xD8 | (w>>2); + o[1] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); + o[2] = 0xDC | ((s[2]>>2)&0x03); + o[3] = (s[2]<<6) | (s[3]&~0x80); + return 4; + } +} + +static int +fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!s[1] && s[0]<0x80) { + o[0] = s[0]; + return 1; + } + else if (s[1]<0x08) { + o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); + o[1] = 0x80 | (s[0]&0x3F); + return 2; + } + else if ((s[1]&0xF8)!=0xD8) { + o[0] = 0xE0 | (s[1]>>4); + o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[2] = 0x80 | (s[0]&0x3F); + return 3; + } + else { + unsigned int u = (((s[1]&0x03)<<2)|(s[0]>>6)) + 1; + o[0] = 0xF0 | u>>2; + o[1] = 0x80 | ((u&0x03)<<4) | ((s[0]>>2)&0x0F); + o[2] = 0x80 | ((s[0]&0x03)<<4) | ((s[3]&0x03)<<2) | (s[2]>>6); + o[3] = 0x80 | (s[2]&0x3F); + return 4; + } +} + +static int +fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!(s[0]&0x80)) { + o[1] = 0x00; + o[0] = s[0]; + return 2; + } + else if ((s[0]&0xE0)==0xC0) { + o[1] = (s[0]>>2)&0x07; + o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + return 2; + } + else if ((s[0]&0xF0)==0xE0) { + o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[0] = (s[1]<<6) | (s[2]^0x80); + return 2; + } + else { + int w = (((s[0]&0x07)<<2) | ((s[1]>>4)&0x03)) - 1; + o[1] = 0xD8 | (w>>2); + o[0] = (w<<6) | ((s[1]&0x0F)<<2) | ((s[2]>>4)-8); + o[3] = 0xDC | ((s[2]>>2)&0x03); + o[2] = (s[2]<<6) | (s[3]&~0x80); + return 4; + } +} + +static int +fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!s[1]) { + if (s[2]==0 && s[3]<0x80) { + o[0] = s[3]; + return 1; + } + else if (s[2]<0x08) { + o[0] = 0xC0 | (s[2]<<2) | (s[3]>>6); + o[1] = 0x80 | (s[3]&0x3F); + return 2; + } + else { + o[0] = 0xE0 | (s[2]>>4); + o[1] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); + o[2] = 0x80 | (s[3]&0x3F); + return 3; + } + } + else { + o[0] = 0xF0 | (s[1]>>2); + o[1] = 0x80 | ((s[1]&0x03)<<4) | (s[2]>>4); + o[2] = 0x80 | ((s[2]&0x0F)<<2) | (s[3]>>6); + o[3] = 0x80 | (s[3]&0x3F); + return 4; + } +} + +static int +fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + o[0] = 0; + if (!(s[0]&0x80)) { + o[1] = o[2] = 0x00; + o[3] = s[0]; + } + else if ((s[0]&0xE0)==0xC0) { + o[1] = 0x00; + o[2] = (s[0]>>2)&0x07; + o[3] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + } + else if ((s[0]&0xF0)==0xE0) { + o[1] = 0x00; + o[2] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[3] = (s[1]<<6) | (s[2]^0x80); + } + else { + o[1] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); + o[2] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); + o[3] = ((s[2]&0x03)<<6) | (s[3]&0x3F); + } + return 4; +} + +static int +fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + if (!s[2]) { + if (s[1]==0 && s[0]<0x80) { + o[0] = s[0]; + return 1; + } + else if (s[1]<0x08) { + o[0] = 0xC0 | (s[1]<<2) | (s[0]>>6); + o[1] = 0x80 | (s[0]&0x3F); + return 2; + } + else { + o[0] = 0xE0 | (s[1]>>4); + o[1] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[2] = 0x80 | (s[0]&0x3F); + return 3; + } + } + else { + o[0] = 0xF0 | (s[2]>>2); + o[1] = 0x80 | ((s[2]&0x03)<<4) | (s[1]>>4); + o[2] = 0x80 | ((s[1]&0x0F)<<2) | (s[0]>>6); + o[3] = 0x80 | (s[0]&0x3F); + return 4; + } +} + +static int +fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +{ + o[3] = 0; + if (!(s[0]&0x80)) { + o[2] = o[1] = 0x00; + o[0] = s[0]; + } + else if ((s[0]&0xE0)==0xC0) { + o[2] = 0x00; + o[1] = (s[0]>>2)&0x07; + o[0] = ((s[0]&0x03)<<6) | (s[1]&0x3F); + } + else if ((s[0]&0xF0)==0xE0) { + o[2] = 0x00; + o[1] = (s[0]<<4) | ((s[1]>>2)^0x20); + o[0] = (s[1]<<6) | (s[2]^0x80); + } + else { + o[2] = ((s[0]&0x07)<<2) | ((s[1]>>4)&0x03); + o[1] = ((s[1]&0x0F)<<4) | ((s[2]>>2)&0x0F); + o[0] = ((s[2]&0x03)<<6) | (s[3]&0x3F); + } + return 4; +} + +<%= + map = {} + map["{00-d7,e0-ff}{00-ff}"] = :func_so + map["{d8-db}{00-ff}{dc-df}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16BE") +%> + +static const rb_transcoder +rb_from_UTF_16BE = { + "UTF-16BE", "UTF-8", &from_UTF_16BE, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_from_utf_16be +}; + +<%= + map = {} + map["{00-7f}"] = :func_so + map["{c2-df}{80-bf}"] = :func_so + map["e0{a0-bf}{80-bf}"] = :func_so + map["{e1-ec}{80-bf}{80-bf}"] = :func_so + map["ed{80-9f}{80-bf}"] = :func_so + map["{ee-ef}{80-bf}{80-bf}"] = :func_so + map["f0{90-bf}{80-bf}{80-bf}"] = :func_so + map["{f1-f3}{80-bf}{80-bf}{80-bf}"] = :func_so + map["f4{80-8f}{80-bf}{80-bf}"] = :func_so + am = ActionMap.parse(map) + transcode_generate_node(am, "to_UTF_16BE") +%> + +static const rb_transcoder +rb_to_UTF_16BE = { + "UTF-8", "UTF-16BE", &to_UTF_16BE, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_to_utf_16be +}; + +<%= + map = {} + map["{00-ff}{00-d7,e0-ff}"] = :func_so + map["{00-ff}{d8-db}{00-ff}{dc-df}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_16LE") +%> + +static const rb_transcoder +rb_from_UTF_16LE = { + "UTF-16LE", "UTF-8", &from_UTF_16LE, + 2, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_from_utf_16le +}; + +static const rb_transcoder +rb_to_UTF_16LE = { + "UTF-8", "UTF-16LE", &to_UTF_16BE, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_to_utf_16le +}; + +<%= + map = {} + map["0000{00-d7,e0-ff}{00-ff}"] = :func_so + map["00{01-10}{00-ff}{00-ff}"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32BE") +%> + +static const rb_transcoder +rb_from_UTF_32BE = { + "UTF-32BE", "UTF-8", &from_UTF_32BE, + 4, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_from_utf_32be +}; + +static const rb_transcoder +rb_to_UTF_32BE = { + "UTF-8", "UTF-32BE", &to_UTF_16BE, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_to_utf_32be +}; + +<%= + map = {} + map["{00-ff}{00-d7,e0-ff}0000"] = :func_so + map["{00-ff}{00-ff}{01-10}00"] = :func_so + transcode_generate_node(ActionMap.parse(map), "from_UTF_32LE") +%> + +static const rb_transcoder +rb_from_UTF_32LE = { + "UTF-32LE", "UTF-8", &from_UTF_32LE, + 4, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_from_utf_32le +}; + +static const rb_transcoder +rb_to_UTF_32LE = { + "UTF-8", "UTF-32LE", &to_UTF_16BE, + 1, /* input_unit_length */ + 4, /* max_input */ + 4, /* max_output */ + NULL, NULL, NULL, &fun_so_to_utf_32le +}; + +void +Init_utf_16_32(void) +{ + rb_register_transcoder(&rb_from_UTF_16BE); + rb_register_transcoder(&rb_to_UTF_16BE); + rb_register_transcoder(&rb_from_UTF_16LE); + rb_register_transcoder(&rb_to_UTF_16LE); + rb_register_transcoder(&rb_from_UTF_32BE); + rb_register_transcoder(&rb_to_UTF_32BE); + rb_register_transcoder(&rb_from_UTF_32LE); + rb_register_transcoder(&rb_to_UTF_32LE); +} -- cgit v1.2.3