diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-03 14:12:06 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-03 14:12:06 +0000 |
commit | f6441bf61cd4244aca5f465d262baf31b8872ac2 (patch) | |
tree | 57787744890d4ac6db62bbcf44b12e738f34eb62 /enc | |
parent | fce093432eadc191b3647f116a9c2f6748efda3e (diff) | |
download | ruby-f6441bf61cd4244aca5f465d262baf31b8872ac2.tar.gz |
* transcode_data.h (rb_transcoding): remove stateful field.
add state field.
(TRANSCODING_STATE): defined.
(rb_transcoder): add fields: state_size, state_init_func,
state_fini_func.
change rb_transcoding* argument to void*.
* transcode.c (transcode_restartable0): use TRANSCODING_STATE as the
first argument of transcoder functions.
(rb_transcoding_open_by_transcoder): initialize state field.
(rb_transcoding_close): finalize state field.
* tool/transcode-tblgen.rb: provide state size/init/fini.
* enc/trans/newline.trans (universal_newline_init): defined.
(fun_so_universal_newline): take void* as a state pointer.
(rb_universal_newline): provide state size/init/fini.
(rb_crlf_newline): ditto.
(rb_cr_newline): ditto.
* enc/trans/iso2022.trans (iso2022jp_init): defined.
(fun_si_iso2022jp_to_eucjp): take void* as a state pointer.
(fun_so_iso2022jp_to_eucjp): ditto.
(fun_so_eucjp_to_iso2022jp): ditto.
(iso2022jp_reset_sequence_size): ditto.
(finish_eucjp_to_iso2022jp): ditto.
(rb_ISO_2022_JP_to_EUC_JP): provide state size/init/fini.
(rb_EUC_JP_to_ISO_2022_JP): ditto.
* enc/trans/utf_16_32.trans (fun_so_from_utf_16be): take void* as a
state pointer.
(fun_so_to_utf_16be): ditto.
(fun_so_from_utf_16le): ditto.
(fun_so_to_utf_16le): ditto.
(fun_so_from_utf_32be): ditto.
(fun_so_to_utf_32be): ditto.
(fun_so_from_utf_32le): ditto.
(fun_so_to_utf_32le): ditto.
(rb_from_UTF_16BE): provide state size/init/fini.
(rb_to_UTF_16BE): ditto.
(rb_from_UTF_16LE): ditto.
(rb_to_UTF_16LE): ditto.
(rb_from_UTF_32BE): ditto.
(rb_to_UTF_32BE): ditto.
(rb_from_UTF_32LE): ditto.
(rb_to_UTF_32LE): ditto.
* enc/trans/japanese.trans (fun_so_eucjp2sjis): take void* as a state
pointer.
(fun_so_sjis2eucjp): ditto.
(rb_eucjp2sjis): provide state size/init/fini.
(rb_sjis2eucjp): provide state size/init/fini.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19096 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
-rw-r--r-- | enc/trans/iso2022.trans | 59 |
-rw-r--r-- | enc/trans/japanese.trans | 8 |
-rw-r--r-- | enc/trans/newline.trans | 31 |
-rw-r--r-- | enc/trans/utf_16_32.trans | 24 |
4 files changed, 77 insertions, 45 deletions
diff --git a/enc/trans/iso2022.trans b/enc/trans/iso2022.trans index 49da2c3f6c..067611ebd0 100644 --- a/enc/trans/iso2022.trans +++ b/enc/trans/iso2022.trans @@ -27,10 +27,22 @@ <%= transcode_generated_code %> +#define G0_ASCII 0 +#define G0_JISX0208 1 + +static int +iso2022jp_init(void *statep) +{ + unsigned char *sp = statep; + *sp = G0_ASCII; + return 0; +} + static VALUE -fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) +fun_si_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l) { - if (t->stateful[0] == 0) + unsigned char *sp = statep; + if (*sp == G0_ASCII) return (VALUE)NOMAP; else if (0x21 <= s[0] && s[0] <= 0x7e) return (VALUE)iso2022jp_to_eucjp_jisx0208_rest; @@ -39,14 +51,15 @@ fun_si_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l) } static int -fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_iso2022jp_to_eucjp(void *statep, const unsigned char *s, size_t l, unsigned char* o) { + unsigned char *sp = statep; if (s[0] == 0x1b) { if (s[1] == '(') { switch (s[l-1]) { case 'B': case 'J': - t->stateful[0] = 0; + *sp = G0_ASCII; break; } } @@ -54,7 +67,7 @@ fun_so_iso2022jp_to_eucjp(rb_transcoding* t, const unsigned char* s, size_t l, u switch (s[l-1]) { case '@': case 'B': - t->stateful[0] = 1; + *sp = G0_JISX0208; break; } } @@ -75,31 +88,28 @@ rb_ISO_2022_JP_to_EUC_JP = { 3, /* max_input */ 3, /* max_output */ stateful_decoder, /* stateful_type */ + 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, fun_si_iso2022jp_to_eucjp, NULL, fun_so_iso2022jp_to_eucjp }; static int -fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char *o) +fun_so_eucjp_to_iso2022jp(void *statep, const unsigned char *s, size_t l, unsigned char *o) { + unsigned char *sp = statep; unsigned char *output0 = o; - if (t->stateful[0] == 0) { - t->stateful[0] = 1; /* initialized flag */ - 
t->stateful[1] = 1; /* G0 = ASCII */ - } - - if (l != t->stateful[1]) { + if (*sp != (l == 1 ? G0_ASCII : G0_JISX0208)) { if (l == 1) { *o++ = 0x1b; *o++ = '('; *o++ = 'B'; - t->stateful[1] = 1; /* G0 = ASCII */ + *sp = G0_ASCII; } else { *o++ = 0x1b; *o++ = '$'; *o++ = 'B'; - t->stateful[1] = 2; /* G0 = JIS X 0208 1983 */ + *sp = G0_JISX0208; /* JIS X 0208 1983 */ } } @@ -115,27 +125,27 @@ fun_so_eucjp_to_iso2022jp(rb_transcoding *t, const unsigned char *s, size_t l, u } static int -iso2022jp_reset_sequence_size(rb_transcoding *t) +iso2022jp_reset_sequence_size(void *statep) { - if (t->stateful[1] == 2) + unsigned char *sp = statep; + if (*sp == G0_JISX0208) return 3; return 0; } static int -finish_eucjp_to_iso2022jp(rb_transcoding *t, unsigned char *o) +finish_eucjp_to_iso2022jp(void *statep, unsigned char *o) { + unsigned char *sp = statep; unsigned char *output0 = o; - if (t->stateful[0] == 0) + if (*sp == G0_ASCII) return 0; - if (t->stateful[1] != 1) { - *o++ = 0x1b; - *o++ = '('; - *o++ = 'B'; - t->stateful[1] = 1; - } + *o++ = 0x1b; + *o++ = '('; + *o++ = 'B'; + *sp = G0_ASCII; return o - output0; } @@ -148,6 +158,7 @@ rb_EUC_JP_to_ISO_2022_JP = { 3, /* max_input */ 5, /* max_output */ stateful_encoder, /* stateful_type */ + 1, iso2022jp_init, iso2022jp_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp_to_iso2022jp, finish_eucjp_to_iso2022jp, iso2022jp_reset_sequence_size, finish_eucjp_to_iso2022jp diff --git a/enc/trans/japanese.trans b/enc/trans/japanese.trans index 279957b972..64f38fbfc6 100644 --- a/enc/trans/japanese.trans +++ b/enc/trans/japanese.trans @@ -20,7 +20,7 @@ <%= transcode_generated_code %> static int -fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o) +fun_so_eucjp2sjis(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (s[0] == 0x8e) { o[0] = s[1]; @@ -41,7 +41,7 @@ fun_so_eucjp2sjis(rb_transcoding *t, const unsigned char *s, size_t l, unsigned } static 
int -fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned char* o) +fun_so_sjis2eucjp(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (l == 1) { o[0] = '\x8e'; @@ -49,7 +49,7 @@ fun_so_sjis2eucjp(rb_transcoding *t, const unsigned char *s, size_t l, unsigned return 2; } else { - int h, m, l; + int h, l; h = s[0]; l = s[1]; if (0xe0 <= h) @@ -74,6 +74,7 @@ rb_eucjp2sjis = { 3, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_eucjp2sjis }; @@ -85,6 +86,7 @@ rb_sjis2eucjp = { 2, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_sjis2eucjp }; diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans index da8202eea4..7b10d368b1 100644 --- a/enc/trans/newline.trans +++ b/enc/trans/newline.trans @@ -21,33 +21,41 @@ <%= transcode_generated_code %> +#define NORMAL 0 +#define JUST_AFTER_CR 1 + +static int +universal_newline_init(void *statep) +{ + unsigned char *sp = statep; + *sp = NORMAL; + return 0; +} + static int -fun_so_universal_newline(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_universal_newline(void *statep, const unsigned char* s, size_t l, unsigned char* o) { + unsigned char *sp = statep; int len; - /* - t->stateful[0] == 0 : normal - t->stateful[0] == 1 : just after '\r' - */ if (s[0] == '\n') { - if (t->stateful[0] == 0) { + if (*sp == NORMAL) { o[0] = '\n'; len = 1; } - else { + else { /* JUST_AFTER_CR */ len = 0; } - t->stateful[0] = 0; + *sp = NORMAL; } else if (s[0] == '\r') { o[0] = '\n'; len = 1; - t->stateful[0] = 1; + *sp = JUST_AFTER_CR; } else { o[0] = s[0]; len = 1; - t->stateful[0] = 0; + *sp = NORMAL; } return len; } @@ -60,6 +68,7 @@ rb_universal_newline = { 1, /* max_input */ 1, /* max_output */ stateful_decoder, /* stateful_type */ + 1, 
universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_universal_newline }; @@ -71,6 +80,7 @@ rb_crlf_newline = { 1, /* max_input */ 2, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; @@ -82,6 +92,7 @@ rb_cr_newline = { 1, /* max_input */ 1, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, NULL }; diff --git a/enc/trans/utf_16_32.trans b/enc/trans/utf_16_32.trans index 57cd77dd77..9ffff341a9 100644 --- a/enc/trans/utf_16_32.trans +++ b/enc/trans/utf_16_32.trans @@ -38,7 +38,7 @@ <%= transcode_generated_code %> static int -fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[0] && s[1]<0x80) { o[0] = s[1]; @@ -66,7 +66,7 @@ fun_so_from_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_16be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!(s[0]&0x80)) { o[0] = 0x00; @@ -94,7 +94,7 @@ fun_so_to_utf_16be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[1] && s[0]<0x80) { o[0] = s[0]; @@ -122,7 +122,7 @@ fun_so_from_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_16le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!(s[0]&0x80)) { o[1] = 0x00; @@ -150,7 +150,7 @@ 
fun_so_to_utf_16le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[1]) { if (s[2]==0 && s[3]<0x80) { @@ -179,7 +179,7 @@ fun_so_from_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_32be(void *statep, const unsigned char *s, size_t l, unsigned char *o) { o[0] = 0; if (!(s[0]&0x80)) { @@ -205,7 +205,7 @@ fun_so_to_utf_32be(rb_transcoding* t, const unsigned char* s, size_t l, unsigned } static int -fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_from_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { if (!s[2]) { if (s[1]==0 && s[0]<0x80) { @@ -234,7 +234,7 @@ fun_so_from_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsign } static int -fun_so_to_utf_32le(rb_transcoding* t, const unsigned char* s, size_t l, unsigned char* o) +fun_so_to_utf_32le(void *statep, const unsigned char *s, size_t l, unsigned char *o) { o[3] = 0; if (!(s[0]&0x80)) { @@ -267,6 +267,7 @@ rb_from_UTF_16BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16be }; @@ -278,6 +279,7 @@ rb_to_UTF_16BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16be }; @@ -289,6 +291,7 @@ rb_from_UTF_16LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_16le }; @@ -300,6 +303,7 @@ rb_to_UTF_16LE = { 
4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_16le }; @@ -311,6 +315,7 @@ rb_from_UTF_32BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32be }; @@ -322,6 +327,7 @@ rb_to_UTF_32BE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32be }; @@ -333,6 +339,7 @@ rb_from_UTF_32LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_from_utf_32le }; @@ -344,6 +351,7 @@ rb_to_UTF_32LE = { 4, /* max_input */ 4, /* max_output */ stateless_converter, /* stateful_type */ + 0, NULL, NULL, /* state_size, state_init, state_fini */ NULL, NULL, NULL, fun_so_to_utf_32le }; |