diff options
author | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-28 09:26:55 +0000 |
---|---|---|
committer | duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-28 09:26:55 +0000 |
commit | 793e9423cd4c4c679959b7dd011040b5aba2d169 (patch) | |
tree | 034b079d1e8a3b856e8cc5cc96dc3fd185f5d4a8 /transcode.c | |
parent | 48af602e38293f53e7c9afa7b41ba778714220f4 (diff) | |
download | ruby-793e9423cd4c4c679959b7dd011040b5aba2d169.tar.gz |
Fri Dec 28 01:55:04 2007 Martin Duerst <duerst@it.aoyama.ac.jp>
* transcode.c (transcode_dispatch): reverted some of the changes
in r14746.
* transcode.c, enc/trans/single_byte.c: Added conversions to/from
US-ASCII and ASCII-8BIT (using data tables).
* enc/trans/single_byte.c: Some spacing/ordering changes due to
automatic data file generation.
* transcode_data.h, transcode.c: Preliminary code for using
micro-conversion functions.
* test/ruby/test_transcode.rb: Added some tests for US-ASCII and
ASCII-8BIT conversions.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- | transcode.c | 30 |
1 file changed, 9 insertions, 21 deletions
diff --git a/transcode.c b/transcode.c
index b748eb6347..4875570ef4 100644
--- a/transcode.c
+++ b/transcode.c
@@ -89,6 +89,8 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
 static void
 init_transcoder_table(void)
 {
+    rb_declare_transcoder("US-ASCII", "UTF-8", "single_byte");
+    rb_declare_transcoder("ASCII-8BIT", "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-1", "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-2", "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-3", "UTF-8", "single_byte");
@@ -173,6 +175,7 @@ transcode_loop(char **in_pos, char **out_pos,
       follow_byte:
         next_offset = next_table->base[next_byte];
         next_info = (VALUE)next_table->info[next_offset];
+      follow_info:
         switch (next_info & 0x1F) {
           case NOMAP:
             *out_p++ = next_byte;
@@ -191,7 +194,7 @@ transcode_loop(char **in_pos, char **out_pos,
                 else
                     goto invalid;
             }
-            next_table = next_table->info[next_offset];
+            next_table = (const BYTE_LOOKUP *)next_info;
             goto follow_byte;
             /* maybe rewrite the following cases to use fallthrough???? */
           case ZERObt: /* drop input */
@@ -210,6 +213,9 @@ transcode_loop(char **in_pos, char **out_pos,
             *out_p++ = getBT2(next_info);
             *out_p++ = getBT3(next_info);
             continue;
+          case FUNii:
+            next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
+            goto follow_info;
           case INVALID:
             goto invalid;
           case UNDEF:
@@ -287,7 +293,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
         return -1;
     }
     if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
-        if (to_encidx == 0 || ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+        if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
             return to_encidx;
         }
     }
@@ -295,25 +301,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
         return -1;
     }
 
-    if (from_encidx == 0) {
-        const char *p = RSTRING_PTR(str);
-        const char *e = p + RSTRING_LEN(str);
-
-        while (p < e) {
-            int ret = rb_enc_precise_mbclen(p, e, to_enc);
-            int len = MBCLEN_CHARFOUND(ret);
-
-            if (!len) {
-                rb_raise(rb_eArgError, "not fully converted, %d bytes left", e-p);
-            }
-            p += len;
-        }
-        if (to_encidx < 0) {
-            to_encidx = rb_define_dummy_encoding(to_e);
-        }
-        return to_encidx;
-    }
-
     while (!final_encoding) { /* loop for multistep transcoding */
         /* later, maybe use smaller intermediate strings for very long strings */
         if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
@@ -412,6 +399,7 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
 /*
  * call-seq:
  * str.encode(encoding) => str
+ * str.encode(to_encoding, from_encoding) => str
  *
  * With one argument, returns a copy of <i>str</i> transcoded
  * to encoding +encoding+.
|