Fri Dec 28 01:55:04 2007 Martin Duerst <duerst@it.aoyama.ac.jp>

* transcode.c (transcode_dispatch): Reverted some of the changes in r14746.

* transcode.c, enc/trans/single_byte.c: Added conversions to/from US-ASCII and ASCII-8BIT (using data tables).

* enc/trans/single_byte.c: Some spacing/ordering changes due to automatic data file generation.

* transcode_data.h, transcode.c: Preliminary code for using micro-conversion functions.

* test/ruby/test_transcode.rb: Added some tests for US-ASCII and ASCII-8BIT conversions.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-28 09:26:55 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-28 09:26:55 +0000
commit: 793e9423cd4c4c679959b7dd011040b5aba2d169 (patch)
tree: 034b079d1e8a3b856e8cc5cc96dc3fd185f5d4a8 /transcode.c
parent: 48af602e38293f53e7c9afa7b41ba778714220f4 (diff)
download: ruby-793e9423cd4c4c679959b7dd011040b5aba2d169.tar.gz
1 files changed, 9 insertions, 21 deletions
diff --git a/transcode.c b/transcode.c
index b748eb6347..4875570ef4 100644
--- a/transcode.c
+++ b/transcode.c
@@ -89,6 +89,8 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
 static void
 init_transcoder_table(void)
 {
+    rb_declare_transcoder("US-ASCII",    "UTF-8", "single_byte");
+    rb_declare_transcoder("ASCII-8BIT",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-1",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-2",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-3",  "UTF-8", "single_byte");
@@ -173,6 +175,7 @@ transcode_loop(char **in_pos, char **out_pos,
       follow_byte:
 	next_offset = next_table->base[next_byte];
 	next_info = (VALUE)next_table->info[next_offset];
+      follow_info:
 	switch (next_info & 0x1F) {
 	  case NOMAP:
 	    *out_p++ = next_byte;
@@ -191,7 +194,7 @@ transcode_loop(char **in_pos, char **out_pos,
 		else
 		    goto invalid;
 	    }
-	    next_table = next_table->info[next_offset];
+	    next_table = (const BYTE_LOOKUP *)next_info;
 	    goto follow_byte;
 	    /* maybe rewrite the following cases to use fallthrough???? */
 	  case ZERObt: /* drop input */
@@ -210,6 +213,9 @@ transcode_loop(char **in_pos, char **out_pos,
 	    *out_p++ = getBT2(next_info);
 	    *out_p++ = getBT3(next_info);
 	    continue;
+	  case FUNii:
+	    next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
+	    goto follow_info;
 	  case INVALID:
 	    goto invalid;
 	  case UNDEF:
@@ -287,7 +293,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
 	return -1;
     }
     if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
-	if (to_encidx == 0 || ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+	if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
 	    return to_encidx;
 	}
     }
@@ -295,25 +301,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
 	return -1;
     }
 
-    if (from_encidx == 0) {
-	const char *p = RSTRING_PTR(str);
-	const char *e = p + RSTRING_LEN(str);
-
-	while (p < e) {
-	    int ret = rb_enc_precise_mbclen(p, e, to_enc);
-	    int len = MBCLEN_CHARFOUND(ret);
-
-	    if (!len) {
-		rb_raise(rb_eArgError, "not fully converted, %d bytes left", e-p);
-	    }
-	    p += len;
-	}
-	if (to_encidx < 0) {
-	    to_encidx = rb_define_dummy_encoding(to_e);
-	}
-	return to_encidx;
-    }
-
     while (!final_encoding) { /* loop for multistep transcoding */
 	/* later, maybe use smaller intermediate strings for very long strings */
 	if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
@@ -412,6 +399,7 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
 /*
  *  call-seq:
  *     str.encode(encoding)   => str
+ *     str.encode(to_encoding, from_encoding)   => str
  *
  *  With one argument, returns a copy of <i>str</i> transcoded
  *  to encoding +encoding+.
author	duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2007-12-28 09:26:55 +0000
committer	duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2007-12-28 09:26:55 +0000
commit	793e9423cd4c4c679959b7dd011040b5aba2d169 (patch)
tree	034b079d1e8a3b856e8cc5cc96dc3fd185f5d4a8 /transcode.c
parent	48af602e38293f53e7c9afa7b41ba778714220f4 (diff)
download	ruby-793e9423cd4c4c679959b7dd011040b5aba2d169.tar.gz