5 files changed, 154 insertions, 53 deletions
diff --git a/ChangeLog b/ChangeLog
index 564a489c62..1b7175b2ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+Fri Dec 28 01:55:04 2007  Martin Duerst  <duerst@it.aoyama.ac.jp>
+
+	* transcode.c (str_transcode): reverted some of the changes
+	  in r14746.
+
+	* transcode.c, enc/trans/single_byte.c: Added conversions to/from
+	  US-ASCII and ASCII-8BIT (using data tables).
+
+	* enc/trans/single_byte.c: Some spacing/ordering changes due to
+	  automatic data file generation.
+
+	* transcode_data.h, transcode.c: Preliminary code for using
+	  micro-conversion functions.
+
+	* test/ruby/test_transcode.rb: Added some tests for US-ASCII and
+	  ASCII-8BIT conversions.
+
 Fri Dec 28 17:33:44 2007  Tanaka Akira  <akr@fsij.org>
 
 	* time.c (make_time_t): verify mktime and timegm result.
diff --git a/enc/trans/single_byte.c b/enc/trans/single_byte.c
index bbfb3f5f9a..baa523447e 100644
--- a/enc/trans/single_byte.c
+++ b/enc/trans/single_byte.c
@@ -1,6 +1,63 @@
 #include "transcode_data.h"
 
 static const unsigned char
+from_US_ASCII_offsets[256] = { /* per input byte: index into from_US_ASCII_infos */
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0, /* bytes 0x00-0x7F select entry 0 */
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,    0,  0,  0,  0,  0,  0,  0,  0,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1, /* bytes 0x80-0xFF select entry 1 */
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+      1,  1,  1,  1,  1,  1,  1,  1,    1,  1,  1,  1,  1,  1,  1,  1,
+};
+static const struct byte_lookup* const
+from_US_ASCII_infos[2] = { /* actions selected by from_US_ASCII_offsets */
+     NOMAP, UNDEF, /* entry 0: byte is copied to the output unchanged; entry 1: legal but undefined */
+};
+static const BYTE_LOOKUP
+from_US_ASCII = { /* one lookup table shared by the four transcoders listed below */
+  /* used from from_US_ASCII */
+  /* used from to_US_ASCII */
+  /* used from to_ASCII_8BIT */
+  /* used from from_ASCII_8BIT */
+    from_US_ASCII_offsets, /* input byte -> index into the infos array */
+    from_US_ASCII_infos /* index -> action taken for that byte */
+};
+
+static rb_transcoder
+rb_from_US_ASCII = { /* US-ASCII -> UTF-8, registered in Init_single_byte */
+    "US-ASCII", "UTF-8", &from_US_ASCII, 1, 0, /* encoding names and lookup table; NOTE(review): meaning of "1, 0" is not visible here - confirm against rb_transcoder */
+    NULL, NULL, /* no function pointers supplied; members not listed (e.g. func_ii) are zero-initialized */
+};
+
+static rb_transcoder
+rb_to_US_ASCII = { /* UTF-8 -> US-ASCII, registered in Init_single_byte */
+    "UTF-8", "US-ASCII", &from_US_ASCII, 1, 1, /* reuses the from_US_ASCII table: bytes 0x80-0xFF are UNDEF; NOTE(review): meaning of "1, 1" not visible here - confirm */
+    NULL, NULL, /* no function pointers supplied; members not listed (e.g. func_ii) are zero-initialized */
+};
+
+static rb_transcoder
+rb_from_ASCII_8BIT = { /* ASCII-8BIT -> UTF-8, registered in Init_single_byte */
+    "ASCII-8BIT", "UTF-8", &from_US_ASCII, 1, 0, /* shares the US-ASCII table, so bytes 0x80-0xFF are UNDEF; NOTE(review): meaning of "1, 0" not visible here - confirm */
+    NULL, NULL, /* no function pointers supplied; members not listed (e.g. func_ii) are zero-initialized */
+};
+
+static rb_transcoder
+rb_to_ASCII_8BIT = { /* UTF-8 -> ASCII-8BIT, registered in Init_single_byte */
+    "UTF-8", "ASCII-8BIT", &from_US_ASCII, 1, 1, /* shares the US-ASCII table, so bytes 0x80-0xFF are UNDEF; NOTE(review): meaning of "1, 1" not visible here - confirm */
+    NULL, NULL, /* no function pointers supplied; members not listed (e.g. func_ii) are zero-initialized */
+};
+
+static const unsigned char
 from_ISO_8859_1_offsets[256] = {
   /* used from from_ISO_8859_1 */
   /* used from from_ISO_8859_2 */
@@ -69,6 +126,7 @@ from_ISO_8859_1 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_1_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_1 = {
     "ISO-8859-1", "UTF-8", &from_ISO_8859_1, 2, 0,
@@ -167,6 +225,7 @@ to_ISO_8859_1 = {
     to_ISO_8859_1_offsets,
     to_ISO_8859_1_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_1 = {
     "UTF-8", "ISO-8859-1", &to_ISO_8859_1, 1, 1,
@@ -214,6 +273,7 @@ from_ISO_8859_2 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_2_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_2 = {
     "ISO-8859-2", "UTF-8", &from_ISO_8859_2, 2, 0,
@@ -370,6 +430,7 @@ to_ISO_8859_2 = {
     to_ISO_8859_2_offsets,
     to_ISO_8859_2_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_2 = {
     "UTF-8", "ISO-8859-2", &to_ISO_8859_2, 1, 1,
@@ -434,6 +495,7 @@ from_ISO_8859_3 = {
     from_ISO_8859_3_offsets,
     from_ISO_8859_3_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_3 = {
     "ISO-8859-3", "UTF-8", &from_ISO_8859_3, 2, 0,
@@ -565,6 +627,7 @@ to_ISO_8859_3 = {
     to_ISO_8859_2_offsets,
     to_ISO_8859_3_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_3 = {
     "UTF-8", "ISO-8859-3", &to_ISO_8859_3, 1, 1,
@@ -612,6 +675,7 @@ from_ISO_8859_4 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_4_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_4 = {
     "ISO-8859-4", "UTF-8", &from_ISO_8859_4, 2, 0,
@@ -747,6 +811,7 @@ to_ISO_8859_4 = {
     to_ISO_8859_2_offsets,
     to_ISO_8859_4_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_4 = {
     "UTF-8", "ISO-8859-4", &to_ISO_8859_4, 1, 1,
@@ -826,6 +891,7 @@ from_ISO_8859_5 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_5_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_5 = {
     "ISO-8859-5", "UTF-8", &from_ISO_8859_5, 3, 0,
@@ -977,6 +1043,7 @@ to_ISO_8859_5 = {
     to_ISO_8859_5_offsets,
     to_ISO_8859_5_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_5 = {
     "UTF-8", "ISO-8859-5", &to_ISO_8859_5, 1, 1,
@@ -1032,6 +1099,7 @@ from_ISO_8859_6 = {
     from_ISO_8859_6_offsets,
     from_ISO_8859_6_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_6 = {
     "ISO-8859-6", "UTF-8", &from_ISO_8859_6, 2, 0,
@@ -1138,6 +1206,7 @@ to_ISO_8859_6 = {
     to_ISO_8859_6_offsets,
     to_ISO_8859_6_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_6 = {
     "UTF-8", "ISO-8859-6", &to_ISO_8859_6, 1, 1,
@@ -1235,6 +1304,7 @@ from_ISO_8859_7 = {
     from_ISO_8859_7_offsets,
     from_ISO_8859_7_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_7 = {
     "ISO-8859-7", "UTF-8", &from_ISO_8859_7, 3, 0,
@@ -1421,6 +1491,7 @@ to_ISO_8859_7 = {
     to_ISO_8859_7_offsets,
     to_ISO_8859_7_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_7 = {
     "UTF-8", "ISO-8859-7", &to_ISO_8859_7, 1, 1,
@@ -1501,6 +1572,7 @@ from_ISO_8859_8 = {
     from_ISO_8859_8_offsets,
     from_ISO_8859_8_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_8 = {
     "ISO-8859-8", "UTF-8", &from_ISO_8859_8, 3, 0,
@@ -1646,6 +1718,7 @@ to_ISO_8859_8 = {
     to_ISO_8859_8_offsets,
     to_ISO_8859_8_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_8 = {
     "UTF-8", "ISO-8859-8", &to_ISO_8859_8, 1, 1,
@@ -1693,6 +1766,7 @@ from_ISO_8859_9 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_9_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_9 = {
     "ISO-8859-9", "UTF-8", &from_ISO_8859_9, 2, 0,
@@ -1795,6 +1869,7 @@ to_ISO_8859_9 = {
     to_ISO_8859_9_offsets,
     to_ISO_8859_9_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_9 = {
     "UTF-8", "ISO-8859-9", &to_ISO_8859_9, 1, 1,
@@ -1874,6 +1949,7 @@ from_ISO_8859_10 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_10_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_10 = {
     "ISO-8859-10", "UTF-8", &from_ISO_8859_10, 3, 0,
@@ -2031,6 +2107,7 @@ to_ISO_8859_10 = {
     to_ISO_8859_10_offsets,
     to_ISO_8859_10_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_10 = {
     "UTF-8", "ISO-8859-10", &to_ISO_8859_10, 1, 1,
@@ -2125,6 +2202,7 @@ from_ISO_8859_11 = {
     from_ISO_8859_11_offsets,
     from_ISO_8859_11_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_11 = {
     "ISO-8859-11", "UTF-8", &from_ISO_8859_11, 3, 0,
@@ -2258,6 +2336,7 @@ to_ISO_8859_11 = {
     to_ISO_8859_11_offsets,
     to_ISO_8859_11_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_11 = {
     "UTF-8", "ISO-8859-11", &to_ISO_8859_11, 1, 1,
@@ -2337,6 +2416,7 @@ from_ISO_8859_13 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_13_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_13 = {
     "ISO-8859-13", "UTF-8", &from_ISO_8859_13, 3, 0,
@@ -2481,6 +2561,7 @@ to_ISO_8859_13 = {
     to_ISO_8859_10_offsets,
     to_ISO_8859_13_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_13 = {
     "UTF-8", "ISO-8859-13", &to_ISO_8859_13, 1, 1,
@@ -2560,6 +2641,7 @@ from_ISO_8859_14 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_14_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_14 = {
     "ISO-8859-14", "UTF-8", &from_ISO_8859_14, 3, 0,
@@ -2781,6 +2863,7 @@ to_ISO_8859_14 = {
     to_ISO_8859_14_offsets,
     to_ISO_8859_14_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_14 = {
     "UTF-8", "ISO-8859-14", &to_ISO_8859_14, 1, 1,
@@ -2860,6 +2943,7 @@ from_ISO_8859_15 = {
     from_ISO_8859_1_offsets,
     from_ISO_8859_15_infos
 };
+
 static rb_transcoder
 rb_from_ISO_8859_15 = {
     "ISO-8859-15", "UTF-8", &from_ISO_8859_15, 3, 0,
@@ -2979,6 +3063,7 @@ to_ISO_8859_15 = {
     to_ISO_8859_15_offsets,
     to_ISO_8859_15_infos
 };
+
 static rb_transcoder
 rb_to_ISO_8859_15 = {
     "UTF-8", "ISO-8859-15", &to_ISO_8859_15, 1, 1,
@@ -2988,33 +3073,37 @@ rb_to_ISO_8859_15 = {
 void
 Init_single_byte(void)
 {
+    rb_register_transcoder(&rb_from_US_ASCII); /* US-ASCII / ASCII-8BIT transcoders first, then one from/to pair per ISO-8859-n */
+    rb_register_transcoder(&rb_to_US_ASCII);
+    rb_register_transcoder(&rb_from_ASCII_8BIT);
+    rb_register_transcoder(&rb_to_ASCII_8BIT);
     rb_register_transcoder(&rb_from_ISO_8859_1);
-    rb_register_transcoder(&rb_from_ISO_8859_2);
-    rb_register_transcoder(&rb_from_ISO_8859_3);
-    rb_register_transcoder(&rb_from_ISO_8859_4);
-    rb_register_transcoder(&rb_from_ISO_8859_5);
-    rb_register_transcoder(&rb_from_ISO_8859_6);
-    rb_register_transcoder(&rb_from_ISO_8859_7);
-    rb_register_transcoder(&rb_from_ISO_8859_8);
-    rb_register_transcoder(&rb_from_ISO_8859_9);
-    rb_register_transcoder(&rb_from_ISO_8859_10);
-    rb_register_transcoder(&rb_from_ISO_8859_11);
-    rb_register_transcoder(&rb_from_ISO_8859_13);
-    rb_register_transcoder(&rb_from_ISO_8859_14);
-    rb_register_transcoder(&rb_from_ISO_8859_15);
     rb_register_transcoder(&rb_to_ISO_8859_1);
+    rb_register_transcoder(&rb_from_ISO_8859_2);
     rb_register_transcoder(&rb_to_ISO_8859_2);
+    rb_register_transcoder(&rb_from_ISO_8859_3);
     rb_register_transcoder(&rb_to_ISO_8859_3);
+    rb_register_transcoder(&rb_from_ISO_8859_4);
     rb_register_transcoder(&rb_to_ISO_8859_4);
+    rb_register_transcoder(&rb_from_ISO_8859_5);
     rb_register_transcoder(&rb_to_ISO_8859_5);
+    rb_register_transcoder(&rb_from_ISO_8859_6);
     rb_register_transcoder(&rb_to_ISO_8859_6);
+    rb_register_transcoder(&rb_from_ISO_8859_7);
     rb_register_transcoder(&rb_to_ISO_8859_7);
+    rb_register_transcoder(&rb_from_ISO_8859_8);
     rb_register_transcoder(&rb_to_ISO_8859_8);
+    rb_register_transcoder(&rb_from_ISO_8859_9);
     rb_register_transcoder(&rb_to_ISO_8859_9);
+    rb_register_transcoder(&rb_from_ISO_8859_10);
     rb_register_transcoder(&rb_to_ISO_8859_10);
+    rb_register_transcoder(&rb_from_ISO_8859_11);
     rb_register_transcoder(&rb_to_ISO_8859_11);
+    rb_register_transcoder(&rb_from_ISO_8859_13);
     rb_register_transcoder(&rb_to_ISO_8859_13);
+    rb_register_transcoder(&rb_from_ISO_8859_14);
     rb_register_transcoder(&rb_to_ISO_8859_14);
+    rb_register_transcoder(&rb_from_ISO_8859_15);
     rb_register_transcoder(&rb_to_ISO_8859_15);
 }
-/* Footprint (bytes): gross: 26788, saved: 3728, net: 23060 */
+/* Footprint (bytes): gross: 27876, saved: 4544, net: 23332 */
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 73177a136b..4b3e1e77f3 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -26,6 +26,8 @@ class TestTranscode < Test::Unit::TestCase
     assert_raise(ArgumentError) { 'abc'.encode!('foo', 'bar') }
     assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode('foo') }
     assert_raise(ArgumentError) { 'abc'.force_encoding('utf-8').encode!('foo') }
+    assert_raise(RuntimeError) { "\x80".encode('utf-8','ASCII-8BIT') }
+    assert_raise(RuntimeError) { "\x80".encode('utf-8','US-ASCII') }
     assert_raise(RuntimeError) { "\xA5".encode('utf-8','iso-8859-3') }
   end
 
@@ -87,6 +89,7 @@ class TestTranscode < Test::Unit::TestCase
 
   def test_ascii_range
     encodings = [
+      'US-ASCII', 'ASCII-8BIT',
       'ISO-8859-1', 'ISO-8859-2', 'ISO-8859-3',
       'ISO-8859-4', 'ISO-8859-5', 'ISO-8859-6',
       'ISO-8859-7', 'ISO-8859-8', 'ISO-8859-9',
diff --git a/transcode.c b/transcode.c
index b748eb6347..4875570ef4 100644
--- a/transcode.c
+++ b/transcode.c
@@ -89,6 +89,8 @@ rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib)
 static void
 init_transcoder_table(void)
 {
+    rb_declare_transcoder("US-ASCII",    "UTF-8", "single_byte");
+    rb_declare_transcoder("ASCII-8BIT",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-1",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-2",  "UTF-8", "single_byte");
     rb_declare_transcoder("ISO-8859-3",  "UTF-8", "single_byte");
@@ -173,6 +175,7 @@ transcode_loop(char **in_pos, char **out_pos,
       follow_byte:
 	next_offset = next_table->base[next_byte];
 	next_info = (VALUE)next_table->info[next_offset];
+      follow_info:
 	switch (next_info & 0x1F) {
 	  case NOMAP:
 	    *out_p++ = next_byte;
@@ -191,7 +194,7 @@ transcode_loop(char **in_pos, char **out_pos,
 		else
 		    goto invalid;
 	    }
-	    next_table = next_table->info[next_offset];
+	    next_table = (const BYTE_LOOKUP *)next_info;
 	    goto follow_byte;
 	    /* maybe rewrite the following cases to use fallthrough???? */
 	  case ZERObt: /* drop input */
@@ -210,6 +213,9 @@ transcode_loop(char **in_pos, char **out_pos,
 	    *out_p++ = getBT2(next_info);
 	    *out_p++ = getBT3(next_info);
 	    continue;
+	  case FUNii:
+	    next_info = (VALUE)(*my_transcoder->func_ii)(next_info);
+	    goto follow_info;
 	  case INVALID:
 	    goto invalid;
 	  case UNDEF:
@@ -287,7 +293,7 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
 	return -1;
     }
     if (from_enc && to_enc && rb_enc_asciicompat(from_enc) && rb_enc_asciicompat(to_enc)) {
-	if (to_encidx == 0 || ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
+	if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
 	    return to_encidx;
 	}
     }
@@ -295,25 +301,6 @@ str_transcode(int argc, VALUE *argv, VALUE *self)
 	return -1;
     }
 
-    if (from_encidx == 0) {
-	const char *p = RSTRING_PTR(str);
-	const char *e = p + RSTRING_LEN(str);
-
-	while (p < e) {
-	    int ret = rb_enc_precise_mbclen(p, e, to_enc);
-	    int len = MBCLEN_CHARFOUND(ret);
-
-	    if (!len) {
-		rb_raise(rb_eArgError, "not fully converted, %d bytes left", e-p);
-	    }
-	    p += len;
-	}
-	if (to_encidx < 0) {
-	    to_encidx = rb_define_dummy_encoding(to_e);
-	}
-	return to_encidx;
-    }
-
     while (!final_encoding) { /* loop for multistep transcoding */
 	/* later, maybe use smaller intermediate strings for very long strings */
 	if (!(my_transcoder = transcode_dispatch(from_e, to_e))) {
@@ -412,6 +399,7 @@ rb_str_transcode_bang(int argc, VALUE *argv, VALUE str)
 /*
  *  call-seq:
  *     str.encode(encoding)   => str
+ *     str.encode(to_encoding, from_encoding)   => str
  *
  *  With one argument, returns a copy of <i>str</i> transcoded
  *  to encoding +encoding+.
diff --git a/transcode_data.h b/transcode_data.h
index 862e37eae5..6d3d210cbe 100644
--- a/transcode_data.h
+++ b/transcode_data.h
@@ -27,24 +27,27 @@ typedef struct byte_lookup {
 #define PType (const BYTE_LOOKUP *)
 #endif
 
-#define NOMAP   (PType 0x01)   /* single byte direct map */
-#define ONEbt   (0x02)   /* one byte payload */
-#define TWObt   (0x03)   /* two bytes payload */
-#define THREEbt (0x05)   /* three bytes payload */
-#define FOURbt  (0x06)   /* four bytes payload, UTF-8 only, macros start at getBT0 */
-#define INVALID (PType 0x07)   /* invalid byte sequence */
-#define UNDEF   (PType 0x09)   /* legal but undefined */
-#define ZERObt  (PType 0x0A)   /* zero bytes of payload, i.e. remove */
-
-#define o1(b1)          ((const BYTE_LOOKUP *)((((unsigned char)(b1))<<8)|ONEbt))
-#define o2(b1,b2)       ((const BYTE_LOOKUP *)((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
-#define o3(b1,b2,b3)    ((const BYTE_LOOKUP *)((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt))
-#define o4(b0,b1,b2,b3) ((const BYTE_LOOKUP *)((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt))
-
-#define getBT1(a)      (((a)>> 8)&0xFF)
-#define getBT2(a)      (((a)>>16)&0xFF)
-#define getBT3(a)      (((a)>>24)&0xFF)
-#define getBT0(a)      ((((a)>> 5)&0x07)|0xF0)   /* for UTF-8 only!!! */
+#define NOMAP	(PType 0x01)	/* single byte direct map */
+#define ONEbt	(0x02)		/* one byte payload */
+#define TWObt	(0x03)		/* two bytes payload */
+#define THREEbt	(0x05)		/* three bytes payload */
+#define FOURbt	(0x06)		/* four bytes payload, UTF-8 only, macros start at getBT0 */
+#define INVALID	(PType 0x07)	/* invalid byte sequence */
+#define UNDEF	(PType 0x09)	/* legal but undefined */
+#define ZERObt	(PType 0x0A)	/* zero bytes of payload, i.e. remove */
+#define FUNii	(0x0B)		/* function from info to info; bare integer tag (like ONEbt) so o2FUNii can OR it with payload bytes even when PType is a pointer cast */
+
+#define o1(b1)		(PType((((unsigned char)(b1))<<8)|ONEbt))	/* info word: tag in the low bits, payload byte at bits 8-15 */
+#define o2(b1,b2)	(PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|TWObt))
+#define o3(b1,b2,b3)	(PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|THREEbt))
+#define o4(b0,b1,b2,b3)	(PType((((unsigned char)(b1))<< 8)|(((unsigned char)(b2))<<16)|(((unsigned char)(b3))<<24)|((((unsigned char)(b0))&0x07)<<5)|FOURbt))
+
+#define getBT1(a)	(((a)>> 8)&0xFF)	/* extract the payload bytes packed by o1..o4 */
+#define getBT2(a)	(((a)>>16)&0xFF)
+#define getBT3(a)	(((a)>>24)&0xFF)
+#define getBT0(a)	((((a)>> 5)&0x07)|0xF0)   /* for UTF-8 only!!! */
+
+#define o2FUNii(b1,b2)	(PType((((unsigned char)(b1))<<8)|(((unsigned char)(b2))<<16)|FUNii))	/* two payload bytes tagged FUNii; transcode_loop passes the whole word to func_ii */
 
 /* do we need these??? maybe not, can be done with simple tables */
 #define ONETRAIL       /* legal but undefined if one more trailing UTF-8 */
@@ -70,6 +73,7 @@ typedef struct rb_transcoder {
 			 struct rb_transcoder *, struct rb_transcoding *);
     void (*postprocessor)(char**, char**, char*, char*,
 			 struct rb_transcoder *, struct rb_transcoding *);
+    VALUE (*func_ii)(VALUE); /* function from info to info */
 } rb_transcoder;
 
 void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);