aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog9
-rw-r--r--test/ruby/test_transcode.rb12
-rw-r--r--transcode.c39
3 files changed, 57 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 4f2c6da71d..71d342e187 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+Tue Apr 13 09:32:12 2010 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * transcode.c (transcode_loop): insert the fallback hash's value
+ into the output when the hash has a matching key. [ruby-dev:40540]
+ [ruby-dev:40829] #3036
+
+ * transcode.c (rb_econv_prepare_opts): pass the value of the
+ :fallback key through to newhash.
+
Tue Apr 13 00:12:04 2010 Tanaka Akira <akr@fsij.org>
* random.c (rand_init): use the absolute value of seed to
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index 7f73d31797..d16a4d534b 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -1892,8 +1892,7 @@ class TestTranscode < Test::Unit::TestCase
check_both_ways("\u795E\u6797\u7FA9\u535A", "\xAF\xAB\xAA\x4C\xB8\x71\xB3\xD5", 'Big5-HKSCS') # 神林義博
end
- def
- test_Big5_UAO
+ def test_Big5_UAO
check_both_ways("\u4e17", "\x81\x40", 'Big5-UAO') # 丗
end
@@ -1903,4 +1902,13 @@ class TestTranscode < Test::Unit::TestCase
assert_equal(Encoding::US_ASCII, a.encoding)
assert_equal(Encoding::Shift_JIS, b.encoding)
end
+
+ def test_fallback
+ assert_equal("\u3042".encode("EUC-JP"), "\u{20000}".encode("EUC-JP",
+ fallback: {"\u{20000}" => "\u3042".encode("EUC-JP")}))
+ assert_equal("\u3042".encode("EUC-JP"), "\u{20000}".encode("EUC-JP",
+ fallback: {"\u{20000}" => "\u3042"}))
+ assert_equal("[ISU]", "\u{1F4BA}".encode("SJIS-KDDI",
+ fallback: {"\u{1F4BA}" => "[ISU]"}))
+ end
end
diff --git a/transcode.c b/transcode.c
index dba26a2394..7683d4c9be 100644
--- a/transcode.c
+++ b/transcode.c
@@ -21,7 +21,7 @@ VALUE rb_eConverterNotFoundError;
VALUE rb_cEncodingConverter;
-static VALUE sym_invalid, sym_undef, sym_replace;
+static VALUE sym_invalid, sym_undef, sym_replace, sym_fallback;
static VALUE sym_xml, sym_text, sym_attr;
static VALUE sym_universal_newline;
static VALUE sym_crlf_newline;
@@ -2256,17 +2256,37 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
unsigned char *out_start = *out_pos;
int max_output;
VALUE exc;
+ VALUE fallback = Qnil;
ec = rb_econv_open_opts(src_encoding, dst_encoding, ecflags, ecopts);
if (!ec)
rb_exc_raise(rb_econv_open_exc(src_encoding, dst_encoding, ecflags));
+ if (!NIL_P(ecopts) && TYPE(ecopts) == T_HASH)
+ fallback = rb_hash_aref(ecopts, sym_fallback);
last_tc = ec->last_tc;
max_output = last_tc ? last_tc->transcoder->max_output : 1;
resume:
ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, 0);
+ if (!NIL_P(fallback) && ret == econv_undefined_conversion) {
+ VALUE rep = rb_enc_str_new(
+ (const char *)ec->last_error.error_bytes_start,
+ ec->last_error.error_bytes_len,
+ rb_enc_find(ec->last_error.source_encoding));
+ rep = rb_hash_lookup2(fallback, rep, Qundef);
+ if (rep != Qundef) {
+ StringValue(rep);
+ ret = rb_econv_insert_output(ec, (const unsigned char *)RSTRING_PTR(rep),
+ RSTRING_LEN(rep), rb_enc_name(rb_enc_get(rep)));
+ if (ret == -1) {
+ rb_raise(rb_eArgError, "too big fallback string");
+ }
+ goto resume;
+ }
+ }
+
if (ret == econv_invalid_byte_sequence ||
ret == econv_incomplete_input ||
ret == econv_undefined_conversion) {
@@ -2442,6 +2462,7 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
return 0;
}
ecflags = econv_opts(opthash);
+
v = rb_hash_aref(opthash, sym_replace);
if (!NIL_P(v)) {
StringValue(v);
@@ -2456,6 +2477,16 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts)
rb_hash_aset(newhash, sym_replace, v);
}
+ v = rb_hash_aref(opthash, sym_fallback);
+ if (!NIL_P(v)) {
+ v = rb_convert_type(v, T_HASH, "Hash", "to_hash");
+ if (!NIL_P(v)) {
+ if (NIL_P(newhash))
+ newhash = rb_hash_new();
+ rb_hash_aset(newhash, sym_fallback, v);
+ }
+ }
+
if (!NIL_P(newhash))
rb_hash_freeze(newhash);
*opts = newhash;
@@ -2728,6 +2759,11 @@ str_encode_bang(int argc, VALUE *argv, VALUE str)
* :replace ::
* Sets the replacement string to the value. The default replacement
* string is "\uFFFD" for Unicode encoding forms, and "?" otherwise.
+ * :fallback ::
+ * Sets the replacement string for an undefined character from the
+ * given hash. Each key is an undefined character encoded in the source
+ * encoding of the current transcoder. Each value may be in any encoding,
+ * as long as it can be converted into the transcoder's destination encoding.
* :xml ::
* The value must be <code>:text</code> or <code>:attr</code>.
* If the value is <code>:text</code> <code>#encode</code> replaces
@@ -4193,6 +4229,7 @@ Init_transcode(void)
sym_invalid = ID2SYM(rb_intern("invalid"));
sym_undef = ID2SYM(rb_intern("undef"));
sym_replace = ID2SYM(rb_intern("replace"));
+ sym_fallback = ID2SYM(rb_intern("fallback"));
sym_xml = ID2SYM(rb_intern("xml"));
sym_text = ID2SYM(rb_intern("text"));
sym_attr = ID2SYM(rb_intern("attr"));