diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-04 10:15:34 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-09-04 10:15:34 +0000 |
commit | 45c37073038bde691ef12350277cd5a0b7796ec3 (patch) | |
tree | a0008d75fa81363442708cdaa98272e426bb8415 | |
parent | 1633eb7238776b94a5f162b85f225423174e4c26 (diff) | |
download | ruby-45c37073038bde691ef12350277cd5a0b7796ec3.tar.gz |
* include/ruby/encoding.h (ECONV_INVALID_IGNORE): removed because
it tends to cause security problems. If the behaviour is really
required, ECONV_INVALID_REPLACE with an empty string can be used.
Examples of such problems: CVE-2006-2313, CVE-2008-1036, [ruby-core:15645]
(ECONV_UNDEF_IGNORE): ditto.
* transcode.c (rb_econv_convert): follow the above change.
(econv_opts): ditto.
(Init_transcode): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19123 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 12 | ||||
-rw-r--r-- | include/ruby/encoding.h | 2 | ||||
-rw-r--r-- | test/ruby/test_econv.rb | 4 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 20 | ||||
-rw-r--r-- | test/ruby/test_transcode.rb | 16 | ||||
-rw-r--r-- | transcode.c | 18 |
6 files changed, 34 insertions, 38 deletions
@@ -1,3 +1,15 @@ +Thu Sep 4 19:10:27 2008 Tanaka Akira <akr@fsij.org> + + * include/ruby/encoding.h (ECONV_INVALID_IGNORE): removed because + it tend to cause security problem. If the behaviour is really + required, ECONV_INVALID_REPLACE with empty string can be used. + For example, CVE-2006-2313, CVE-2008-1036, [ruby-core:15645] + (ECONV_UNDEF_IGNORE): ditto. + + * transcode.c (rb_econv_convert): follow the above change. + (econv_opts): ditto. + (Init_transcode): ditto. + Thu Sep 4 13:22:02 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * vm_core.h (struct rb_vm_struct): replaced signal staff with trap diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 10a7b95229..ae1ec13e74 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -251,11 +251,9 @@ void rb_econv_binmode(rb_econv_t *ec); /* flags for rb_econv_open */ #define ECONV_INVALID_MASK 0x000f -#define ECONV_INVALID_IGNORE 0x0001 #define ECONV_INVALID_REPLACE 0x0002 #define ECONV_UNDEF_MASK 0x00f0 -#define ECONV_UNDEF_IGNORE 0x0010 #define ECONV_UNDEF_REPLACE 0x0020 /* effective only if output is ascii compatible */ diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 6844067526..12f652d853 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -524,7 +524,7 @@ class TestEncodingConverter < Test::Unit::TestCase end def test_invalid_ignore - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::INVALID_IGNORE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", :invalid => :replace, :replace => "") ret = ec.primitive_convert(src="abc\x80def", dst="", nil, 100) assert_equal(:finished, ret) assert_equal("", src) @@ -540,7 +540,7 @@ class TestEncodingConverter < Test::Unit::TestCase end def test_undef_ignore - ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNDEF_IGNORE) + ec = Encoding::Converter.new("UTF-8", "EUC-JP", :undef => :replace, :replace => "") ret = ec.primitive_convert(src="abc\u{fffd}def", dst="", nil, 100) 
assert_equal(:finished, ret) assert_equal("", src) diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 67f48527b5..550d163bc2 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -1312,14 +1312,14 @@ EOT open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| assert_equal("a?b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| assert_equal("ab", f.read) } open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| assert_raise(Encoding::InvalidByteSequence) { f.read } assert_equal("b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| assert_raise(Encoding::InvalidByteSequence) { f.read } assert_equal("b", f.read) } @@ -1332,14 +1332,14 @@ EOT open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f| assert_equal("a?b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :undef => :replace, :replace => "") {|f| assert_equal("ab", f.read) } open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.read } assert_equal("b", f.read) } - open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "r:utf-8:euc-jp", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.read } assert_equal("b", f.read) } @@ -1354,7 +1354,7 @@ EOT } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:euc-jp", :invalid => :ignore) {|f| + open("t.txt", "w:euc-jp", :invalid => :replace, :replace => "") {|f| assert_nothing_raised { f.write invalid_utf8 } } assert_equal("ab", File.read("t.txt")) @@ -1362,7 +1362,7 @@ EOT open("t.txt", "w:euc-jp", :undef => :replace) {|f| assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 } } - open("t.txt", "w:euc-jp", :undef => :ignore) {|f| + open("t.txt", 
"w:euc-jp", :undef => :replace, :replace => "") {|f| assert_raise(Encoding::InvalidByteSequence) { f.write invalid_utf8 } } } @@ -1375,14 +1375,14 @@ EOT assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:euc-jp:utf-8", :undef => :ignore) {|f| + open("t.txt", "w:euc-jp:utf-8", :undef => :replace, :replace => "") {|f| assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("ab", File.read("t.txt")) open("t.txt", "w:euc-jp:utf-8", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } - open("t.txt", "w:euc-jp:utf-8", :invalid => :ignore) {|f| + open("t.txt", "w:euc-jp:utf-8", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } } @@ -1395,14 +1395,14 @@ EOT assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("a?b", File.read("t.txt")) - open("t.txt", "w:iso-2022-jp:utf-8", :undef => :ignore) {|f| + open("t.txt", "w:iso-2022-jp:utf-8", :undef => :replace, :replace => "") {|f| assert_nothing_raised { f.write "a\uFFFDb" } } assert_equal("ab", File.read("t.txt")) open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace) {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } - open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :ignore) {|f| + open("t.txt", "w:iso-2022-jp:utf-8", :invalid => :replace, :replace => "") {|f| assert_raise(Encoding::ConversionUndefined) { f.write "a\uFFFDb" } } } diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb index 2bf8f04de4..614d4ceb03 100644 --- a/test/ruby/test_transcode.rb +++ b/test/ruby/test_transcode.rb @@ -247,23 +247,23 @@ class TestTranscode < Test::Unit::TestCase def test_invalid_ignore # arguments only - assert_nothing_raised { 'abc'.encode('utf-8', invalid: :ignore) } + assert_nothing_raised { 'abc'.encode('utf-8', invalid: :replace, replace: "") } # check handling of UTF-8 ill-formed subsequences 
assert_equal("\x00\x41\x00\x3E\x00\x42".force_encoding('UTF-16BE'), - "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x41\xC2\x3E\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\x00\x41\x00\xF1\x00\x42".force_encoding('UTF-16BE'), - "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x41\xC2\xC3\xB1\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\x00\x42".force_encoding('UTF-16BE'), - "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\xF0\x80\x80\x42".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal(''.force_encoding('UTF-16BE'), - "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :ignore)) + "\x82\xAB".encode('UTF-16BE', 'UTF-8', invalid: :replace, replace: "")) assert_equal("\e$B!!\e(B".force_encoding("ISO-2022-JP"), - "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA1\xA1\xFF".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"), - "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA4\xA2\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) assert_equal("\e$B\x24\x22\x24\x24\e(B".force_encoding("ISO-2022-JP"), - "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :ignore)) + "\xA4\xA2\xFF\xFF\xA4\xA4".encode("ISO-2022-JP", "EUC-JP", invalid: :replace, replace: "")) end def test_invalid_replace diff --git a/transcode.c b/transcode.c index 273a913d31..746892b8d5 100644 --- a/transcode.c +++ b/transcode.c @@ -1286,10 +1286,7 @@ rb_econv_convert(rb_econv_t *ec, ret == econv_incomplete_input) { /* deal with invalid byte sequence */ /* todo: add more alternative behaviors */ - if (ec->flags&ECONV_INVALID_IGNORE) { - goto resume; - } - else if (ec->flags&ECONV_INVALID_REPLACE) { + if (ec->flags&ECONV_INVALID_REPLACE) { if 
(output_replacement_character(ec) == 0) goto resume; } @@ -1299,10 +1296,7 @@ rb_econv_convert(rb_econv_t *ec, /* valid character in source encoding * but no related character(s) in destination encoding */ /* todo: add more alternative behaviors */ - if (ec->flags&ECONV_UNDEF_IGNORE) { - goto resume; - } - else if (ec->flags&ECONV_UNDEF_REPLACE) { + if (ec->flags&ECONV_UNDEF_REPLACE) { if (output_replacement_character(ec) == 0) goto resume; } @@ -2009,9 +2003,6 @@ econv_opts(VALUE opt) v = rb_hash_aref(opt, sym_invalid); if (NIL_P(v)) { } - else if (v==sym_ignore) { - options |= ECONV_INVALID_IGNORE; - } else if (v==sym_replace) { options |= ECONV_INVALID_REPLACE; v = rb_hash_aref(opt, sym_replace); @@ -2022,9 +2013,6 @@ econv_opts(VALUE opt) v = rb_hash_aref(opt, sym_undef); if (NIL_P(v)) { } - else if (v==sym_ignore) { - options |= ECONV_UNDEF_IGNORE; - } else if (v==sym_replace) { options |= ECONV_UNDEF_REPLACE; } @@ -3314,10 +3302,8 @@ Init_transcode(void) rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0); rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1); rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); - rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE)); rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK)); - rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE)); rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE)); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); |