diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-05-24 03:07:49 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-05-24 03:07:49 +0000 |
commit | 3cc5ddcac142317709aa235a27d8ce0a84140150 (patch) | |
tree | 1e3bc3cf086c47d524182fe7febc253f0e66a7e8 | |
parent | c427f44f1092c96a3bd3ea90d10ec591f3a93f94 (diff) | |
download | ruby-3cc5ddcac142317709aa235a27d8ce0a84140150.tar.gz |
Bug #6487
* io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@35766 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | io.c | 68 | ||||
-rw-r--r-- | test/ruby/test_file.rb | 51 |
3 files changed, 88 insertions, 35 deletions
```diff
@@ -1,3 +1,7 @@
+Thu May 24 12:07:46 2012 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+        * io.c (io_strip_bom): check EOF. [Bug #6487][ruby-core:45203]
+
 Wed May 23 22:06:14 2012 NARUSE, Yui <naruse@ruby-lang.org>
 
         * lib/net/http/header.rb (Net::HTTPHeader#range): fix broken parser of
@@ -5099,65 +5099,63 @@ static void io_encoding_set(rb_io_t *, VALUE, VALUE, VALUE);
 static int
 io_strip_bom(VALUE io)
 {
-    int b1, b2, b3, b4;
-    switch (b1 = FIX2INT(rb_io_getbyte(io))) {
-      case 0xEF:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xBB) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0xBF) {
+    VALUE b1, b2, b3, b4;
+
+    if (NIL_P(b1 = rb_io_getbyte(io))) return 0;
+    switch (b1) {
+      case INT2FIX(0xEF):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xBB) && !NIL_P(b3 = rb_io_getbyte(io))) {
+            if (b3 == INT2FIX(0xBF)) {
                 return rb_utf8_encindex();
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
+            rb_io_ungetbyte(io, b3);
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0xFE:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xFF) {
+      case INT2FIX(0xFE):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xFF)) {
             return rb_enc_find_index("UTF-16BE");
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0xFF:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0xFE) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0) {
-                b4 = FIX2INT(rb_io_getbyte(io));
-                if (b4 == 0) {
+      case INT2FIX(0xFF):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0xFE)) {
+            b3 = rb_io_getbyte(io);
+            if (b3 == INT2FIX(0) && !NIL_P(b4 = rb_io_getbyte(io))) {
+                if (b4 == INT2FIX(0)) {
                     return rb_enc_find_index("UTF-32LE");
                 }
-                rb_io_ungetbyte(io, INT2FIX(b4));
+                rb_io_ungetbyte(io, b4);
+                rb_io_ungetbyte(io, b3);
             }
             else {
-                rb_io_ungetbyte(io, INT2FIX(b3));
+                rb_io_ungetbyte(io, b3);
                 return rb_enc_find_index("UTF-16LE");
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
 
-      case 0:
-        b2 = FIX2INT(rb_io_getbyte(io));
-        if (b2 == 0) {
-            b3 = FIX2INT(rb_io_getbyte(io));
-            if (b3 == 0xFE) {
-                b4 = FIX2INT(rb_io_getbyte(io));
-                if (b4 == 0xFF) {
+      case INT2FIX(0):
+        if (NIL_P(b2 = rb_io_getbyte(io))) break;
+        if (b2 == INT2FIX(0) && !NIL_P(b3 = rb_io_getbyte(io))) {
+            if (b3 == INT2FIX(0xFE) && !NIL_P(b4 = rb_io_getbyte(io))) {
+                if (b4 == INT2FIX(0xFF)) {
                     return rb_enc_find_index("UTF-32BE");
                 }
-                rb_io_ungetbyte(io, INT2FIX(b4));
+                rb_io_ungetbyte(io, b4);
             }
-            rb_io_ungetbyte(io, INT2FIX(b3));
+            rb_io_ungetbyte(io, b3);
         }
-        rb_io_ungetbyte(io, INT2FIX(b2));
+        rb_io_ungetbyte(io, b2);
         break;
     }
-    rb_io_ungetbyte(io, INT2FIX(b1));
+    rb_io_ungetbyte(io, b1);
     return 0;
 }
 
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 54983d6deb..f94e4336d7 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -38,6 +38,57 @@ class TestFile < Test::Unit::TestCase
 
   include TestEOF::Seek
 
+  def test_empty_file_bom
+    bug6487 = '[ruby-core:45203]'
+    f = Tempfile.new(__method__.to_s)
+    f.close
+    assert File.exist? f.path
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:utf-8')}
+    assert_nothing_raised(bug6487) {File.read(f.path, mode: 'r:bom|utf-8')}
+    f.close(true)
+  end
+
+  def assert_bom(bytes, name)
+    bug6487 = '[ruby-core:45203]'
+
+    f = Tempfile.new(name.to_s)
+    f.sync = true
+    expected = ""
+    result = nil
+    bytes[0...-1].each do |x|
+      f.write x
+      f.write ' '
+      f.pos -= 1
+      expected << x
+      assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+      assert_equal("#{expected} ".force_encoding("utf-8"), result)
+    end
+    f.write bytes[-1]
+    assert_nothing_raised(bug6487) {result = File.read(f.path, mode: 'rb:bom|utf-8')}
+    assert_equal '', result, "valid bom"
+    f.close(true)
+  end
+
+  def test_bom_8
+    assert_bom(["\xEF", "\xBB", "\xBF"], __method__)
+  end
+
+  def test_bom_16be
+    assert_bom(["\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_16le
+    assert_bom(["\xFF", "\xFE"], __method__)
+  end
+
+  def test_bom_32be
+    assert_bom(["\0", "\0", "\xFE", "\xFF"], __method__)
+  end
+
+  def test_bom_32le
+    assert_bom(["\xFF\xFE\0", "\0"], __method__)
+  end
+
   def test_truncate_wbuf
     f = Tempfile.new("test-truncate")
     f.print "abc"
```