diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 3 | ||||
-rw-r--r-- | test/rexml/test_document.rb | 22 |
3 files changed, 32 insertions, 0 deletions
@@ -1,3 +1,10 @@ +Sun Oct 28 21:40:13 2012 Kouhei Sutou <kou@cozmixng.org> + + * lib/rexml/parsers/baseparser.rb: Fix a bug that UTF-8 is used + for UTF-16XX encoded XML that doesn't have encoding="UTF-16" in + XML declaration. + * test/rexml/test_document.rb: Add tests for the above change. + Sun Oct 28 21:37:34 2012 Kouhei Sutou <kou@cozmixng.org> * test/rexml/test_document.rb: Group tests that they parse diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index dc4a1c8bee..a88896c5db 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -215,6 +215,9 @@ module REXML if need_source_encoding_update?(encoding) @source.encoding = encoding end + if encoding.nil? and /\AUTF-16(?:BE|LE)\z/i =~ @source.encoding + encoding = "UTF-16" + end standalone = STANDALONE.match(results) standalone = standalone[1] unless standalone.nil? return [ :xmldecl, version, encoding, standalone ] diff --git a/test/rexml/test_document.rb b/test/rexml/test_document.rb index 028fa988a6..4c5d7d1dd8 100644 --- a/test/rexml/test_document.rb +++ b/test/rexml/test_document.rb @@ -246,5 +246,27 @@ EOX assert_equal("UTF-16", document.encoding) end end + + class NoEncodingTest < self + def test_utf_16le + xml = <<-EOX.encode("UTF-16LE").force_encoding("ASCII-8BIT") +<?xml version="1.0"?> +<message>Hello world!</message> +EOX + bom = "\ufeff".encode("UTF-16LE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + end + + def test_utf_16be + xml = <<-EOX.encode("UTF-16BE").force_encoding("ASCII-8BIT") +<?xml version="1.0"?> +<message>Hello world!</message> +EOX + bom = "\ufeff".encode("UTF-16BE").force_encoding("ASCII-8BIT") + document = REXML::Document.new(bom + xml) + assert_equal("UTF-16", document.encoding) + end + end end end |