From 46ff009168cfb29b473bc37d38b64719ad9e6526 Mon Sep 17 00:00:00 2001 From: naruse Date: Wed, 14 Apr 2010 13:11:11 +0000 Subject: * lib/rexml/source.rb: force_encoding("UTF-8") when the input is already UTF-8. patched by Kouhei Sutou [ruby-core:23404] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@27342 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/source.rb | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'lib/rexml') diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 3f6d4ffa26..1206150b16 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -162,6 +162,15 @@ module REXML @line_break = ">" end super( @source.eof? ? str : str+@source.readline( @line_break ) ) + + if !@to_utf and + @buffer.respond_to?(:force_encoding) and + @source.respond_to?(:external_encoding) and + @source.external_encoding != ::Encoding::UTF_8 + @force_utf8 = true + else + @force_utf8 = false + end end def scan(pattern, cons=false) @@ -174,11 +183,7 @@ module REXML if rv.size == 0 until @buffer =~ pattern or @source.nil? begin - # READLINE OPT - #str = @source.read(@block_size) - str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str + @buffer << readline rescue Iconv::IllegalSequence raise rescue @@ -193,12 +198,7 @@ module REXML def read begin - str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str - if not @to_utf and @buffer.respond_to? :force_encoding - @buffer.force_encoding Encoding::UTF_8 - end + @buffer << readline rescue Exception, NameError @source = nil end @@ -213,9 +213,7 @@ module REXML @buffer = $' if cons and rv while !rv and @source begin - str = @source.readline(@line_break) - str = decode(str) if @to_utf and str - @buffer << str + @buffer << readline rv = pattern.match(@buffer) @buffer = $' if cons and rv rescue @@ -254,5 +252,18 @@ module REXML end [pos, lineno, line] end + + private + def readline + str = @source.readline(@line_break) + return nil if str.nil? + + if @to_utf + decode(str) + else + str.force_encoding(::Encoding::UTF_8) if @force_utf8 + str + end + end end end -- cgit v1.2.3