From 994f066f76857a781f8819b8da2c2aeceedbf87b Mon Sep 17 00:00:00 2001 From: kou Date: Sat, 30 Oct 2010 12:10:56 +0000 Subject: * lib/rexml/encoding.rb: use Ruby native encoding mechanism. [ruby-dev:42464] * lib/rexml/encodings/: remove. * lib/rexml/document.rb, lib/rexml/formatters/default.rb, lib/rexml/output.rb, lib/rexml/parseexception.rb, lib/rexml/parsers/baseparser.rb, lib/rexml/source.rb, lib/rexml/xmldecl.rb: use Ruby's native Encoding object. * test/rexml/, test/rss/: follow the above encoding changes. * NEWS: add REXML's incompatible change about encoding. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@29646 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/encoding.rb | 116 ++++++++++++++++++++++++-------------------------- 1 file changed, 56 insertions(+), 60 deletions(-) (limited to 'lib/rexml/encoding.rb') diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index 3feffb80f4..0c4a88fbeb 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -1,71 +1,67 @@ # -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2 module REXML module Encoding - @encoding_methods = {} - def self.register(enc, &block) - @encoding_methods[enc] = block - end - def self.apply(obj, enc) - @encoding_methods[enc][obj] - end - def self.encoding_method(enc) - @encoding_methods[enc] - end - - # Native, default format is UTF-8, so it is declared here rather than in - # an encodings/ definition. - UTF_8 = 'UTF-8' - UTF_16 = 'UTF-16' - UNILE = 'UNILE' - - # ID ---> Encoding name - attr_reader :encoding - def encoding=( enc ) - old_verbosity = $VERBOSE - begin - $VERBOSE = false - enc = enc.nil? ? nil : enc.upcase - return false if defined? 
@encoding and enc == @encoding - if enc and enc != UTF_8 - @encoding = enc - raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ - @encoding.untaint - begin - require 'rexml/encodings/ICONV.rb' - Encoding.apply(self, "ICONV") - rescue LoadError, Exception - begin - enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) - require enc_file - Encoding.apply(self, @encoding) - rescue LoadError => err - puts err.message - raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." - end - end - else - @encoding = UTF_8 - require 'rexml/encodings/UTF-8.rb' - Encoding.apply(self, @encoding) - end - ensure - $VERBOSE = old_verbosity + # ID ---> Encoding object + attr_reader :encoding + def encoding=(encoding) + if encoding.is_a?(String) + original_encoding = encoding + encoding = find_encoding(encoding) + unless encoding + raise ArgumentError, "Bad encoding name #{original_encoding}" + end + end + return false if defined?(@encoding) and encoding == @encoding + if encoding and encoding != ::Encoding::UTF_8 + @encoding = encoding + else + @encoding = ::Encoding::UTF_8 end true end - def check_encoding str + def check_encoding(xml) # We have to recognize UTF-16, LSB UTF-16, and UTF-8 - if str[0,2] == "\xfe\xff" - str[0,2] = "" - return UTF_16 - elsif str[0,2] == "\xff\xfe" - str[0,2] = "" - return UNILE - end - str =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m - return $3.upcase if $3 - return UTF_8 + if xml[0, 2] == "\xfe\xff" + xml[0, 2] = "" + ::Encoding::UTF_16BE + elsif xml[0, 2] == "\xff\xfe" + xml[0, 2] = "" + ::Encoding::UTF_16LE + else + if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1 + \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml + encoding_name = $3 + if /\Autf-16\z/i =~ encoding_name + ::Encoding::UTF_16BE + else + find_encoding(encoding_name) + end + else + ::Encoding::UTF_8 + end + end end + + def encode(string) + string.encode(@encoding) + end + + def decode(string) + 
string.encode(::Encoding::UTF_8, @encoding) + end + + private + def find_encoding(name) + case name + when "UTF-16" + name = "UTF-16BE" + when /\Ashift-jis\z/i + name = "Shift_JIS" + when /\ACP-(\d+)\z/ + name = "CP#{$1}" + end + ::Encoding.find(name) + end end end -- cgit v1.2.3