aboutsummaryrefslogtreecommitdiffstats
path: root/lib/rexml/encoding.rb
blob: 0c4a88fbeb0bb4ceab1dca7142996e55d5e8d776 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin that records which character encoding an XML source or sink uses
  # and converts strings between that encoding and UTF-8.
  module Encoding
    # The ::Encoding object currently in effect (nil until #encoding= has
    # been called at least once).
    attr_reader :encoding

    # Sets the encoding in effect.
    #
    # +encoding+ may be an encoding name (String), an ::Encoding object, or
    # nil/false; nil/false select UTF-8.
    #
    # Returns false when the requested encoding is already the one in
    # effect, true otherwise.
    #
    # Raises ArgumentError ("Bad encoding name ...") when a String name
    # cannot be resolved to an encoding.
    def encoding=(encoding)
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding =
          begin
            find_encoding(encoding)
          rescue ArgumentError
            # ::Encoding.find raises for names it does not know; treat that
            # like "not found" so the caller gets the message below.
            nil
          end
        unless encoding
          raise ArgumentError, "Bad encoding name #{original_encoding}"
        end
      end
      return false if defined?(@encoding) && encoding == @encoding
      @encoding = encoding || ::Encoding::UTF_8
      true
    end

    # Guesses the encoding of the XML text in +xml+.
    #
    # A leading UTF-16 byte order mark decides the answer and is removed
    # from +xml+ in place. Otherwise the encoding named in the XML
    # declaration is used, and UTF-8 is returned when there is none.
    #
    # Returns an ::Encoding object. An unknown declared encoding name raises
    # ArgumentError (from ::Encoding.find).
    def check_encoding(xml)
      # Compare raw bytes rather than substrings: String#== is false for two
      # non-ASCII strings with different encoding tags, which made the BOM
      # invisible when +xml+ was a binary string.
      case [xml.getbyte(0), xml.getbyte(1)]
      when [0xfe, 0xff]
        discard_leading_bytes(xml, 2)
        ::Encoding::UTF_16BE
      when [0xff, 0xfe]
        discard_leading_bytes(xml, 2)
        ::Encoding::UTF_16LE
      else
        if /\A\s*<\?xml\s+version\s*=\s*(['"]).*?\1
            \s+encoding\s*=\s*(["'])(.*?)\2/mx =~ xml
          encoding_name = $3
          if /\Autf-16\z/i =~ encoding_name
            # No BOM was seen, so assume big-endian.
            ::Encoding::UTF_16BE
          else
            find_encoding(encoding_name)
          end
        else
          ::Encoding::UTF_8
        end
      end
    end

    # Transcodes +string+ into the encoding in effect.
    def encode(string)
      string.encode(@encoding)
    end

    # Transcodes +string+, interpreted as being in the encoding in effect,
    # into UTF-8.
    def decode(string)
      string.encode(::Encoding::UTF_8, @encoding)
    end

    private

    # Resolves an encoding name to an ::Encoding object, first mapping a few
    # spellings that ::Encoding.find does not accept as-is. Matching is
    # case-insensitive, consistent with #check_encoding.
    #
    # Raises ArgumentError (from ::Encoding.find) for unknown names.
    def find_encoding(name)
      case name
      when /\Autf-16\z/i
        name = "UTF-16BE"
      when /\Ashift-jis\z/i
        name = "Shift_JIS"
      when /\ACP-(\d+)\z/i
        name = "CP#{$1}"
      end
      ::Encoding.find(name)
    end

    # Removes the first +count+ bytes of +xml+ in place, keeping its
    # encoding tag. The string is viewed as binary while slicing so that
    # exactly +count+ bytes (not characters) are dropped.
    def discard_leading_bytes(xml, count)
      tagged = xml.encoding
      xml.force_encoding(::Encoding::BINARY)
      xml[0, count] = ""
      xml.force_encoding(tagged)
    end
  end
end