aboutsummaryrefslogtreecommitdiffstats
path: root/spec/rubyspec/core/string/valid_encoding_spec.rb
blob: ddd0fe52a239e477b2b90e6b4f7c0504943e860d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
require File.expand_path('../../../spec_helper', __FILE__)

with_feature :encoding do
  describe "String#valid_encoding?" do
    it "returns true if the String's encoding is valid" do
      "a".valid_encoding?.should be_true
      "\u{8365}\u{221}".valid_encoding?.should be_true
    end

    it "returns true if self is valid in the current encoding and other encodings" do
      str = "\x77"
      str.force_encoding('utf-8').valid_encoding?.should be_true
      str.force_encoding('ascii-8bit').valid_encoding?.should be_true
    end

    it "returns true for all encodings self is valid in" do
      str = "\u{6754}"
      str.force_encoding('ASCII-8BIT').valid_encoding?.should be_true
      str.force_encoding('UTF-8').valid_encoding?.should be_true
      str.force_encoding('US-ASCII').valid_encoding?.should be_false
      str.force_encoding('Big5').valid_encoding?.should be_false
      str.force_encoding('CP949').valid_encoding?.should be_false
      str.force_encoding('Emacs-Mule').valid_encoding?.should be_false
      str.force_encoding('EUC-JP').valid_encoding?.should be_false
      str.force_encoding('EUC-KR').valid_encoding?.should be_false
      str.force_encoding('EUC-TW').valid_encoding?.should be_false
      str.force_encoding('GB18030').valid_encoding?.should be_false
      str.force_encoding('GBK').valid_encoding?.should be_false
      str.force_encoding('ISO-8859-1').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-2').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-3').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-4').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-5').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-6').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-7').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-8').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-9').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-10').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-11').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-13').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-14').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-15').valid_encoding?.should be_true
      str.force_encoding('ISO-8859-16').valid_encoding?.should be_true
      str.force_encoding('KOI8-R').valid_encoding?.should be_true
      str.force_encoding('KOI8-U').valid_encoding?.should be_true
      str.force_encoding('Shift_JIS').valid_encoding?.should be_false
      str.force_encoding('UTF-16BE').valid_encoding?.should be_false
      str.force_encoding('UTF-16LE').valid_encoding?.should be_false
      str.force_encoding('UTF-32BE').valid_encoding?.should be_false
      str.force_encoding('UTF-32LE').valid_encoding?.should be_false
      str.force_encoding('Windows-1251').valid_encoding?.should be_true
      str.force_encoding('IBM437').valid_encoding?.should be_true
      str.force_encoding('IBM737').valid_encoding?.should be_true
      str.force_encoding('IBM775').valid_encoding?.should be_true
      str.force_encoding('CP850').valid_encoding?.should be_true
      str.force_encoding('IBM852').valid_encoding?.should be_true
      str.force_encoding('CP852').valid_encoding?.should be_true
      str.force_encoding('IBM855').valid_encoding?.should be_true
      str.force_encoding('CP855').valid_encoding?.should be_true
      str.force_encoding('IBM857').valid_encoding?.should be_true
      str.force_encoding('IBM860').valid_encoding?.should be_true
      str.force_encoding('IBM861').valid_encoding?.should be_true
      str.force_encoding('IBM862').valid_encoding?.should be_true
      str.force_encoding('IBM863').valid_encoding?.should be_true
      str.force_encoding('IBM864').valid_encoding?.should be_true
      str.force_encoding('IBM865').valid_encoding?.should be_true
      str.force_encoding('IBM866').valid_encoding?.should be_true
      str.force_encoding('IBM869').valid_encoding?.should be_true
      str.force_encoding('Windows-1258').valid_encoding?.should be_true
      str.force_encoding('GB1988').valid_encoding?.should be_true
      str.force_encoding('macCentEuro').valid_encoding?.should be_true
      str.force_encoding('macCroatian').valid_encoding?.should be_true
      str.force_encoding('macCyrillic').valid_encoding?.should be_true
      str.force_encoding('macGreek').valid_encoding?.should be_true
      str.force_encoding('macIceland').valid_encoding?.should be_true
      str.force_encoding('macRoman').valid_encoding?.should be_true
      str.force_encoding('macRomania').valid_encoding?.should be_true
      str.force_encoding('macThai').valid_encoding?.should be_true
      str.force_encoding('macTurkish').valid_encoding?.should be_true
      str.force_encoding('macUkraine').valid_encoding?.should be_true
      str.force_encoding('stateless-ISO-2022-JP').valid_encoding?.should be_false
      str.force_encoding('eucJP-ms').valid_encoding?.should be_false
      str.force_encoding('CP51932').valid_encoding?.should be_false
      str.force_encoding('GB2312').valid_encoding?.should be_false
      str.force_encoding('GB12345').valid_encoding?.should be_false
      str.force_encoding('ISO-2022-JP').valid_encoding?.should be_true
      str.force_encoding('ISO-2022-JP-2').valid_encoding?.should be_true
      str.force_encoding('CP50221').valid_encoding?.should be_true
      str.force_encoding('Windows-1252').valid_encoding?.should be_true
      str.force_encoding('Windows-1250').valid_encoding?.should be_true
      str.force_encoding('Windows-1256').valid_encoding?.should be_true
      str.force_encoding('Windows-1253').valid_encoding?.should be_true
      str.force_encoding('Windows-1255').valid_encoding?.should be_true
      str.force_encoding('Windows-1254').valid_encoding?.should be_true
      str.force_encoding('TIS-620').valid_encoding?.should be_true
      str.force_encoding('Windows-874').valid_encoding?.should be_true
      str.force_encoding('Windows-1257').valid_encoding?.should be_true
      str.force_encoding('Windows-31J').valid_encoding?.should be_false
      str.force_encoding('MacJapanese').valid_encoding?.should be_false
      str.force_encoding('UTF-7').valid_encoding?.should be_true
      str.force_encoding('UTF8-MAC').valid_encoding?.should be_true
    end

    it "returns false if self is valid in one encoding, but invalid in the one it's tagged with" do
      str = "\u{8765}"
      str.valid_encoding?.should be_true
      str = str.force_encoding('ascii')
      str.valid_encoding?.should be_false
    end

    it "returns false if self contains a character invalid in the associated encoding" do
      "abc#{[0x80].pack('C')}".force_encoding('ascii').valid_encoding?.should be_false
    end

    it "returns false if a valid String had an invalid character appended to it" do
      str = "a"
      str.valid_encoding?.should be_true
      str << [0xDD].pack('C').force_encoding('utf-8')
      str.valid_encoding?.should be_false
    end

    it "returns true if an invalid string is appended another invalid one but both make a valid string" do
      str = [0xD0].pack('C').force_encoding('utf-8')
      str.valid_encoding?.should be_false
      str << [0xBF].pack('C').force_encoding('utf-8')
      str.valid_encoding?.should be_true
    end
  end
end