diff options
author | eregon <eregon@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-09-20 20:18:52 +0000 |
---|---|---|
committer | eregon <eregon@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-09-20 20:18:52 +0000 |
commit | 1d15d5f08032acf1b7bceacbb450d617ff6e0931 (patch) | |
tree | a3785a79899302bc149e4a6e72f624ac27dc1f10 /spec/ruby/core/encoding/converter | |
parent | 75bfc6440d595bf339007f4fb280fd4d743e89c1 (diff) | |
download | ruby-1d15d5f08032acf1b7bceacbb450d617ff6e0931.tar.gz |
Move spec/rubyspec to spec/ruby for consistency
* Other ruby implementations use the spec/ruby directory.
[Misc #13792] [ruby-core:82287]
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59979 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'spec/ruby/core/encoding/converter')
16 files changed, 1054 insertions, 0 deletions
diff --git a/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb b/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb
new file mode 100644
index 0000000000..329e09cade
--- /dev/null
+++ b/spec/ruby/core/encoding/converter/asciicompat_encoding_spec.rb
@@ -0,0 +1,43 @@
+require File.expand_path('../../../../spec_helper', __FILE__)
+
+with_feature :encoding do
+  describe "Encoding::Converter.asciicompat_encoding" do
+    it "accepts an encoding name as a String argument" do
+      lambda { Encoding::Converter.asciicompat_encoding('UTF-8') }.
+        should_not raise_error
+    end
+
+    it "coerces non-String/Encoding objects with #to_str" do
+      str = mock('string')
+      str.should_receive(:to_str).at_least(1).times.and_return('string')
+      Encoding::Converter.asciicompat_encoding(str)
+    end
+
+    it "accepts an Encoding object as an argument" do
+      Encoding::Converter.
+        asciicompat_encoding(Encoding.find("ISO-2022-JP")).
+        should == Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
+    end
+
+    it "returns a corresponding ASCII compatible encoding for ASCII-incompatible encodings" do
+      Encoding::Converter.asciicompat_encoding('UTF-16BE').should == Encoding::UTF_8
+      Encoding::Converter.asciicompat_encoding("ISO-2022-JP").should == Encoding.find("stateless-ISO-2022-JP")
+    end
+
+    it "returns nil when the given encoding is ASCII compatible" do
+      Encoding::Converter.asciicompat_encoding('ASCII').should be_nil
+      Encoding::Converter.asciicompat_encoding('UTF-8').should be_nil
+    end
+
+    it "handles encoding names who resolve to nil encodings" do
+      internal = Encoding.default_internal
+      begin
+        Encoding.default_internal = nil
+        Encoding::Converter.asciicompat_encoding('internal').should be_nil
+      ensure
+        # Restore the process-wide setting even when the expectation above fails.
+        Encoding.default_internal = internal
+      end
+    end
+  end
+end
diff --git a/spec/ruby/core/encoding/converter/constants_spec.rb b/spec/ruby/core/encoding/converter/constants_spec.rb
new file mode 100644
index 0000000000..16eb60b4ab
--- /dev/null
+++
b/spec/ruby/core/encoding/converter/constants_spec.rb
@@ -0,0 +1,33 @@
+require File.expand_path('../../../../spec_helper', __FILE__)
+
+with_feature :encoding do
+  # Each Encoding::Converter flag/decorator constant gets the same two examples:
+  # the constant is defined, and its value is an integer.
+  %i[
+    INVALID_MASK
+    INVALID_REPLACE
+    UNDEF_MASK
+    UNDEF_REPLACE
+    UNDEF_HEX_CHARREF
+    PARTIAL_INPUT
+    AFTER_OUTPUT
+    UNIVERSAL_NEWLINE_DECORATOR
+    CRLF_NEWLINE_DECORATOR
+    CR_NEWLINE_DECORATOR
+    XML_TEXT_DECORATOR
+    XML_ATTR_CONTENT_DECORATOR
+    XML_ATTR_QUOTE_DECORATOR
+  ].each do |name|
+    describe "Encoding::Converter::#{name}" do
+      it "exists" do
+        Encoding::Converter.should have_constant(name)
+      end
+
+      # Integer, not Fixnum: Fixnum is deprecated since Ruby 2.4. be_kind_of also
+      # accepts the Fixnum instances that older rubies return.
+      it "has an Integer value" do
+        Encoding::Converter.const_get(name).should be_kind_of(Integer)
+      end
+    end
+  end
+end
diff --git a/spec/ruby/core/encoding/converter/convert_spec.rb b/spec/ruby/core/encoding/converter/convert_spec.rb
new file mode 100644
index 0000000000..588d659ceb
--- /dev/null
+++ b/spec/ruby/core/encoding/converter/convert_spec.rb
@@ -0,0 +1,47 @@
+# -*- encoding: binary -*-
+require File.expand_path('../../../../spec_helper', __FILE__)
+
+with_feature :encoding do
+  describe "Encoding::Converter#convert" do
+    it "returns a String" do
+      ec = Encoding::Converter.new('ascii', 'utf-8')
+      ec.convert('glark').should be_an_instance_of(String)
+    end
+
+    it "sets the encoding of the result to the target encoding" do
+      ec = Encoding::Converter.new('ascii', 'utf-8')
+      str = 'glark'.force_encoding('ascii')
+      ec.convert(str).encoding.should == Encoding::UTF_8
+    end
+
+    it "transcodes the given String to the target encoding" do
+      ec = Encoding::Converter.new("utf-8", "euc-jp")
+      ec.convert("\u3042".force_encoding('UTF-8')).should == \
+        "\xA4\xA2".force_encoding('EUC-JP')
+    end
+
+    it "allows Strings of different encodings to the source encoding" do
+      ec = Encoding::Converter.new('ascii', 'utf-8')
+      str = 'glark'.force_encoding('SJIS')
+      ec.convert(str).encoding.should == Encoding::UTF_8
+    end
+
+    it "reuses the given encoding pair if called multiple times" do
+      ec = Encoding::Converter.new('ascii', 'SJIS')
+      ec.convert('a'.force_encoding('ASCII')).should == 'a'.force_encoding('SJIS')
+      ec.convert('b'.force_encoding('ASCII')).should == 'b'.force_encoding('SJIS')
+    end
+
+    it "raises UndefinedConversionError if the String contains characters invalid for the target encoding" do
+      ec = Encoding::Converter.new('UTF-8', Encoding.find('macCyrillic'))
+      lambda { ec.convert("\u{6543}".force_encoding('UTF-8')) }.should \
+        raise_error(Encoding::UndefinedConversionError)
+    end
+
+    it "raises an ArgumentError if called on a finished stream" do
+      ec = Encoding::Converter.new('UTF-8', Encoding.find('macCyrillic'))
+      ec.finish
+      lambda { ec.convert("\u{65}") }.should raise_error(ArgumentError)
+    end
+  end
+end
diff --git a/spec/ruby/core/encoding/converter/convpath_spec.rb b/spec/ruby/core/encoding/converter/convpath_spec.rb
new file mode 100644
index 0000000000..679b894f58
--- /dev/null
+++ b/spec/ruby/core/encoding/converter/convpath_spec.rb
@@ -0,0 +1,62 @@
+require File.expand_path('../../../../spec_helper', __FILE__)
+
+with_feature :encoding do
+  describe "Encoding::Converter#convpath" do
+    before :all do
+      # Conversion paths for every ordered pair of encoding names; pairs for
+      # which Converter.new raises are skipped.
+      @perms = Encoding.name_list.permutation(2).map do |pair|
+        Encoding::Converter.new(pair.first, pair.last) rescue nil
+      end.compact.map(&:convpath)
+    end
+
+    it "returns an Array" do
+      ec = Encoding::Converter.new('ASCII', 'EUC-JP')
+      ec.convpath.should be_an_instance_of(Array)
+    end
+
+    it "returns each encoding pair as a sub-Array" do
+      ec = Encoding::Converter.new('ASCII', 'EUC-JP')
+      ec.convpath.first.should be_an_instance_of(Array)
+      ec.convpath.first.size.should == 2
+    end
+
+    it "returns each encoding as an Encoding object" do
+      ec = Encoding::Converter.new('ASCII', 'EUC-JP')
+      ec.convpath.first.first.should be_an_instance_of(Encoding)
+      ec.convpath.first.last.should be_an_instance_of(Encoding)
+    end
+
+    it "returns multiple encoding pairs when direct conversion is impossible" do
+      ec = Encoding::Converter.new('ascii','Big5')
+      ec.convpath.size.should == 2
+      ec.convpath.first.first.should == Encoding::US_ASCII
+      ec.convpath.first.last.should == ec.convpath.last.first
+      ec.convpath.last.last.should == Encoding::Big5
+    end
+
+    it "sets the last element of each pair to the first element of the next" do
+      @perms.each do |convpath|
+        # each_cons(2) yields nothing for a single-step path, so no size guard is needed.
+        convpath.each_cons(2) do |pair, following|
+          pair.last.should == following.first
+        end
+      end
+    end
+
+    it "only lists a source encoding once" do
+      @perms.each do |convpath|
+        sources = convpath.map(&:first)
+        sources.uniq.should == sources
+      end
+    end
+
+    it "indicates if crlf_newline conversion would occur" do
+      ec = Encoding::Converter.new("ISo-8859-1", "EUC-JP", {crlf_newline: true})
+      ec.convpath.last.should == "crlf_newline"
+
+      ec = Encoding::Converter.new("ASCII", "UTF-8", {crlf_newline: false})
+      ec.convpath.last.should_not == "crlf_newline"
+    end
+  end
+end
diff --git
a/spec/ruby/core/encoding/converter/destination_encoding_spec.rb b/spec/ruby/core/encoding/converter/destination_encoding_spec.rb new file mode 100644 index 0000000000..830e6d2178 --- /dev/null +++ b/spec/ruby/core/encoding/converter/destination_encoding_spec.rb @@ -0,0 +1,13 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#destination_encoding" do + it "returns the destination encoding as an Encoding object" do + ec = Encoding::Converter.new('ASCII','Big5') + ec.destination_encoding.should == Encoding::BIG5 + + ec = Encoding::Converter.new('SJIS','EUC-JP') + ec.destination_encoding.should == Encoding::EUC_JP + end + end +end diff --git a/spec/ruby/core/encoding/converter/finish_spec.rb b/spec/ruby/core/encoding/converter/finish_spec.rb new file mode 100644 index 0000000000..86097357f4 --- /dev/null +++ b/spec/ruby/core/encoding/converter/finish_spec.rb @@ -0,0 +1,38 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#finish" do + before :each do + @ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + end + + it "returns a String" do + @ec.convert('foo') + @ec.finish.should be_an_instance_of(String) + end + + it "returns an empty String if there is nothing more to convert" do + @ec.convert("glark") + @ec.finish.should == "" + end + + it "returns the last part of the converted String if it hasn't already" do + @ec.convert("\u{9999}").should == "\e$B9a".force_encoding('iso-2022-jp') + @ec.finish.should == "\e(B".force_encoding('iso-2022-jp') + end + + it "returns a String in the destination encoding" do + @ec.convert("glark") + @ec.finish.encoding.should == Encoding::ISO2022_JP + end + + it "returns an empty String if self was not given anything to convert" do + @ec.finish.should == "" + end + + it "returns an empty String on subsequent invocations" do + @ec.finish.should == "" + @ec.finish.should == "" + end + end 
+end diff --git a/spec/ruby/core/encoding/converter/insert_output_spec.rb b/spec/ruby/core/encoding/converter/insert_output_spec.rb new file mode 100644 index 0000000000..bc9a56ba45 --- /dev/null +++ b/spec/ruby/core/encoding/converter/insert_output_spec.rb @@ -0,0 +1,5 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +describe "Encoding::Converter#insert_output" do + it "needs to be reviewed for spec completeness" +end diff --git a/spec/ruby/core/encoding/converter/inspect_spec.rb b/spec/ruby/core/encoding/converter/inspect_spec.rb new file mode 100644 index 0000000000..b8216176cf --- /dev/null +++ b/spec/ruby/core/encoding/converter/inspect_spec.rb @@ -0,0 +1,13 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +describe "Encoding::Converter#inspect" do + it "includes the source and destination encodings in the return value" do + source = Encoding::UTF_8 + destination = Encoding::UTF_16LE + + output = "#<Encoding::Converter: #{source.name} to #{destination.name}>" + + x = Encoding::Converter.new(source, destination) + x.inspect.should == output + end +end diff --git a/spec/ruby/core/encoding/converter/last_error_spec.rb b/spec/ruby/core/encoding/converter/last_error_spec.rb new file mode 100644 index 0000000000..8465935368 --- /dev/null +++ b/spec/ruby/core/encoding/converter/last_error_spec.rb @@ -0,0 +1,85 @@ +# -*- encoding: binary -*- +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#last_error" do + it "returns nil when the no conversion has been attempted" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.last_error.should be_nil + end + + it "returns nil when the last conversion did not produce an error" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.convert('a'.force_encoding('ascii')) + ec.last_error.should be_nil + end + + it "returns nil when #primitive_convert last returned :destination_buffer_full" do + ec = 
Encoding::Converter.new("utf-8", "iso-2022-jp") + ec.primitive_convert("\u{9999}", "", 0, 0, partial_input: false) \ + .should == :destination_buffer_full + ec.last_error.should be_nil + end + + it "returns nil when #primitive_convert last returned :finished" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + ec.last_error.should be_nil + end + + it "returns nil if the last conversion succeeded but the penultimate failed" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + ec.last_error.should be_nil + end + + it "returns an Encoding::InvalidByteSequenceError when #primitive_convert last returned :invalid_byte_sequence" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + end + + it "returns an Encoding::UndefinedConversionError when #primitive_convert last returned :undefined_conversion" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\u{9876}","").should == :undefined_conversion + ec.last_error.should be_an_instance_of(Encoding::UndefinedConversionError) + end + + it "returns an Encoding::InvalidByteSequenceError when #primitive_convert last returned :incomplete_input" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, 10).should == :incomplete_input + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + end + + it "returns an Encoding::InvalidByteSequenceError when the last call to #convert produced one" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + exception = nil + lambda do + begin + ec.convert("\xf1abcd") + rescue Encoding::InvalidByteSequenceError => e 
+ exception = e + raise e + end + end.should raise_error(Encoding::InvalidByteSequenceError) + ec.last_error.should be_an_instance_of(Encoding::InvalidByteSequenceError) + ec.last_error.message.should == exception.message + end + + it "returns an Encoding::UndefinedConversionError when the last call to #convert produced one" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + exception = nil + lambda do + begin + ec.convert("\u{9899}") + rescue Encoding::UndefinedConversionError => e + exception = e + raise e + end + end.should raise_error(Encoding::UndefinedConversionError) + ec.last_error.should be_an_instance_of(Encoding::UndefinedConversionError) + ec.last_error.message.should == exception.message + end + end +end diff --git a/spec/ruby/core/encoding/converter/new_spec.rb b/spec/ruby/core/encoding/converter/new_spec.rb new file mode 100644 index 0000000000..d228c80a18 --- /dev/null +++ b/spec/ruby/core/encoding/converter/new_spec.rb @@ -0,0 +1,121 @@ +# -*- encoding: ascii-8bit -*- +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter.new" do + it "accepts a String for the source encoding" do + conv = Encoding::Converter.new("us-ascii", "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "accepts a String for the destination encoding" do + conv = Encoding::Converter.new("us-ascii", "utf-8") + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "accepts an Encoding object for the source encoding" do + conv = Encoding::Converter.new(Encoding::US_ASCII, "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "accepts an Encoding object for the destination encoding" do + conv = Encoding::Converter.new("us-ascii", Encoding::UTF_8) + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "raises an Encoding::ConverterNotFoundError if both encodings are the same" do + lambda do + Encoding::Converter.new "utf-8", "utf-8" + 
end.should raise_error(Encoding::ConverterNotFoundError) + end + + it "calls #to_str to convert the source encoding argument to an encoding name" do + enc = mock("us-ascii") + enc.should_receive(:to_str).and_return("us-ascii") + conv = Encoding::Converter.new(enc, "utf-8") + conv.source_encoding.should == Encoding::US_ASCII + end + + it "calls #to_str to convert the destination encoding argument to an encoding name" do + enc = mock("utf-8") + enc.should_receive(:to_str).and_return("utf-8") + conv = Encoding::Converter.new("us-ascii", enc) + conv.destination_encoding.should == Encoding::UTF_8 + end + + it "sets replacement from the options Hash" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: "fubar") + conv.replacement.should == "fubar" + end + + it "calls #to_hash to convert the options argument to a Hash if not a Fixnum" do + opts = mock("encoding converter options") + opts.should_receive(:to_hash).and_return({ replace: "fubar" }) + conv = Encoding::Converter.new("us-ascii", "utf-8", opts) + conv.replacement.should == "fubar" + end + + it "calls #to_str to convert the replacement object to a String" do + obj = mock("encoding converter replacement") + obj.should_receive(:to_str).and_return("fubar") + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: obj) + conv.replacement.should == "fubar" + end + + it "raises a TypeError if #to_str does not return a String" do + obj = mock("encoding converter replacement") + obj.should_receive(:to_str).and_return(1) + + lambda do + Encoding::Converter.new("us-ascii", "utf-8", replace: obj) + end.should raise_error(TypeError) + end + + it "raises a TypeError if passed true for the replacement object" do + lambda do + Encoding::Converter.new("us-ascii", "utf-8", replace: true) + end.should raise_error(TypeError) + end + + it "raises a TypeError if passed false for the replacement object" do + lambda do + Encoding::Converter.new("us-ascii", "utf-8", replace: false) + end.should raise_error(TypeError) + 
end + + it "raises a TypeError if passed a Fixnum for the replacement object" do + lambda do + Encoding::Converter.new("us-ascii", "utf-8", replace: 1) + end.should raise_error(TypeError) + end + + it "accepts an empty String for the replacement object" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: "") + conv.replacement.should == "" + end + + describe "when passed nil for the replacement object" do + describe "when the destination encoding is not UTF-8" do + it "sets the replacement String to '?'" do + conv = Encoding::Converter.new("us-ascii", "ascii-8bit", replace: nil) + conv.replacement.should == "?" + end + + it "sets the replacement String encoding to US-ASCII" do + conv = Encoding::Converter.new("us-ascii", "ascii-8bit", replace: nil) + conv.replacement.encoding.should == Encoding::US_ASCII + end + + it "sets the replacement String to '\\uFFFD'" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: nil) + conv.replacement.should == "\u{fffd}".force_encoding("utf-8") + end + + it "sets the replacement String encoding to UTF-8" do + conv = Encoding::Converter.new("us-ascii", "utf-8", replace: nil) + conv.replacement.encoding.should == Encoding::UTF_8 + end + end + end + end +end diff --git a/spec/ruby/core/encoding/converter/primitive_convert_spec.rb b/spec/ruby/core/encoding/converter/primitive_convert_spec.rb new file mode 100644 index 0000000000..b9d6288bb2 --- /dev/null +++ b/spec/ruby/core/encoding/converter/primitive_convert_spec.rb @@ -0,0 +1,213 @@ +# -*- encoding: binary -*- +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#primitive_convert" do + before :each do + @ec = Encoding::Converter.new("utf-8", "iso-8859-1") + end + + it "accepts a nil source buffer" do + lambda { @ec.primitive_convert(nil,"") }.should_not raise_error + end + + it "accepts a String as the source buffer" do + lambda { @ec.primitive_convert("","") }.should_not raise_error 
+ end + + it "accepts nil for the destination byte offset" do + lambda { @ec.primitive_convert("","", nil) }.should_not raise_error + end + + it "accepts an integer for the destination byte offset" do + lambda { @ec.primitive_convert("","a", 1) }.should_not raise_error + end + + it "calls #to_int to convert the destination byte offset" do + offset = mock("encoding primitive_convert destination byte offset") + offset.should_receive(:to_int).and_return(2) + @ec.primitive_convert("abc", result = " ", offset).should == :finished + result.should == " abc" + end + + it "raises an ArgumentError if the destination byte offset is greater than the bytesize of the destination buffer" do + lambda { @ec.primitive_convert("","am", 0) }.should_not raise_error + lambda { @ec.primitive_convert("","am", 1) }.should_not raise_error + lambda { @ec.primitive_convert("","am", 2) }.should_not raise_error + lambda { @ec.primitive_convert("","am", 3) }.should raise_error(ArgumentError) + end + + it "uses the destination byte offset to determine where to write the result in the destination buffer" do + dest = "aa" + @ec.primitive_convert("b",dest, nil, 0) + dest.should == "aa" + + @ec.primitive_convert("b",dest, nil, 1) + dest.should == "aab" + + @ec.primitive_convert("b",dest, nil, 2) + dest.should == "aabbb" + end + + it "accepts nil for the destination bytesize" do + lambda { @ec.primitive_convert("","", nil, nil) }.should_not raise_error + end + + it "accepts an integer for the destination bytesize" do + lambda { @ec.primitive_convert("","", nil, 0) }.should_not raise_error + end + + it "allows a destination bytesize value greater than the bytesize of the source buffer" do + lambda { @ec.primitive_convert("am","", nil, 3) }.should_not raise_error + end + + it "allows a destination bytesize value less than the bytesize of the source buffer" do + lambda { @ec.primitive_convert("am","", nil, 1) }.should_not raise_error + end + + it "calls #to_int to convert the destination byte size" do + 
size = mock("encoding primitive_convert destination byte size") + size.should_receive(:to_int).and_return(2) + @ec.primitive_convert("abc", result = " ", 0, size).should == :destination_buffer_full + result.should == "ab" + end + + it "uses destination bytesize as the maximum bytesize of the destination buffer" do + dest = "" + @ec.primitive_convert("glark", dest, nil, 1) + dest.bytesize.should == 1 + end + + it "allows a destination buffer of unlimited size if destination bytesize is nil" do + source = "glark".force_encoding('utf-8') + dest = "" + @ec.primitive_convert("glark", dest, nil, nil) + dest.bytesize.should == source.bytesize + end + + it "accepts an options hash" do + @ec.primitive_convert("","",nil,nil, {after_output: true}).should == :finished + end + + it "sets the destination buffer's encoding to the destination encoding if the conversion suceeded" do + dest = "".force_encoding('utf-8') + dest.encoding.should == Encoding::UTF_8 + @ec.primitive_convert("\u{98}",dest).should == :finished + dest.encoding.should == Encoding::ISO_8859_1 + end + + it "sets the destination buffer's encoding to the destination encoding if the conversion failed" do + dest = "".force_encoding('utf-8') + dest.encoding.should == Encoding::UTF_8 + @ec.primitive_convert("\u{9878}",dest).should == :undefined_conversion + dest.encoding.should == Encoding::ISO_8859_1 + end + + it "removes the undefined part from the source buffer when returning :undefined_conversion" do + dest = "".force_encoding('utf-8') + s = "\u{9878}abcd" + @ec.primitive_convert(s, dest).should == :undefined_conversion + + s.should == "abcd" + end + + it "returns :incomplete_input when source buffer ends unexpectedly and :partial_input isn't specified" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, nil, partial_input: false).should == :incomplete_input + end + + it "clears the source buffer when returning :incomplete_input" do + ec = 
Encoding::Converter.new("EUC-JP", "ISO-8859-1") + s = "\xa4" + ec.primitive_convert(s, "").should == :incomplete_input + + s.should == "" + end + + it "returns :source_buffer_empty when source buffer ends unexpectedly and :partial_input is true" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, nil, partial_input: true).should == :source_buffer_empty + end + + it "clears the source buffer when returning :source_buffer_empty" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + s = "\xa4" + ec.primitive_convert(s, "", nil, nil, partial_input: true).should == :source_buffer_empty + + s.should == "" + end + + it "returns :undefined_conversion when a character in the source buffer is not representable in the output encoding" do + @ec.primitive_convert("\u{9876}","").should == :undefined_conversion + end + + it "returns :invalid_byte_sequence when an invalid byte sequence was found in the source buffer" do + @ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + end + + it "removes consumed and erroneous bytes from the source buffer when returning :invalid_byte_sequence" do + ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC) + s = "\xC3\xA1\x80\x80\xC3\xA1".force_encoding("utf-8") + dest = "".force_encoding("utf-8") + ec.primitive_convert(s, dest) + + s.should == "\x80\xC3\xA1".force_encoding("utf-8") + end + + it "returns :finished when the conversion succeeded" do + @ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + end + + it "clears the source buffer when returning :finished" do + s = "glark".force_encoding('utf-8') + @ec.primitive_convert(s, "").should == :finished + + s.should == "" + end + + it "returns :destination_buffer_full when the destination buffer is too small" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + source = "\u{9999}" + destination_bytesize = source.bytesize - 1 + ec.primitive_convert(source, "", 0, 
destination_bytesize) \ + .should == :destination_buffer_full + source.should == "" + end + + it "clears the source buffer when returning :destination_buffer_full" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + s = "\u{9999}" + destination_bytesize = s.bytesize - 1 + ec.primitive_convert(s, "", 0, destination_bytesize).should == :destination_buffer_full + + s.should == "" + end + + it "keeps removing invalid bytes from the source buffer" do + ec = Encoding::Converter.new(Encoding::UTF_8, Encoding::UTF_8_MAC) + s = "\x80\x80\x80" + dest = "".force_encoding(Encoding::UTF_8_MAC) + + ec.primitive_convert(s, dest) + s.should == "\x80\x80" + ec.primitive_convert(s, dest) + s.should == "\x80" + ec.primitive_convert(s, dest) + s.should == "" + end + + it "reuses read-again bytes after the first error" do + s = "\xf1abcd" + dest = "" + + @ec.primitive_convert(s, dest).should == :invalid_byte_sequence + s.should == "bcd" + @ec.primitive_errinfo[4].should == "a" + + @ec.primitive_convert(s, dest).should == :finished + s.should == "" + + dest.should == "abcd" + end + end +end diff --git a/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb b/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb new file mode 100644 index 0000000000..f92c95c6d5 --- /dev/null +++ b/spec/ruby/core/encoding/converter/primitive_errinfo_spec.rb @@ -0,0 +1,72 @@ +# -*- encoding: binary -*- +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#primitive_errinfo" do + it "returns [:source_buffer_empty,nil,nil,nil,nil] when no conversion has been attempted" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.primitive_errinfo.should == [:source_buffer_empty, nil, nil, nil, nil] + end + + it "returns [:finished,nil,nil,nil,nil] when #primitive_convert last returned :finished" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.primitive_convert("a","").should == :finished + ec.primitive_errinfo.should 
== [:finished, nil, nil, nil, nil] + end + + it "returns [:source_buffer_empty,nil,nil,nil, nil] when #convert last succeeded" do + ec = Encoding::Converter.new('ascii','utf-8') + ec.convert("a".force_encoding('ascii')).should == "a".\ + force_encoding('utf-8') + ec.primitive_errinfo.should == [:source_buffer_empty, nil, nil, nil, nil] + end + + it "returns [:destination_buffer_full,nil,nil,nil,nil] when #primitive_convert last returned :destination_buffer_full" do + ec = Encoding::Converter.new("utf-8", "iso-2022-jp") + ec.primitive_convert("\u{9999}", "", 0, 0, partial_input: false) \ + .should == :destination_buffer_full + ec.primitive_errinfo.should == [:destination_buffer_full, nil, nil, nil, nil] + end + + it "returns the status of the last primitive conversion, even if it was successful and the previous one wasn't" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.primitive_convert("glark".force_encoding('utf-8'),"").should == :finished + ec.primitive_errinfo.should == [:finished, nil, nil, nil, nil] + end + + it "returns the state, source encoding, target encoding, and the erroneous bytes when #primitive_convert last returned :undefined_conversion" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\u{9876}","").should == :undefined_conversion + ec.primitive_errinfo.should == + [:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE9\xA1\xB6", ""] + end + + it "returns the state, source encoding, target encoding, and erroneous bytes when #primitive_convert last returned :incomplete_input" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.primitive_convert("\xa4", "", nil, 10).should == :incomplete_input + ec.primitive_errinfo.should == [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #primitive_convert last returned 
:invalid_byte_sequence" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + ec.primitive_convert("\xf1abcd","").should == :invalid_byte_sequence + ec.primitive_errinfo.should == + [:invalid_byte_sequence, "UTF-8", "ISO-8859-1", "\xF1", "a"] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #convert last raised InvalidByteSequenceError" do + ec = Encoding::Converter.new("utf-8", "iso-8859-1") + lambda { ec.convert("\xf1abcd") }.should raise_error(Encoding::InvalidByteSequenceError) + ec.primitive_errinfo.should == + [:invalid_byte_sequence, "UTF-8", "ISO-8859-1", "\xF1", "a"] + end + + it "returns the state, source encoding, target encoding, erroneous bytes, and the read-again bytes when #finish last raised InvalidByteSequenceError" do + ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + ec.convert("\xa4") + lambda { ec.finish }.should raise_error(Encoding::InvalidByteSequenceError) + ec.primitive_errinfo.should == [:incomplete_input, "EUC-JP", "UTF-8", "\xA4", ""] + end + end +end diff --git a/spec/ruby/core/encoding/converter/putback_spec.rb b/spec/ruby/core/encoding/converter/putback_spec.rb new file mode 100644 index 0000000000..69ce59e89b --- /dev/null +++ b/spec/ruby/core/encoding/converter/putback_spec.rb @@ -0,0 +1,50 @@ +# -*- encoding: binary -*- +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#putback" do + before :each do + @ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1") + @ret = @ec.primitive_convert(@src="abc\xa1def", @dst="", nil, 10) + end + + it "returns a String" do + @ec.putback.should be_an_instance_of(String) + end + + it "returns a String in the source encoding" do + @ec.putback.encoding.should == Encoding::EUC_JP + end + + it "returns the bytes buffered due to an :invalid_byte_sequence error" do + @ret.should == :invalid_byte_sequence + @ec.putback.should == 'd' + 
@ec.primitive_errinfo.last.should == 'd' + end + + it "allows conversion to be resumed after an :invalid_byte_sequence" do + @src = @ec.putback + @src + @ret = @ec.primitive_convert(@src, @dst, nil, 10) + @ret.should == :finished + @dst.should == "abcdef" + @src.should == "" + end + + it "returns an empty String when there are no more bytes to put back" do + @ec.putback + @ec.putback.should == "" + end + + it "accepts an integer argument corresponding to the number of bytes to be put back" do + ec = Encoding::Converter.new("utf-16le", "iso-8859-1") + src = "\x00\xd8\x61\x00" + dst = "" + ec.primitive_convert(src, dst).should == :invalid_byte_sequence + ec.primitive_errinfo.should == + [:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "a\x00"] + ec.putback(1).should == "\x00".force_encoding("utf-16le") + ec.putback.should == "a".force_encoding("utf-16le") + ec.putback.should == "" + end + end +end diff --git a/spec/ruby/core/encoding/converter/replacement_spec.rb b/spec/ruby/core/encoding/converter/replacement_spec.rb new file mode 100644 index 0000000000..9c25887cd7 --- /dev/null +++ b/spec/ruby/core/encoding/converter/replacement_spec.rb @@ -0,0 +1,74 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#replacement" do + it "returns '?' in US-ASCII when the destination encoding is not UTF-8" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement.should == "?" + ec.replacement.encoding.should == Encoding::US_ASCII + + ec = Encoding::Converter.new("utf-8", "sjis") + ec.replacement.should == "?" 
+ ec.replacement.encoding.should == Encoding::US_ASCII + end + + it "returns \\uFFFD when the destination encoding is UTF-8" do + ec = Encoding::Converter.new("us-ascii", "utf-8") + ec.replacement.should == "\u{fffd}".force_encoding('utf-8') + ec.replacement.encoding.should == Encoding::UTF_8 + end + end + + describe "Encoding::Converter#replacement=" do + it "accepts a String argument" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement = "!" + ec.replacement.should == "!" + end + + it "accepts a String argument of arbitrary length" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + ec.replacement = "?!?" * 9999 + ec.replacement.should == "?!?" * 9999 + end + + it "raises a TypeError if assigned a non-String argument" do + ec = Encoding::Converter.new("utf-8", "us-ascii") + lambda { ec.replacement = nil }.should raise_error(TypeError) + end + + it "sets #replacement" do + ec = Encoding::Converter.new("us-ascii", "utf-8") + ec.replacement.should == "\u{fffd}".force_encoding('utf-8') + ec.replacement = '?'.encode('utf-8') + ec.replacement.should == '?'.force_encoding('utf-8') + end + + it "raises an UndefinedConversionError is the argument cannot be converted into the destination encoding" do + ec = Encoding::Converter.new("sjis", "ascii") + utf8_q = "\u{986}".force_encoding('utf-8') + ec.primitive_convert(utf8_q.dup, "").should == :undefined_conversion + lambda { ec.replacement = utf8_q }.should \ + raise_error(Encoding::UndefinedConversionError) + end + + it "does not change the replacement character if the argument cannot be converted into the destination encoding" do + ec = Encoding::Converter.new("sjis", "ascii") + utf8_q = "\u{986}".force_encoding('utf-8') + ec.primitive_convert(utf8_q.dup, "").should == :undefined_conversion + lambda { ec.replacement = utf8_q }.should \ + raise_error(Encoding::UndefinedConversionError) + ec.replacement.should == "?".force_encoding('us-ascii') + end + + it "uses the replacement character" do + ec = 
Encoding::Converter.new("utf-8", "us-ascii", :invalid => :replace, :undef => :replace) + ec.replacement = "!" + dest = "" + status = ec.primitive_convert "中文123", dest + + status.should == :finished + dest.should == "!!123" + end + end +end diff --git a/spec/ruby/core/encoding/converter/search_convpath_spec.rb b/spec/ruby/core/encoding/converter/search_convpath_spec.rb new file mode 100644 index 0000000000..c04eeb98ad --- /dev/null +++ b/spec/ruby/core/encoding/converter/search_convpath_spec.rb @@ -0,0 +1,73 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter.search_convpath" do + before :all do + @perms = Encoding.name_list.permutation(2).map do |pair| + Encoding::Converter.search_convpath(pair.first, pair.last) rescue [] + end + end + + it "returns an Array" do + Encoding::Converter.search_convpath('ASCII', 'EUC-JP').\ + should be_an_instance_of(Array) + end + + it "returns each encoding pair as a sub-Array" do + cp = Encoding::Converter.search_convpath('ASCII', 'EUC-JP') + cp.first.should be_an_instance_of(Array) + cp.first.size.should == 2 + end + + it "returns each encoding as an Encoding object" do + cp = Encoding::Converter.search_convpath('ASCII', 'EUC-JP') + cp.first.first.should be_an_instance_of(Encoding) + cp.first.last.should be_an_instance_of(Encoding) + end + + it "returns multiple encoding pairs when direct conversion is impossible" do + cp = Encoding::Converter.search_convpath('ascii','Big5') + cp.size.should == 2 + cp.first.should == [Encoding::US_ASCII, Encoding::UTF_8] + cp.last.should == [Encoding::UTF_8, Encoding::Big5] + end + + it "sets the last element of each pair to the first element of the next" do + @perms.each do |convpath| + next if convpath.size == 1 + convpath.each_with_index do |pair, idx| + break if idx == convpath.size - 1 + pair.last.should == convpath[idx+1].first + end + end + end + + it "only lists a source encoding once" do + @perms.each do |convpath| 
+ next if convpath.size < 2 + seen = Hash.new(false) + convpath.each_with_index do |pair, idx| + seen.key?(pair.first).should be_false if idx > 0 + seen[pair.first] = true + end + end + end + + it "indicates if crlf_newline conversion would occur" do + cp = Encoding::Converter.search_convpath( + "ISo-8859-1", "EUC-JP", {crlf_newline: true}) + cp.last.should == "crlf_newline" + + cp = Encoding::Converter.search_convpath( + "ASCII", "UTF-8", {crlf_newline: false}) + cp.last.should_not == "crlf_newline" + end + + it "raises an Encoding::ConverterNotFoundError if no conversion path exists" do + lambda do + Encoding::Converter.search_convpath( + Encoding::ASCII_8BIT, Encoding::Emacs_Mule) + end.should raise_error(Encoding::ConverterNotFoundError) + end + end +end diff --git a/spec/ruby/core/encoding/converter/source_encoding_spec.rb b/spec/ruby/core/encoding/converter/source_encoding_spec.rb new file mode 100644 index 0000000000..acec01502d --- /dev/null +++ b/spec/ruby/core/encoding/converter/source_encoding_spec.rb @@ -0,0 +1,13 @@ +require File.expand_path('../../../../spec_helper', __FILE__) + +with_feature :encoding do + describe "Encoding::Converter#source_encoding" do + it "returns the source encoding as an Encoding object" do + ec = Encoding::Converter.new('ASCII','Big5') + ec.source_encoding.should == Encoding::US_ASCII + + ec = Encoding::Converter.new('Shift_JIS','EUC-JP') + ec.source_encoding.should == Encoding::SHIFT_JIS + end + end +end |