diff options
author | Jeremy Evans <code@jeremyevans.net> | 2022-08-23 12:22:24 -0700 |
---|---|---|
committer | Jeremy Evans <code@jeremyevans.net> | 2023-01-01 09:00:24 -0800 |
commit | 0903a251796c2b4086804a94420c231c04e3cea1 (patch) | |
tree | 15a05d6357a257df60c80cf8166e6410413f87e2 | |
parent | a4e9606da425a5f6bb5089b1037a3afd8cfa585b (diff) | |
download | ruby-0903a251796c2b4086804a94420c231c04e3cea1.tar.gz |
Make IO#set_encoding with binary external encoding use nil internal encoding
This was already the behavior when a single `'external:internal'`
encoding specifier string was passed. This makes the behavior
consistent for the case where separate external and internal
encoding specifiers are provided.
While here, fix the IO#set_encoding method documentation to
state that either the first or second argument can be a string
with an encoding name, and describe the behavior when the
external encoding is binary.
Fixes [Bug #18899]
-rw-r--r-- | io.c | 15 | ||||
-rw-r--r-- | spec/ruby/core/io/gets_spec.rb | 24 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 72 |
3 files changed, 103 insertions, 8 deletions
@@ -11573,6 +11573,11 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt) enc2 = NULL; } } + if (enc2 == rb_ascii8bit_encoding()) { + /* If external is ASCII-8BIT, no transcoding */ + enc = enc2; + enc2 = NULL; + } SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } @@ -13393,10 +13398,12 @@ rb_io_internal_encoding(VALUE io) * * See {Encodings}[rdoc-ref:File@Encodings]. * - * Argument +ext_enc+, if given, must be an Encoding object; + * Argument +ext_enc+, if given, must be an Encoding object + * or a String with the encoding name; * it is assigned as the encoding for the stream. * - * Argument +int_enc+, if given, must be an Encoding object; + * Argument +int_enc+, if given, must be an Encoding object + * or a String with the encoding name; * it is assigned as the encoding for the internal string. * * Argument <tt>'ext_enc:int_enc'</tt>, if given, is a string @@ -13404,6 +13411,10 @@ rb_io_internal_encoding(VALUE io) * corresponding Encoding objects are assigned as the external * and internal encodings for the stream. * + * If the external encoding of a string is binary/ASCII-8BIT, + * the internal encoding of the string is set to nil, since no + * transcoding is needed. + * * Optional keyword arguments +enc_opts+ specify * {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options]. * diff --git a/spec/ruby/core/io/gets_spec.rb b/spec/ruby/core/io/gets_spec.rb index 07be99f400..2d5e3d1ae3 100644 --- a/spec/ruby/core/io/gets_spec.rb +++ b/spec/ruby/core/io/gets_spec.rb @@ -301,11 +301,23 @@ describe "IO#gets" do @io.gets.encoding.should == Encoding::BINARY end - it "transcodes to internal encoding if the IO object's external encoding is BINARY" do - Encoding.default_external = Encoding::BINARY - Encoding.default_internal = Encoding::UTF_8 - @io = new_io @name, 'r' - @io.set_encoding Encoding::BINARY, Encoding::UTF_8 - @io.gets.encoding.should == Encoding::UTF_8 + ruby_version_is ''...'3.3' do + it "transcodes to internal encoding if the IO object's external encoding is BINARY" do + Encoding.default_external = Encoding::BINARY + Encoding.default_internal = Encoding::UTF_8 + @io = new_io @name, 'r' + @io.set_encoding Encoding::BINARY, Encoding::UTF_8 + @io.gets.encoding.should == Encoding::UTF_8 + end + end + + ruby_version_is '3.3' do + it "ignores the internal encoding if the IO object's external encoding is BINARY" do + Encoding.default_external = Encoding::BINARY + Encoding.default_internal = Encoding::UTF_8 + @io = new_io @name, 'r' + @io.set_encoding Encoding::BINARY, Encoding::UTF_8 + @io.gets.encoding.should == Encoding::BINARY + end end end diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 9c14087eba..267975d4ac 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -1158,6 +1158,78 @@ EOT end end + def test_set_encoding_argument_parsing + File.open(File::NULL) do |f| + f.set_encoding('binary') + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding(Encoding.find('binary')) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('binary:utf-8') + assert_equal(nil, f.internal_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('binary', 'utf-8') + assert_equal(nil, f.internal_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding(Encoding.find('binary'), Encoding.find('utf-8')) + assert_equal(nil, f.internal_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('binary', Encoding.find('utf-8')) + assert_equal(nil, f.internal_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding(Encoding.find('binary'), 'utf-8') + assert_equal(nil, f.internal_encoding) + assert_equal(Encoding::ASCII_8BIT, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('iso-8859-1:utf-8') + assert_equal(Encoding::UTF_8, f.internal_encoding) + assert_equal(Encoding::ISO_8859_1, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('iso-8859-1', 'utf-8') + assert_equal(Encoding::UTF_8, f.internal_encoding) + assert_equal(Encoding::ISO_8859_1, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding(Encoding.find('iso-8859-1'), Encoding.find('utf-8')) + assert_equal(Encoding::UTF_8, f.internal_encoding) + assert_equal(Encoding::ISO_8859_1, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding('iso-8859-1', Encoding.find('utf-8')) + assert_equal(Encoding::UTF_8, f.internal_encoding) + assert_equal(Encoding::ISO_8859_1, f.external_encoding) + end + + File.open(File::NULL) do |f| + f.set_encoding(Encoding.find('iso-8859-1'), 'utf-8') + assert_equal(Encoding::UTF_8, f.internal_encoding) + assert_equal(Encoding::ISO_8859_1, f.external_encoding) + end + end + def test_textmode_twice assert_raise(ArgumentError) { open(__FILE__, "rt", textmode: true) {|f| |