about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jeremy Evans <code@jeremyevans.net> 2022-08-23 12:22:24 -0700
committer: Jeremy Evans <code@jeremyevans.net> 2023-01-01 09:00:24 -0800
commit: 0903a251796c2b4086804a94420c231c04e3cea1 (patch)
tree: 15a05d6357a257df60c80cf8166e6410413f87e2
parent: a4e9606da425a5f6bb5089b1037a3afd8cfa585b (diff)
download: ruby-0903a251796c2b4086804a94420c231c04e3cea1.tar.gz
Make IO#set_encoding with binary external encoding use nil internal encoding
This was already the behavior when a single `'external:internal'` encoding specifier string was passed. This makes the behavior consistent for the case where separate external and internal encoding specifiers are provided. While here, fix the IO#set_encoding method documentation to state that either the first or second argument can be a string with an encoding name, and describe the behavior when the external encoding is binary. Fixes [Bug #18899]
-rw-r--r-- io.c 15
-rw-r--r-- spec/ruby/core/io/gets_spec.rb 24
-rw-r--r-- test/ruby/test_io_m17n.rb 72
3 files changed, 103 insertions, 8 deletions
diff --git a/io.c b/io.c
index 2e51a46635..dbcfcf3fc0 100644
--- a/io.c
+++ b/io.c
@@ -11573,6 +11573,11 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt)
enc2 = NULL;
}
}
+ if (enc2 == rb_ascii8bit_encoding()) {
+ /* If external is ASCII-8BIT, no transcoding */
+ enc = enc2;
+ enc2 = NULL;
+ }
SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags);
ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags);
}
@@ -13393,10 +13398,12 @@ rb_io_internal_encoding(VALUE io)
*
* See {Encodings}[rdoc-ref:File@Encodings].
*
- * Argument +ext_enc+, if given, must be an Encoding object;
+ * Argument +ext_enc+, if given, must be an Encoding object
+ * or a String with the encoding name;
* it is assigned as the encoding for the stream.
*
- * Argument +int_enc+, if given, must be an Encoding object;
+ * Argument +int_enc+, if given, must be an Encoding object
+ * or a String with the encoding name;
* it is assigned as the encoding for the internal string.
*
* Argument <tt>'ext_enc:int_enc'</tt>, if given, is a string
@@ -13404,6 +13411,10 @@ rb_io_internal_encoding(VALUE io)
* corresponding Encoding objects are assigned as the external
* and internal encodings for the stream.
*
+ * If the external encoding is binary/ASCII-8BIT,
+ * the internal encoding of the stream is set to nil, since no
+ * transcoding is needed.
+ *
* Optional keyword arguments +enc_opts+ specify
* {Encoding options}[rdoc-ref:encodings.rdoc@Encoding+Options].
*
diff --git a/spec/ruby/core/io/gets_spec.rb b/spec/ruby/core/io/gets_spec.rb
index 07be99f400..2d5e3d1ae3 100644
--- a/spec/ruby/core/io/gets_spec.rb
+++ b/spec/ruby/core/io/gets_spec.rb
@@ -301,11 +301,23 @@ describe "IO#gets" do
@io.gets.encoding.should == Encoding::BINARY
end
- it "transcodes to internal encoding if the IO object's external encoding is BINARY" do
- Encoding.default_external = Encoding::BINARY
- Encoding.default_internal = Encoding::UTF_8
- @io = new_io @name, 'r'
- @io.set_encoding Encoding::BINARY, Encoding::UTF_8
- @io.gets.encoding.should == Encoding::UTF_8
+ ruby_version_is ''...'3.3' do
+ it "transcodes to internal encoding if the IO object's external encoding is BINARY" do
+ Encoding.default_external = Encoding::BINARY
+ Encoding.default_internal = Encoding::UTF_8
+ @io = new_io @name, 'r'
+ @io.set_encoding Encoding::BINARY, Encoding::UTF_8
+ @io.gets.encoding.should == Encoding::UTF_8
+ end
+ end
+
+ ruby_version_is '3.3' do
+ it "ignores the internal encoding if the IO object's external encoding is BINARY" do
+ Encoding.default_external = Encoding::BINARY
+ Encoding.default_internal = Encoding::UTF_8
+ @io = new_io @name, 'r'
+ @io.set_encoding Encoding::BINARY, Encoding::UTF_8
+ @io.gets.encoding.should == Encoding::BINARY
+ end
end
end
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 9c14087eba..267975d4ac 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -1158,6 +1158,78 @@ EOT
end
end
+ def test_set_encoding_argument_parsing
+ File.open(File::NULL) do |f|
+ f.set_encoding('binary')
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding(Encoding.find('binary'))
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('binary:utf-8')
+ assert_equal(nil, f.internal_encoding)
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('binary', 'utf-8')
+ assert_equal(nil, f.internal_encoding)
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding(Encoding.find('binary'), Encoding.find('utf-8'))
+ assert_equal(nil, f.internal_encoding)
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('binary', Encoding.find('utf-8'))
+ assert_equal(nil, f.internal_encoding)
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding(Encoding.find('binary'), 'utf-8')
+ assert_equal(nil, f.internal_encoding)
+ assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('iso-8859-1:utf-8')
+ assert_equal(Encoding::UTF_8, f.internal_encoding)
+ assert_equal(Encoding::ISO_8859_1, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('iso-8859-1', 'utf-8')
+ assert_equal(Encoding::UTF_8, f.internal_encoding)
+ assert_equal(Encoding::ISO_8859_1, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding(Encoding.find('iso-8859-1'), Encoding.find('utf-8'))
+ assert_equal(Encoding::UTF_8, f.internal_encoding)
+ assert_equal(Encoding::ISO_8859_1, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding('iso-8859-1', Encoding.find('utf-8'))
+ assert_equal(Encoding::UTF_8, f.internal_encoding)
+ assert_equal(Encoding::ISO_8859_1, f.external_encoding)
+ end
+
+ File.open(File::NULL) do |f|
+ f.set_encoding(Encoding.find('iso-8859-1'), 'utf-8')
+ assert_equal(Encoding::UTF_8, f.internal_encoding)
+ assert_equal(Encoding::ISO_8859_1, f.external_encoding)
+ end
+ end
+
def test_textmode_twice
assert_raise(ArgumentError) {
open(__FILE__, "rt", textmode: true) {|f|