diff options
-rw-r--r-- | NEWS | 7 | ||||
-rw-r--r-- | io.c | 44 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 8 |
3 files changed, 56 insertions, 3 deletions
@@ -86,6 +86,13 @@ GC::
       Details on the algorithm and caveats can be found here:
       https://bugs.ruby-lang.org/issues/15626
 
+IO::
+
+  New method::
+
+    * Added IO#set_encoding_by_bom to check the BOM and set the external
+      encoding. [Bug #15210]
+
 Integer::
 
   Modified method::
@@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io)
     return 0;
 }
 
-static void
+static rb_encoding *
 io_set_encoding_by_bom(VALUE io)
 {
     int idx = io_strip_bom(io);
     rb_io_t *fptr;
+    rb_encoding *extenc = NULL;
 
     GetOpenFile(io, fptr);
     if (idx) {
-        io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)),
-                rb_io_internal_encoding(io), Qnil);
+        extenc = rb_enc_from_index(idx);
+        io_encoding_set(fptr, rb_enc_from_encoding(extenc),
+                        rb_io_internal_encoding(io), Qnil);
     }
     else {
         fptr->encs.enc2 = NULL;
     }
+    return extenc;
 }
 
 static VALUE
@@ -8308,6 +8311,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io)
 
 /*
  *  call-seq:
+ *     ios.set_encoding_by_bom -> encoding or nil
+ *
+ *  Checks if +ios+ starts with a BOM, and then consumes it and sets
+ *  the external encoding. Returns the result encoding if found, or
+ *  nil. If +ios+ is not binmode or its encoding has been set
+ *  already, an exception will be raised.
+ *
+ *    File.write("bom.txt", "\u{FEFF}abc")
+ *    ios = File.open("bom.txt", "rb")
+ *    ios.set_encoding_by_bom #=> #<Encoding:UTF-8>
+ *
+ *    File.write("nobom.txt", "abc")
+ *    ios = File.open("nobom.txt", "rb")
+ *    ios.set_encoding_by_bom #=> nil
+ */
+
+static VALUE
+rb_io_set_encoding_by_bom(VALUE io)
+{
+    rb_io_t *fptr;
+
+    GetOpenFile(io, fptr);
+    if (!(fptr->mode & FMODE_BINMODE)) {
+        rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode");
+    }
+    if (fptr->encs.enc2) {
+        rb_raise(rb_eArgError, "encoding conversion is set");
+    }
+    if (!io_set_encoding_by_bom(io)) return Qnil;
+    return rb_enc_from_encoding(fptr->encs.enc);
+}
+
+/*
+ *  call-seq:
  *     File.new(filename, mode="r" [, opt]) -> file
  *     File.new(filename [, mode [, perm]] [, opt]) -> file
  *
@@ -13319,6 +13356,7 @@ Init_IO(void)
     rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0);
     rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0);
     rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1);
+    rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0);
 
     rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0);
     rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1);
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 6fb8d8a0cf..630f2eec08 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -2097,6 +2097,10 @@ EOT
           assert_equal(Encoding::UTF_8, result.encoding, message)
           assert_equal(stripped, result, message)
         end
+
+        File.open(path, "rb") {|f|
+          assert_equal(Encoding.find(name), f.set_encoding_by_bom)
+        }
       }
     end
   end
@@ -2139,6 +2143,10 @@ EOT
       assert_equal(stripped, result, bug8323)
       result = File.read(path, encoding: 'BOM|UTF-8:UTF-8')
       assert_equal(stripped, result, bug8323)
+
+      File.open(path, "rb") {|f|
+        assert_nil(f.set_encoding_by_bom)
+      }
     }
   end
 
|