From e717d6faa8463c70407e6aaf116c6b6181f30be6 Mon Sep 17 00:00:00 2001 From: Nobuyoshi Nakada Date: Sat, 8 Jun 2019 21:35:33 +0900 Subject: IO#set_encoding_by_bom * io.c (rb_io_set_encoding_by_bom): IO#set_encoding_by_bom to set the encoding by BOM if exists. [Bug #15210] --- io.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'io.c') diff --git a/io.c b/io.c index 893cabc248..0db981e39c 100644 --- a/io.c +++ b/io.c @@ -6170,20 +6170,23 @@ io_strip_bom(VALUE io) return 0; } -static void +static rb_encoding * io_set_encoding_by_bom(VALUE io) { int idx = io_strip_bom(io); rb_io_t *fptr; + rb_encoding *extenc = NULL; GetOpenFile(io, fptr); if (idx) { - io_encoding_set(fptr, rb_enc_from_encoding(rb_enc_from_index(idx)), - rb_io_internal_encoding(io), Qnil); + extenc = rb_enc_from_index(idx); + io_encoding_set(fptr, rb_enc_from_encoding(extenc), + rb_io_internal_encoding(io), Qnil); } else { fptr->encs.enc2 = NULL; } + return extenc; } static VALUE @@ -8306,6 +8309,40 @@ rb_io_initialize(int argc, VALUE *argv, VALUE io) return io; } +/* + * call-seq: + * ios.set_encoding_by_bom -> encoding or nil + * + * Checks if +ios+ starts with a BOM, and then consumes it and sets + * the external encoding. Returns the result encoding if found, or + * nil. If +ios+ is not binmode or its encoding has been set + * already, an exception will be raised. + * + * File.write("bom.txt", "\u{FEFF}abc") + * ios = File.open("bom.txt", "rb") + * ios.set_encoding_by_bom #=> # + * + * File.write("nobom.txt", "abc") + * ios = File.open("nobom.txt", "rb") + * ios.set_encoding_by_bom #=> nil + */ + +static VALUE +rb_io_set_encoding_by_bom(VALUE io) +{ + rb_io_t *fptr; + + GetOpenFile(io, fptr); + if (!(fptr->mode & FMODE_BINMODE)) { + rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); + } + if (fptr->encs.enc2) { + rb_raise(rb_eArgError, "encoding conversion is set"); + } + if (!io_set_encoding_by_bom(io)) return Qnil; + return rb_enc_from_encoding(fptr->encs.enc); +} + /* * call-seq: * File.new(filename, mode="r" [, opt]) -> file @@ -13319,6 +13356,7 @@ Init_IO(void) rb_define_method(rb_cIO, "external_encoding", rb_io_external_encoding, 0); rb_define_method(rb_cIO, "internal_encoding", rb_io_internal_encoding, 0); rb_define_method(rb_cIO, "set_encoding", rb_io_set_encoding, -1); + rb_define_method(rb_cIO, "set_encoding_by_bom", rb_io_set_encoding_by_bom, 0); rb_define_method(rb_cIO, "autoclose?", rb_io_autoclose_p, 0); rb_define_method(rb_cIO, "autoclose=", rb_io_set_autoclose, 1); -- cgit v1.2.3