diff options
author | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2015-12-13 09:48:27 +0000 |
---|---|---|
committer | nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2015-12-13 09:48:27 +0000 |
commit | b861d5473cfa1f3aa64979d9ff0655230eea9a14 (patch) | |
tree | bbe2feebe5fd95bf5081846fff9be917cefe2339 | |
parent | ce6f0e36a3107e4d78f8b508581cebbc9c8dd0f7 (diff) | |
download | ruby-b861d5473cfa1f3aa64979d9ff0655230eea9a14.tar.gz |
io.c: BOM with non-UTF

* io.c (io_encname_bom_p): check BOM prefix only, not including
  UTF prefix.

* io.c (parse_mode_enc): warn BOM with non-UTF encoding.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53084 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | io.c | 47 | ||||
-rw-r--r-- | test/ruby/test_io_m17n.rb | 14 |
3 files changed, 42 insertions, 26 deletions
@@ -1,4 +1,9 @@ -Sun Dec 13 18:45:12 2015 Nobuyoshi Nakada <nobu@ruby-lang.org> +Sun Dec 13 18:46:31 2015 Nobuyoshi Nakada <nobu@ruby-lang.org> + + * io.c (io_encname_bom_p): check BOM prefix only, not including + UTF prefix. + + * io.c (parse_mode_enc): warn BOM with non-UTF encoding. * io.c (parse_mode_enc): fix buffer overflow. @@ -4852,11 +4852,14 @@ rb_io_fmode_modestr(int fmode) } } +static const char bom_prefix[] = "bom|"; +static const char utf_prefix[] = "utf-"; +enum {bom_prefix_len = (int)sizeof(bom_prefix) - 1}; +enum {utf_prefix_len = (int)sizeof(utf_prefix) - 1}; + static int io_encname_bom_p(const char *name, long len) { - static const char bom_prefix[] = "bom|utf-"; - enum {bom_prefix_len = (int)sizeof(bom_prefix) - 1}; return len > bom_prefix_len && STRNCASECMP(name, bom_prefix, bom_prefix_len) == 0; } @@ -5064,37 +5067,31 @@ parse_mode_enc(const char *estr, rb_encoding **enc_p, rb_encoding **enc2_p, int int idx, idx2; int fmode = fmode_p ? *fmode_p : 0; rb_encoding *ext_enc, *int_enc; + long len; /* parse estr as "enc" or "enc2:enc" or "enc:-" */ p = strrchr(estr, ':'); - if (p) { - long len = (p++) - estr; - if (len == 0 || len > ENCODING_MAXNAMELEN) - idx = -1; + len = p ? 
(p++ - estr) : (long)strlen(estr); + if ((fmode & FMODE_SETENC_BY_BOM) || io_encname_bom_p(estr, len)) { + estr += bom_prefix_len; + len -= bom_prefix_len; + if (!STRNCASECMP(estr, utf_prefix, utf_prefix_len)) { + fmode |= FMODE_SETENC_BY_BOM; + } else { - if (io_encname_bom_p(estr, len)) { - fmode |= FMODE_SETENC_BY_BOM; - estr += 4; - len -= 4; - } - memcpy(encname, estr, len); - encname[len] = '\0'; - estr = encname; - idx = rb_enc_find_index(encname); + rb_warn("BOM with non-UTF encoding %s is nonsense", estr); + fmode &= ~FMODE_SETENC_BY_BOM; } } + if (len == 0 || len > ENCODING_MAXNAMELEN) { + idx = -1; + } else { - long len = strlen(estr); - if (io_encname_bom_p(estr, len)) { - fmode |= FMODE_SETENC_BY_BOM; - estr += 4; - len -= 4; - if (len > 0 && len <= ENCODING_MAXNAMELEN) { - memcpy(encname, estr, len); - encname[len] = '\0'; - estr = encname; - } + if (p) { + memcpy(encname, estr, len); + encname[len] = '\0'; + estr = encname; } idx = rb_enc_find_index(estr); } diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 63eb4b7d88..fb99c0cd6d 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -2095,6 +2095,20 @@ EOT end; end + def test_bom_non_utf + enc = nil + + assert_warn(/BOM/) { + open(__FILE__, "r:bom|us-ascii") {|f| enc = f.external_encoding} + } + assert_equal(Encoding::US_ASCII, enc) + + assert_warn(/BOM/) { + open(IO::NULL, "w:bom|us-ascii") {|f| enc = f.external_encoding} + } + assert_equal(Encoding::US_ASCII, enc) + end + def test_cbuf with_tmpdir { fn = "tst" |