From daa622aed079b4434e607820d7577b4a7d4f2bfc Mon Sep 17 00:00:00 2001 From: matz Date: Fri, 29 Feb 2008 09:19:15 +0000 Subject: * time.c (time_strftime): format should be ascii compatible. * parse.y (rb_intern3): non ASCII compatible symbols. * re.c (rb_reg_regsub): add encoding check. * string.c (rb_str_chomp_bang): ditto. * test/ruby/test_utf16.rb (TestUTF16::test_chomp): raises exception. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15640 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 12 ++++++++++++ parse.y | 4 ++++ re.c | 1 + string.c | 11 +++++++++++ test/ruby/test_utf16.rb | 20 +++++++++++++++----- time.c | 4 ++++ 6 files changed, 47 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 306c4bc295..3415de734d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +Fri Feb 29 18:08:43 2008 Yukihiro Matsumoto + + * time.c (time_strftime): format should be ascii compatible. + + * parse.y (rb_intern3): non ASCII compatible symbols. + + * re.c (rb_reg_regsub): add encoding check. + + * string.c (rb_str_chomp_bang): ditto. + + * test/ruby/test_utf16.rb (TestUTF16::test_chomp): raises exception. + Fri Feb 29 15:16:31 2008 Yukihiro Matsumoto * string.c (rb_str_rpartition): calculation was done in byte indexing. 
diff --git a/parse.y b/parse.y index bf0cd7388f..88e72de4bf 100644 --- a/parse.y +++ b/parse.y @@ -9008,6 +9008,10 @@ rb_intern3(const char *name, long len, rb_encoding *enc) if (st_lookup(global_symbols.sym_id, str, (st_data_t *)&id)) return id; + if (rb_cString && !rb_enc_asciicompat(enc)) { + id = ID_JUNK; + goto new_id; + } last = len-1; id = 0; switch (*m) { diff --git a/re.c b/re.c index 49424eb632..61f6990d04 100644 --- a/re.c +++ b/re.c @@ -2960,6 +2960,7 @@ rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp) rb_encoding *str_enc = rb_enc_get(str); rb_encoding *src_enc = rb_enc_get(src); + rb_enc_check(str, src); p = s = RSTRING_PTR(str); e = s + RSTRING_LEN(str); diff --git a/string.c b/string.c index ffae4518e9..1b88ded7b1 100644 --- a/string.c +++ b/string.c @@ -5273,6 +5273,7 @@ rb_str_chomp_bang(int argc, VALUE *argv, VALUE str) rs = rb_rs; if (rs == rb_default_rs) { smart_chomp: + rb_enc_check(str, rs); rb_str_modify(str); if (RSTRING_PTR(str)[len-1] == '\n') { STR_DEC_LEN(str); @@ -5648,6 +5649,11 @@ rb_str_scan(VALUE str, VALUE pat) static VALUE rb_str_hex(VALUE str) { + rb_encoding *enc = rb_enc_get(str); + + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eArgError, "ASCII incompatible encoding: %s", rb_enc_name(enc)); + } return rb_str_to_inum(str, 16, Qfalse); } @@ -5669,6 +5675,11 @@ rb_str_hex(VALUE str) static VALUE rb_str_oct(VALUE str) { + rb_encoding *enc = rb_enc_get(str); + + if (!rb_enc_asciicompat(enc)) { + rb_raise(rb_eArgError, "ASCII incompatible encoding: %s", rb_enc_name(enc)); + } return rb_str_to_inum(str, -8, Qfalse); } diff --git a/test/ruby/test_utf16.rb b/test/ruby/test_utf16.rb index bd56377734..1647563f84 100644 --- a/test/ruby/test_utf16.rb +++ b/test/ruby/test_utf16.rb @@ -132,13 +132,21 @@ EOT end def test_hex - s1 = "f\0f\0".force_encoding("utf-16le") - assert_equal(255, s1.hex, "#{encdump s1}.hex") + assert_raise(ArgumentError) { + "ff".encode("utf-16le").hex + } + assert_raise(ArgumentError) { 
+ "ff".encode("utf-16be").hex + } end def test_oct - assert_equal(077, "77".encode("utf-16le").oct) - assert_equal(077, "77".encode("utf-16be").oct) + assert_raise(ArgumentError) { + "77".encode("utf-16le").oct + } + assert_raise(ArgumentError) { + "77".encode("utf-16be").oct + } end def test_count @@ -224,7 +232,9 @@ EOT def test_chomp s = "\1\n".force_encoding("utf-16be") - assert_str_equal(s, s.chomp, "#{encdump s}.chomp") + assert_raise(ArgumentError, "#{encdump s}.chomp") { + s.chomp + } end def test_succ diff --git a/time.c b/time.c index 8b3ac6c501..455f15bc1f 100644 --- a/time.c +++ b/time.c @@ -12,6 +12,7 @@ #include "ruby/ruby.h" #include <sys/types.h> #include <time.h> +#include "ruby/encoding.h" #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -2077,6 +2078,9 @@ time_strftime(VALUE time, VALUE format) time_get_tm(time, tobj->gmt); } StringValue(format); + if (!rb_enc_str_asciicompat_p(format)) { + rb_raise(rb_eArgError, "format should have ASCII compatible encoding"); + } format = rb_str_new4(format); fmt = RSTRING_PTR(format); len = RSTRING_LEN(format); -- cgit v1.2.3