diff options
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | bootstraptest/test_m17n.rb | 200 | ||||
-rw-r--r-- | configure.in | 2 | ||||
-rw-r--r-- | encoding.c | 22 | ||||
-rw-r--r-- | string.c | 11 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 109 |
6 files changed, 137 insertions, 221 deletions
@@ -1,3 +1,17 @@ +Tue Jan 22 12:57:07 2008 Yukihiro Matsumoto <matz@ruby-lang.org> + + * configure.in (MINIRUBY): remove -I$(EXTOUT)/$(arch) from + MINIRUBY since miniruby might not be able to load DLL. + + * test/ruby/test_m17n.rb: move tests from bootstrap test. + + * encoding.c (enc_find): should check name if ASCII compatible. + + * string.c (rb_str_end_with): should check character boundary. + + * encoding.c (rb_enc_compatible): encoding must be ASCII + compatible before checking ENC_CODERANGE_7BIT. + Tue Jan 22 09:26:47 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * string.c (rb_str_each_char): iterates over a shadow. diff --git a/bootstraptest/test_m17n.rb b/bootstraptest/test_m17n.rb deleted file mode 100644 index d69a110f3d..0000000000 --- a/bootstraptest/test_m17n.rb +++ /dev/null @@ -1,200 +0,0 @@ -assert_normal_exit %q{ - "abcdefghij\xf0".force_encoding("utf-8").reverse.inspect -}, '[ruby-dev:32448]' - -assert_equal 'true', %q{ - "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp")) == - "a\xa1\xa1c".force_encoding("euc-jp") -} - -assert_equal 'ok', %q{ - begin - if ("\xa1\xa2\xa1\xa3").force_encoding("euc-jp").split(//) == - ["\xa1\xa2".force_encoding("euc-jp"), "\xa1\xa3".force_encoding("euc-jp")] - :ok - else - :ng - end - rescue - :ng - end -}, '[ruby-dev:32452]' - -assert_equal 'ok', %q{ - begin - "\xa1\xa1".force_encoding("euc-jp") + "\xa1".force_encoding("ascii-8bit") - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - begin - "%s%s" % ["\xc2\xa1".force_encoding("sjis"), "\xc2\xa1".force_encoding("euc-jp")] - rescue ArgumentError - :ok - end -} - -assert_equal '0', %q{ - "\xa1\xa2".force_encoding("euc-jp").count("z") -} - -assert_equal '1', %q{ - "\xa1\xa2".force_encoding("euc-jp").delete("z").length -} - -assert_equal 'false', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").include?("\xa3".force_encoding("euc-jp")) -} - -assert_equal 'ok', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").index("\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'ok', %q{ - "\xa1\xa2\xa3\xa4".force_encoding("euc-jp").rindex("\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'false', %q{ - s1 = "\xa1\xa1".force_encoding("euc-jp") - s2 = s1.dup - (94*94+94).times { s2.next! } - s1 == s2 -} - -assert_equal 'ok', %q{ - "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").scan(/a/) - :ok -} - -assert_equal 'ok', %q{ - "\xa1\xa2a\xa3\xa4".force_encoding("euc-jp").split(/a/) - :ok -} - -assert_equal 'ok', %q{ - s1 = "\xa1\xa2".force_encoding("euc-jp") - s2 = "\xa1\xa2".force_encoding("sjis") - begin - s1.upto(s2) {|x| break } - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - "%s" % "\xa1\xa1".force_encoding("euc-jp") == - "\xa1\xa1".force_encoding("euc-jp") -} - -assert_equal 'a', %q{ - s = "a".dup.force_encoding("EUC-JP") - s.length - s[0,2] -} - -assert_equal 'ok', %q{ - s1 = "\x81\x41".force_encoding("sjis") - s2 = "\x81\x61".force_encoding("sjis") - s1.casecmp(s2) == 0 ? :ng : :ok -} - -assert_equal 'EUC-JP', %q{ ("\xc2\xa1 %s".force_encoding("EUC-JP") % "foo").encoding.name } -assert_equal 'true', %q{ "\xa1\xa2\xa3\xa4".force_encoding("euc-jp")["\xa2\xa3".force_encoding("euc-jp")] == nil } -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s["\xb0\xa3"] = "foo" - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'EUC-JP', %q{ "\xa3\xb0".force_encoding("EUC-JP").center(10).encoding.name } - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.chomp("\xa3\xb4".force_encoding("shift_jis")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.count("\xa3\xb0".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.delete("\xa3\xb2".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.each_line("\xa3\xb1".force_encoding("ascii-8bit")) {|l| } - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.gsub(/\xa3\xb1/e, "z") == "\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") -} - -assert_equal 'false', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.include?("\xb0\xa3".force_encoding("euc-jp")) -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.index("\xb3\xa3".force_encoding("euc-jp")) or :ok -} - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.insert(-1, "a") - :ok -} - -assert_finish 1, %q{ "\xa3\xfe".force_encoding("euc-jp").next } - -assert_equal 'ok', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - begin - s.rindex("\xb1\xa3".force_encoding("ascii-8bit")) - :ng - rescue ArgumentError - :ok - end -} - -assert_equal 'true', %q{ - s = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - s.split("\xa3\xb1".force_encoding("euc-jp")) == [ - "\xa3\xb0".force_encoding("euc-jp"), - "\xa3\xb2\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - ] -}, '[ruby-dev:32452]' - -assert_normal_exit %q{ // =~ :a } diff --git a/configure.in b/configure.in index e2e549c8bb..c064b378c0 100644 --- a/configure.in +++ b/configure.in @@ -1370,7 +1370,7 @@ if test x"$cross_compiling" = xyes; then PREP=fake.rb RUNRUBY='$(MINIRUBY) -I`cd $(srcdir)/lib; pwd`' else - MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib -I$(EXTOUT)/$(arch)' + MINIRUBY='./miniruby$(EXEEXT) -I$(srcdir)/lib' PREP='miniruby$(EXEEXT)' RUNRUBY='$(MINIRUBY) $(srcdir)/runruby.rb --extout=$(EXTOUT)' fi diff --git a/encoding.c b/encoding.c index 328f2d4742..f429f2c816 100644 --- a/encoding.c +++ b/encoding.c @@ -640,7 +640,7 @@ rb_encoding* rb_enc_compatible(VALUE str1, VALUE str2) { int idx1, idx2; - rb_encoding *enc; + rb_encoding *enc1, *enc2; idx1 = rb_enc_get_index(str1); idx2 = rb_enc_get_index(str2); @@ -648,6 +648,10 @@ rb_enc_compatible(VALUE str1, VALUE str2) if (idx1 == idx2) { return rb_enc_from_index(idx1); } + enc1 = rb_enc_from_index(idx1); + enc2 = rb_enc_from_index(idx2); + if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) + return 0; if (BUILTIN_TYPE(str1) != T_STRING) { VALUE tmp = str1; int idx0 = idx1; @@ -664,17 +668,15 @@ rb_enc_compatible(VALUE str1, VALUE str2) cr2 = rb_enc_str_coderange(str2); if (cr1 != cr2) { /* may need to handle ENC_CODERANGE_BROKEN */ - if (cr1 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx2); - if (cr2 == ENC_CODERANGE_7BIT) return rb_enc_from_index(idx1); + if (cr1 == ENC_CODERANGE_7BIT) return enc2; } if (cr2 == ENC_CODERANGE_7BIT) { - if (idx1 == 0) return rb_enc_from_index(idx2); - return rb_enc_from_index(idx1); + if (idx1 == 0) return enc2; + return enc1; } } - if (cr1 == ENC_CODERANGE_7BIT && - rb_enc_asciicompat(enc = rb_enc_from_index(idx2))) - return enc; + if (cr1 == ENC_CODERANGE_7BIT) + return enc2; } return 0; } @@ -908,7 +910,11 @@ static VALUE enc_find(VALUE klass, VALUE enc) { int idx; + if (SYMBOL_P(enc)) enc = rb_id2str(SYM2ID(enc)); + if (!rb_enc_asciicompat(rb_enc_get(enc))) { + rb_raise(rb_eArgError, "invalid name encoding (non ASCII)"); + } idx = rb_enc_find_index(StringValueCStr(enc)); if (idx < 0) { rb_raise(rb_eArgError, "unknown encoding name - %s", RSTRING_PTR(enc)); @@ -5522,14 +5522,19 @@ static VALUE rb_str_end_with(int argc, VALUE *argv, VALUE str) { int i; + char *p, *s; + rb_encoding *enc; for (i=0; i<argc; i++) { VALUE tmp = rb_check_string_type(argv[i]); if (NIL_P(tmp)) continue; - rb_enc_check(str, tmp); + enc = rb_enc_check(str, tmp); if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue; - if (memcmp(RSTRING_PTR(str) + RSTRING_LEN(str) - RSTRING_LEN(tmp), - RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0) + p = RSTRING_PTR(str); + s = p + RSTRING_LEN(str) - RSTRING_LEN(tmp); + if (rb_enc_left_char_head(p, s, enc) != s) + continue; + if (memcmp(s, p, RSTRING_LEN(tmp)) == 0) return Qtrue; } return Qfalse; diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 5a0115e285..a7ec25d57d 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -690,6 +690,9 @@ class TestM17N < Test::Unit::TestCase #assert_raise(ArgumentError) { s("%c") % 0xc2a1 } assert_strenc("\u{c2a1}", 'UTF-8', u("%c") % 0xc2a1) assert_strenc("\u{c2}", 'UTF-8', u("%c") % 0xc2) + assert_raise(ArgumentError) { + "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")] + } end def test_sprintf_p @@ -744,6 +747,7 @@ class TestM17N < Test::Unit::TestCase assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00")) assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00")) assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00")) + assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name) end def test_str_lt @@ -791,29 +795,34 @@ class TestM17N < Test::Unit::TestCase assert_equal(nil, u("\xc2\xa1\xc2\xa2\xc2\xa3")[u("\xa1\xc2")]) assert_raise(ArgumentError) { u("\xc2\xa1\xc2\xa2\xc2\xa3")[a("\xa1\xc2")] } + assert_nil(e("\xa1\xa2\xa3\xa4")[e("\xa2\xa3")]) + end + + def test_aset + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s["\xb0\xa3"] = "foo"} end def test_str_center - assert_encoding("EUC-JP", "a".center(5, "\xa1\xa2".force_encoding("euc-jp")).encoding) + assert_encoding("EUC-JP", "a".center(5, e("\xa1\xa2")).encoding) + assert_encoding("EUC-JP", e("\xa3\xb0").center(10).encoding) end def test_squeeze - s = "\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp") - assert_equal("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4".force_encoding("euc-jp"), s.squeeze) + s = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4") + assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4"), s.squeeze) end def test_tr - s = "\x81\x41".force_encoding("shift_jis") + s = s("\x81\x41") assert_equal(s.tr("A", "B"), s) assert_equal(s.tr_s("A", "B"), s) assert_nothing_raised { - "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "a".force_encoding("EUC-JP")) + "a".force_encoding("ASCII-8BIT").tr(a("a"), a("a")) } - assert_equal("\xA1\xA1".force_encoding("EUC-JP"), - "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP"))) - + assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1"))) end def test_tr_s @@ -821,6 +830,40 @@ class TestM17N < Test::Unit::TestCase "a".force_encoding("ASCII-8BIT").tr("a".force_encoding("ASCII-8BIT"), "\xA1\xA1".force_encoding("EUC-JP"))) end + def test_count + assert_equal(0, e("\xa1\xa2").count("z")) + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s.count(a("\xa3\xb0"))} + end + + def test_delete + assert_equal(1, e("\xa1\xa2").delete("z").length) + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s.delete(a("\xa3\xb2"))} + end + + def test_include? + assert_equal(false, e("\xa1\xa2\xa3\xa4").include?(e("\xa3"))) + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_equal(false, s.include?(e("\xb0\xa3"))) + end + + def test_index + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_nil(s.index(e("\xb3\xa3"))) + assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3"))) + assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3"))) + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s.rindex(a("\xb1\xa3"))} + end + + def test_next + s1 = e("\xa1\xa1") + s2 = s1.dup + (94*94+94).times { s2.next! } + assert_not_equal(s1, s2) + end + def test_sub s = "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp")) assert_encoding("EUC-JP", s.encoding) @@ -828,6 +871,40 @@ class TestM17N < Test::Unit::TestCase assert_equal(Encoding::EUC_JP, "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&').encoding) end + def test_insert + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_equal(e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a"), s.insert(-1, "a")) + end + + def test_scan + assert_equal(["a"], e("\xa1\xa2a\xa3\xa4").scan(/a/)) + end + + def test_upto + s1 = e("\xa1\xa2") + s2 = s("\xa1\xa2") + assert_raise(ArgumentError){s1.upto(s2) {|x| break }} + end + + def test_casecmp + s1 = s("\x81\x41") + s2 = s("\x81\x61") + assert_not_equal(0, s1.casecmp(s2)) + end + + def test_reverse + assert_equal(u("\xf0jihgfedcba"), u("abcdefghij\xf0").reverse) + end + + def test_plus + assert_raise(ArgumentError){u("\xe3\x81\x82") + a("\xa1")} + end + + def test_chomp + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s.chomp(s("\xa3\xb4"))} + end + def test_gsub s = 'abc' s.ascii_only? @@ -844,14 +921,21 @@ class TestM17N < Test::Unit::TestCase "\xc2\xa1".force_encoding("utf-8") } } + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_equal(e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4"), s.gsub(/\xa3\xb1/e, "z")) end def test_end_with - s1 = "\x81\x40".force_encoding("sjis") + s1 = s("\x81\x40") s2 = "@" assert_equal(false, s1.end_with?(s2), "#{encdump s1}.end_with?(#{encdump s2})") end + def test_each_line + s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4") + assert_raise(ArgumentError){s.each_line(a("\xa3\xb1")) {|l| }} + end + def test_each_char a = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"] s = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp") @@ -860,6 +944,13 @@ class TestM17N < Test::Unit::TestCase def test_regexp_match assert_equal([0,0], //.match("\xa1\xa1".force_encoding("euc-jp"),-1).offset(0)) + assert_equal(0, // =~ :a) + end + + def test_split + assert_equal(e("\xa1\xa2\xa1\xa3").split(//), + [e("\xa1\xa2"), e("\xa1\xa3")], + '[ruby-dev:32452]') end def test_nonascii_method_name |