From a197a30a94938a54543c35a3f55fbcd542d402b8 Mon Sep 17 00:00:00 2001 From: mame Date: Sun, 17 Feb 2008 11:13:22 +0000 Subject: * test/ruby/test_regexp.rb: add tests to achieve over 90% test coverage of re.c. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15523 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- test/ruby/test_regexp.rb | 495 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 495 insertions(+) (limited to 'test') diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 50aaefa618..813ade7149 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -1,6 +1,15 @@ require 'test/unit' class TestRegexp < Test::Unit::TestCase + def setup + @verbose = $VERBOSE + $VERBOSE = nil + end + + def teardown + $VERBOSE = @verbose + end + def test_ruby_dev_24643 assert_nothing_raised("[ruby-dev:24643]") { /(?:(?:[a]*[a])?b)*a*$/ =~ "aabaaca" @@ -115,4 +124,490 @@ class TestRegexp < Test::Unit::TestCase m = r.match("a") assert_equal(r, m.regexp) end + + def test_source + assert_equal('', //.source) + end + + def test_inspect + assert_equal('//', //.inspect) + assert_equal('//i', //i.inspect) + assert_equal('/\//i', /\//i.inspect) + assert_equal('/\//i', /#{'/'}/i.inspect) + assert_equal('/\/x/i', /\/x/i.inspect) + assert_equal('/\x00/i', /#{"\0"}/i.inspect) + assert_equal("/\n/i", /#{"\n"}/i.inspect) + s = [0xff].pack("C") + assert_equal('/\/'+s+'/i', /\/#{s}/i.inspect) + end + + def test_char_to_option + assert_equal("BAR", "FOOBARBAZ"[/b../i]) + assert_equal("bar", "foobarbaz"[/ b . . /x]) + assert_equal("bar\n", "foo\nbar\nbaz"[/b.../m]) + assert_raise(SyntaxError) { eval('//z') } + end + + def test_char_to_option_kcode + assert_equal("bar", "foobarbaz"[/b../s]) + assert_equal("bar", "foobarbaz"[/b../e]) + assert_equal("bar", "foobarbaz"[/b../u]) + end + + def test_to_s2 + assert_equal('(?-mix:foo)', /(?:foo)/.to_s) + assert_equal('(?m-ix:foo)', /(?:foo)/m.to_s) + assert_equal('(?mi-x:foo)', /(?:foo)/mi.to_s) + assert_equal('(?mix:foo)', /(?:foo)/mix.to_s) + assert_equal('(?m-ix:foo)', /(?m-ix:foo)/.to_s) + assert_equal('(?mi-x:foo)', /(?mi-x:foo)/.to_s) + assert_equal('(?mix:foo)', /(?mix:foo)/.to_s) + assert_equal('(?mix:)', /(?mix)/.to_s) + assert_equal('(?-mix:(?mix:foo) )', /(?mix:foo) /.to_s) + end + + def test_casefold_p + assert_equal(false, /a/.casefold?) + assert_equal(true, /a/i.casefold?) + assert_equal(false, /(?i:a)/.casefold?) + end + + def test_options + assert_equal(Regexp::IGNORECASE, /a/i.options) + assert_equal(Regexp::EXTENDED, /a/x.options) + assert_equal(Regexp::MULTILINE, /a/m.options) + end + + def test_match_init_copy + m = /foo/.match("foo") + assert_equal(/foo/, m.dup.regexp) + assert_raise(TypeError) do + m.instance_eval { initialize_copy(nil) } + end + assert_equal([0, 3], m.offset(0)) + assert_equal(/foo/, m.dup.regexp) + end + + def test_match_regexp + re = /foo/ + assert_equal(re, re.match("foo").regexp) + end + + def test_match_size + m = /(.)(.)(\d+)(\d)/.match("THX1138.") + assert_equal(5, m.size) + end + + def test_match_offset_begin_end + m = /(?b..)/.match("foobarbaz") + assert_equal([3, 6], m.offset("x")) + assert_equal(3, m.begin("x")) + assert_equal(6, m.end("x")) + assert_raise(IndexError) { m.offset("y") } + assert_raise(IndexError) { m.offset(2) } + assert_raise(IndexError) { m.begin(2) } + assert_raise(IndexError) { m.end(2) } + + m = /(?q..)?/.match("foobarbaz") + assert_equal([nil, nil], m.offset("x")) + assert_equal(nil, m.begin("x")) + assert_equal(nil, m.end("x")) + + m = /\A\u3042(.)(.)?(.)\z/.match("\u3042\u3043\u3044") + assert_equal([1, 2], m.offset(1)) + assert_equal([nil, nil], m.offset(2)) + assert_equal([2, 3], m.offset(3)) + end + + def test_match_to_s + m = /(?b..)/.match("foobarbaz") + assert_equal("bar", m.to_s) + end + + def test_match_pre_post + m = /(?b..)/.match("foobarbaz") + assert_equal("foo", m.pre_match) + assert_equal("baz", m.post_match) + end + + def test_match_array + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal(["foobarbaz", "foo", "bar", "baz", nil], m.to_a) + end + + def test_match_captures + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal(["foo", "bar", "baz", nil], m.captures) + end + + def test_match_aref + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal("foo", m[1]) + assert_equal(["foo", "bar", "baz"], m[1..3]) + assert_nil(m[5]) + assert_raise(IndexError) { m[:foo] } + end + + def test_match_values_at + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal(["foo", "bar", "baz"], m.values_at(1, 2, 3)) + end + + def test_match_string + m = /(?b..)/.match("foobarbaz") + assert_equal("foobarbaz", m.string) + end + + def test_match_inspect + m = /(...)(...)(...)(...)?/.match("foobarbaz") + assert_equal('#', m.inspect) + end + + def test_initialize + assert_raise(ArgumentError) { Regexp.new } + assert_equal(/foo/, Regexp.new(/foo/, Regexp::IGNORECASE)) + re = /foo/ + assert_raise(SecurityError) do + Thread.new { $SAFE = 4; re.instance_eval { initialize(re) } }.join + end + re.taint + assert_raise(SecurityError) do + Thread.new { $SAFE = 4; re.instance_eval { initialize(re) } }.join + end + + assert_equal(Encoding::ASCII_8BIT, Regexp.new("b..", nil, "n").encoding) + assert_equal("bar", "foobarbaz"[Regexp.new("b..", nil, "n")]) + + assert_raise(RegexpError) { Regexp.new(")(") } + end + + def test_unescape + assert_raise(ArgumentError) { s = '\\'; /#{ s }/ } + assert_equal(/\177/, (s = '\177'; /#{ s }/)) + assert_raise(ArgumentError) { s = '\u'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u{ ffffffff }'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u{ ffffff }'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u{ ffff X }'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u{ }'; /#{ s }/ } + assert_equal("b", "abc"[(s = '\u{0062}'; /#{ s }/)]) + assert_equal("b", "abc"[(s = '\u0062'; /#{ s }/)]) + assert_raise(ArgumentError) { s = '\u0'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u000X'; /#{ s }/ } + assert_raise(ArgumentError) { s = "\xff" + '\u3042'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\u3042' + [0xff].pack("C"); /#{ s }/ } + assert_raise(SyntaxError) { s = ''; eval(%q(/\u#{ s }/)) } + + assert_equal(/a/, eval(%q(s="\u0061";/#{s}/n))) + assert_raise(RegexpError) { s = "\u3042"; eval(%q(/#{s}/n)) } + assert_raise(RegexpError) { s = "\u0061"; eval(%q(/\u3042#{s}/n)) } + assert_raise(ArgumentError) { s1=[0xff].pack("C"); s2="\u3042"; eval(%q(/#{s1}#{s2}/)) } + + assert_raise(ArgumentError) { s = '\x'; /#{ s }/ } + + assert_equal("\xe1", [0x00, 0xe1, 0xff].pack("C*")[/\M-a/]) + assert_equal("\xdc", [0x00, 0xdc, 0xff].pack("C*")[/\M-\\/]) + assert_equal("\x8a", [0x00, 0x8a, 0xff].pack("C*")[/\M-\n/]) + assert_equal("\x89", [0x00, 0x89, 0xff].pack("C*")[/\M-\t/]) + assert_equal("\x8d", [0x00, 0x8d, 0xff].pack("C*")[/\M-\r/]) + assert_equal("\x8c", [0x00, 0x8c, 0xff].pack("C*")[/\M-\f/]) + assert_equal("\x8b", [0x00, 0x8b, 0xff].pack("C*")[/\M-\v/]) + assert_equal("\x87", [0x00, 0x87, 0xff].pack("C*")[/\M-\a/]) + assert_equal("\x9b", [0x00, 0x9b, 0xff].pack("C*")[/\M-\e/]) + assert_equal("\x01", [0x00, 0x01, 0xff].pack("C*")[/\C-a/]) + + assert_raise(ArgumentError) { s = '\M'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\M-\M-a'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\M-\\'; /#{ s }/ } + + assert_raise(ArgumentError) { s = '\C'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\c'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\C-\C-a'; /#{ s }/ } + + assert_raise(ArgumentError) { s = '\M-\z'; /#{ s }/ } + assert_raise(ArgumentError) { s = '\M-\777'; /#{ s }/ } + + assert_equal("\u3042\u3042", "\u3042\u3042"[(s = "\u3042" + %q(\xe3\x81\x82); /#{s}/)]) + assert_raise(ArgumentError) { s = "\u3042" + %q(\xe3); /#{s}/ } + assert_raise(ArgumentError) { s = "\u3042" + %q(\xe3\xe3); /#{s}/ } + assert_raise(ArgumentError) { s = '\u3042' + [0xff].pack("C"); /#{s}/ } + + assert_raise(SyntaxError) { eval("/\u3042/n") } + + s = ".........." + 5.times { s.sub!(".", "") } + assert_equal(".....", s) + end + + def test_equal + assert_equal(true, /abc/ == /abc/) + assert_equal(false, /abc/ == /abc/m) + assert_equal(false, /abc/ == /abd/) + end + + def test_match + assert_nil(//.match(nil)) + assert_equal("abc", /.../.match(:abc)[0]) + assert_raise(TypeError) { /.../.match(Object.new)[0] } + assert_equal("bc", /../.match('abc', 1)[0]) + assert_equal("bc", /../.match('abc', -2)[0]) + assert_nil(/../.match("abc", -4)) + assert_nil(/../.match("abc", 4)) + assert_equal('\x', /../n.match("\u3042" + '\x', 1)[0]) + + r = nil + /.../.match("abc") {|m| r = m[0] } + assert_equal("abc", r) + + $_ = "abc"; assert_equal(1, ~/bc/) + $_ = "abc"; assert_nil(~/d/) + $_ = nil; assert_nil(~/./) + end + + def test_eqq + assert_equal(false, /../ === nil) + end + + def test_quote + assert_equal("\xff", Regexp.quote([0xff].pack("C"))) + assert_equal("\\ ", Regexp.quote("\ ")) + assert_equal("\\t", Regexp.quote("\t")) + assert_equal("\\n", Regexp.quote("\n")) + assert_equal("\\r", Regexp.quote("\r")) + assert_equal("\\f", Regexp.quote("\f")) + assert_equal("\\v", Regexp.quote("\v")) + assert_equal("\u3042\\t", Regexp.quote("\u3042\t")) + assert_equal("\\t\xff", Regexp.quote("\t" + [0xff].pack("C"))) + end + + def test_try_convert + assert_equal(/re/, Regexp.try_convert(/re/)) + assert_nil(Regexp.try_convert("re")) + + o = Object.new + assert_nil(Regexp.try_convert(o)) + def o.to_regexp() /foo/ end + assert_equal(/foo/, Regexp.try_convert(o)) + end + + def test_union2 + assert_equal(/(?!)/, Regexp.union) + assert_equal(/foo/, Regexp.union(/foo/)) + assert_equal(/foo/, Regexp.union([/foo/])) + assert_equal(/\t/, Regexp.union("\t")) + assert_equal(/(?-mix:\u3042)|(?-mix:\u3042)/, Regexp.union(/\u3042/, /\u3042/)) + assert_equal("\u3041", "\u3041"[Regexp.union(/\u3042/, "\u3041")]) + end + + def test_dup + assert_equal(//, //.dup) + assert_raise(TypeError) { //.instance_eval { initialize_copy(nil) } } + end + + def test_regsub + assert_equal("fooXXXbaz", "foobarbaz".sub!(/bar/, "XXX")) + s = [0xff].pack("C") + assert_equal(s, "X".sub!(/./, s)) + assert_equal('\\' + s + '\\' + s, "X".sub!(/./, '\\' + s)) # ??? + assert_equal('\k', "foo".sub!(/.../, '\k')) + assert_raise(RuntimeError) { "foo".sub!(/(?o)/, '\k\`\'/, "*+?{}|()<>`'") + check(/\A\w\W\z/, %w(a. b!), %w(.. ab)) + check(/\A.\b.\b.\B.\B.\z/, %w(a.aaa .a...), %w(aaaaa .....)) + check(/\A\s\S\z/, [' a', "\n."], [' ', "\n\n", 'a ']) + check(/\A\d\D\z/, '0a', %w(00 aa)) + check(/\A\h\H\z/, %w(0g ag BH), %w(a0 af GG)) + check(/\Afoo\Z\s\z/, "foo\n", ["foo", "foo\nbar"]) + assert_equal(%w(a b c), "abc def".scan(/\G\w/)) + check(/\A\u3042\z/, "\u3042", ["", "\u3043", "a"]) + check(/\A(..)\1\z/, %w(abab ....), %w(abba aba)) + failcheck('\1') + check(/\A\80\z/, "80", ["\100", ""]) + check(/\A\77\z/, "?") + check(/\A\78\z/, "\7" + '8', ["\100", ""]) + check(/\A\Qfoo\E\z/, "QfooE") + check(/\Aa++\z/, "aaa") + check(/\Ax]\z/, "x]") + check(/x#foo/x, "x", "#foo") + check(/\Ax#foo#{ "\n" }x\z/x, "xx", ["x", "x#foo\nx"]) + check(/\A\p{Alpha}\z/, ["a", "z"], [".", "", ".."]) + check(/\A\p{^Alpha}\z/, [".", "!"], ["!a", ""]) + check(/\A\n\z/, "\n") + check(/\A\t\z/, "\t") + check(/\A\r\z/, "\r") + check(/\A\f\z/, "\f") + check(/\A\a\z/, "\007") + check(/\A\e\z/, "\033") + check(/\A\v\z/, "\v") + end + + def test_parse_kg + check(/\A(.)(.)\k<1>(.)\z/, %w(abac abab ....), %w(abcd aaba xxx)) + check(/\A(.)(.)\k<-1>(.)\z/, %w(abbc abba ....), %w(abcd aaba xxx)) + check(/\A(?.)(?\g){0}(?\k){0}\g\g\z/, "aba", "abb") + check(/\A(?.)(?\g){0}(?\k){0}\g\g\z/, "abb", "aba") + check(/\A(?..)\k\z/, %w(abab ....), %w(abac abba xxx)) + check(/\A(.)(..)\g<-1>\z/, "abcde", %w(.... ......)) + failcheck('\k') + failcheck('\k<') + failcheck('\k<>') + failcheck('\k<.>') + failcheck('\k') + failcheck('\k<1.>') + failcheck('\k') + failcheck('()\g<-2>') + check(/\A(?.)(?.)\k\z/, %w(aba abb), %w(abc .. ....)) + check(/\k\g/, "kg") + end + + def test_parse_curly_brace + check(/\A{/, ["{", ["{", "{x"]]) + check(/\A{ /, ["{ ", ["{ ", "{ x"]]) + check(/\A{,}\z/, "{,}") + check(/\A{}\z/, "{}") + check(/\Aa{0}+\z/, "", %w(a aa aab)) + check(/\Aa{1}+\z/, %w(a aa), ["", "aab"]) + check(/\Aa{1,2}b{1,2}\z/, %w(ab aab abb aabb), ["", "aaabb", "abbb"]) + failcheck('.{100001}') + failcheck('.{0,100001}') + failcheck('.{1,0}') + failcheck('{0}') + failcheck('(?!x){0,1}') + end + + def test_parse_comment + check(/\A(?#foo\)bar)\z/, "", "a") + failcheck('(?#') + end + + def test_char_type + check(/\u3042\d/, ["\u30421", "\u30422"]) + + # CClassTable cache test + assert(/\u3042\d/.match("\u30421")) + assert(/\u3042\d/.match("\u30422")) + end + + def test_char_class + failcheck('[]') + failcheck('[x') + check(/\A[]]\z/, "]", "") + check(/\A[]\.]+\z/, %w(] . ]..]), ["", "["]) + check(/\A[\u3042]\z/, "\u3042", "\u3042aa") + check(/\A[\u3042\x61]+\z/, ["aa\u3042aa", "\u3042\u3042", "a"], ["", "b"]) + check(/\A[\u3042\x61\x62]+\z/, "abab\u3042abab\u3042") + check(/\A[abc]+\z/, "abcba", ["", "ada"]) + check(/\A[\w][\W]\z/, %w(a. b!), %w(.. ab)) + check(/\A[\s][\S]\z/, [' a', "\n."], [' ', "\n\n", 'a ']) + check(/\A[\d][\D]\z/, '0a', %w(00 aa)) + check(/\A[\h][\H]\z/, %w(0g ag BH), %w(a0 af GG)) + check(/\A[\p{Alpha}]\z/, ["a", "z"], [".", "", ".."]) + check(/\A[\p{^Alpha}]\z/, [".", "!"], ["!a", ""]) + check(/\A[\xff]\z/, "\xff", ["", "\xfe"]) + check(/\A[\80]+\z/, "8008", ["\\80", "\100", "\1000"]) + check(/\A[\77]+\z/, "???") + check(/\A[\78]+\z/, "\788\7") + check(/\A[\0]\z/, "\0") + check(/\A[[:0]]\z/, [":", "0"], ["", ":0"]) + check(/\A[0-]\z/, ["0", "-"], "0-") + check(/\A[a-&&\w]\z/, "a", "-") + check(/\A[--0]\z/, ["-", "/", "0"], ["", "1"]) + check(/\A['--0]\z/, %w(* + \( \) 0 ,), ["", ".", "1"]) + check(/\A[a-b-]\z/, %w(a b -), ["", "c"]) + check(/\A[a-b-&&\w]\z/, %w(a b), ["", "-"]) + check(/\A[a-b-&&\W]\z/, "-", ["", "a", "b"]) + check(/\A[a-c-e]\z/, %w(a b c e), %w(- d)) # is it OK? + check(/\A[a-f&&[^b-c]&&[^e]]\z/, %w(a d f), %w(b c e g 0)) + check(/\A[[^b-c]&&[^e]&&a-f]\z/, %w(a d f), %w(b c e g 0)) + check(/\A[\n\r\t]\z/, ["\n", "\r", "\t"]) + end + + def test_posix_bracket + check(/\A[[:alpha:]0]\z/, %w(0 a), %w(1 .)) + check(/\A[[:^alpha:]0]\z/, %w(0 1 .), "a") + check(/\A[[:alpha\:]]\z/, %w(a l p h a :), %w(b 0 1 .)) + check(/\A[[:alpha:foo]0]\z/, %w(0 a), %w(1 .)) + check(/\A[[:xdigit:]&&[:alpha:]]\z/, "a", %w(g 0)) + check(/\A[[:abcdefghijklmnopqrstu:]]+\z/, "[]") + failcheck('[[:alpha') + failcheck('[[:alpha:') + failcheck('[[:alp:]]') + end end -- cgit v1.2.3