aboutsummaryrefslogtreecommitdiffstats
path: root/spec/ruby/language/regexp
diff options
context:
space:
mode:
author eregon <eregon@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-20 20:18:52 +0000
committer eregon <eregon@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2017-09-20 20:18:52 +0000
commit 1d15d5f08032acf1b7bceacbb450d617ff6e0931 (patch)
tree a3785a79899302bc149e4a6e72f624ac27dc1f10 /spec/ruby/language/regexp
parent 75bfc6440d595bf339007f4fb280fd4d743e89c1 (diff)
download ruby-1d15d5f08032acf1b7bceacbb450d617ff6e0931.tar.gz
Move spec/rubyspec to spec/ruby for consistency
* Other ruby implementations use the spec/ruby directory. [Misc #13792] [ruby-core:82287] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59979 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'spec/ruby/language/regexp')
-rw-r--r-- spec/ruby/language/regexp/anchors_spec.rb 179
-rw-r--r-- spec/ruby/language/regexp/back-references_spec.rb 48
-rw-r--r-- spec/ruby/language/regexp/character_classes_spec.rb 610
-rw-r--r-- spec/ruby/language/regexp/encoding_spec.rb 103
-rw-r--r-- spec/ruby/language/regexp/escapes_spec.rb 81
-rw-r--r-- spec/ruby/language/regexp/grouping_spec.rb 23
-rw-r--r-- spec/ruby/language/regexp/interpolation_spec.rb 58
-rw-r--r-- spec/ruby/language/regexp/modifiers_spec.rb 110
-rw-r--r-- spec/ruby/language/regexp/repetition_spec.rb 57
9 files changed, 1269 insertions, 0 deletions
diff --git a/spec/ruby/language/regexp/anchors_spec.rb b/spec/ruby/language/regexp/anchors_spec.rb
new file mode 100644
index 0000000000..c6a620a221
--- /dev/null
+++ b/spec/ruby/language/regexp/anchors_spec.rb
@@ -0,0 +1,179 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with anchors" do
+ it "supports ^ (line start anchor)" do
+ # Basic matching
+ /^foo/.match("foo").to_a.should == ["foo"]
+ /^bar/.match("foo\nbar").to_a.should == ["bar"]
+ # Basic non-matching
+ /^foo/.match(" foo").should be_nil
+ /foo^/.match("foo\n\n\n").should be_nil
+
+ # A bit advanced
+ /^^^foo/.match("foo").to_a.should == ["foo"]
+ (/^[^f]/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["\n"]
+ (/($^)($^)/ =~ "foo\n\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
+
+ # Different start of line chars
+ /^bar/.match("foo\rbar").should be_nil
+ /^bar/.match("foo\0bar").should be_nil
+
+ # Trivial
+ /^/.match("foo").to_a.should == [""]
+
+ # Grouping
+ /(^foo)/.match("foo").to_a.should == ["foo", "foo"]
+ /(^)/.match("foo").to_a.should == ["", ""]
+ /(foo\n^)(^bar)/.match("foo\nbar").to_a.should == ["foo\nbar", "foo\n", "bar"]
+ end
+
+ it "does not match ^ after trailing \\n" do
+ /^(?!\A)/.match("foo\n").should be_nil # There is no (empty) line after a trailing \n
+ end
+
+ it "supports $ (line end anchor)" do
+ # Basic matching
+ /foo$/.match("foo").to_a.should == ["foo"]
+ /foo$/.match("foo\nbar").to_a.should == ["foo"]
+ # Basic non-matching
+ /foo$/.match("foo ").should be_nil
+ /$foo/.match("\n\n\nfoo").should be_nil
+
+ # A bit advanced
+ /foo$$$/.match("foo").to_a.should == ["foo"]
+ (/[^o]$/ =~ "foo\n\n").should == ("foo\n".size - 1) and $~.to_a.should == ["\n"]
+
+ # Different end of line chars
+ /foo$/.match("foo\r\nbar").should be_nil
+ /foo$/.match("foo\0bar").should be_nil
+
+ # Trivial
+ (/$/ =~ "foo").should == "foo".size and $~.to_a.should == [""]
+
+ # Grouping
+ /(foo$)/.match("foo").to_a.should == ["foo", "foo"]
+ (/($)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+ /(foo$)($\nbar)/.match("foo\nbar").to_a.should == ["foo\nbar", "foo", "\nbar"]
+ end
+
+ it "supports \\A (string start anchor)" do
+ # Basic matching
+ /\Afoo/.match("foo").to_a.should == ["foo"]
+ # Basic non-matching
+ /\Abar/.match("foo\nbar").should be_nil
+ /\Afoo/.match(" foo").should be_nil
+
+ # A bit advanced
+ /\A\A\Afoo/.match("foo").to_a.should == ["foo"]
+ /(\A\Z)(\A\Z)/.match("").to_a.should == ["", "", ""]
+
+ # Different start of line chars
+ /\Abar/.match("foo\0bar").should be_nil
+
+ # Grouping
+ /(\Afoo)/.match("foo").to_a.should == ["foo", "foo"]
+ /(\A)/.match("foo").to_a.should == ["", ""]
+ end
+
+ it "supports \\Z (string end anchor, including before trailing \\n)" do
+ # Basic matching
+ /foo\Z/.match("foo").to_a.should == ["foo"]
+ /foo\Z/.match("foo\n").to_a.should == ["foo"]
+ # Basic non-matching
+ /foo\Z/.match("foo\nbar").should be_nil
+ /foo\Z/.match("foo ").should be_nil
+
+ # A bit advanced
+ /foo\Z\Z\Z/.match("foo\n").to_a.should == ["foo"]
+ (/($\Z)($\Z)/ =~ "foo\n").should == "foo".size and $~.to_a.should == ["", "", ""]
+ (/(\z\Z)(\z\Z)/ =~ "foo\n").should == "foo\n".size and $~.to_a.should == ["", "", ""]
+
+ # Different end of line chars
+ /foo\Z/.match("foo\0bar").should be_nil
+ /foo\Z/.match("foo\r\n").should be_nil
+
+ # Grouping
+ /(foo\Z)/.match("foo").to_a.should == ["foo", "foo"]
+ (/(\Z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+ end
+
+ it "supports \\z (string end anchor)" do
+ # Basic matching
+ /foo\z/.match("foo").to_a.should == ["foo"]
+ # Basic non-matching
+ /foo\z/.match("foo\nbar").should be_nil
+ /foo\z/.match("foo\n").should be_nil
+ /foo\z/.match("foo ").should be_nil
+
+ # A bit advanced
+ /foo\z\z\z/.match("foo").to_a.should == ["foo"]
+ (/($\z)($\z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", "", ""]
+
+ # Different end of line chars
+ /foo\z/.match("foo\0bar").should be_nil
+ /foo\z/.match("foo\r\nbar").should be_nil
+
+ # Grouping
+ /(foo\z)/.match("foo").to_a.should == ["foo", "foo"]
+ (/(\z)/ =~ "foo").should == "foo".size and $~.to_a.should == ["", ""]
+ end
+
+ it "supports \\b (word boundary)" do
+ # Basic matching
+ /foo\b/.match("foo").to_a.should == ["foo"]
+ /foo\b/.match("foo\n").to_a.should == ["foo"]
+ LanguageSpecs.white_spaces.scan(/./).each do |c|
+ /foo\b/.match("foo" + c).to_a.should == ["foo"]
+ end
+ LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
+ /foo\b/.match("foo" + c).to_a.should == ["foo"]
+ end
+ /foo\b/.match("foo\0").to_a.should == ["foo"]
+ # Basic non-matching
+ /foo\b/.match("foobar").should be_nil
+ /foo\b/.match("foo123").should be_nil
+ /foo\b/.match("foo_").should be_nil
+ end
+
+ it "supports \\B (non-word-boundary)" do
+ # Basic matching
+ /foo\B/.match("foobar").to_a.should == ["foo"]
+ /foo\B/.match("foo123").to_a.should == ["foo"]
+ /foo\B/.match("foo_").to_a.should == ["foo"]
+ # Basic non-matching
+ /foo\B/.match("foo").should be_nil
+ /foo\B/.match("foo\n").should be_nil
+ LanguageSpecs.white_spaces.scan(/./).each do |c|
+ /foo\B/.match("foo" + c).should be_nil
+ end
+ LanguageSpecs.non_alphanum_non_space.scan(/./).each do |c|
+ /foo\B/.match("foo" + c).should be_nil
+ end
+ /foo\B/.match("foo\0").should be_nil
+ end
+
+ it "supports (?= ) (positive lookahead)" do
+ /foo.(?=bar)/.match("foo1 foo2bar").to_a.should == ["foo2"]
+ end
+
+ it "supports (?! ) (negative lookahead)" do
+ /foo.(?!bar)/.match("foo1bar foo2").to_a.should == ["foo2"]
+ end
+
+ it "supports (?!<) (negative lookbehind)" do
+ /(?<!foo)bar./.match("foobar1 bar2").to_a.should == ["bar2"]
+ end
+
+ it "supports (?<=) (positive lookbehind)" do
+ /(?<=foo)bar./.match("bar1 foobar2").to_a.should == ["bar2"]
+ end
+
+ it "supports (?<=\\b) (positive lookbehind with word boundary)" do
+ /(?<=\bfoo)bar./.match("1foobar1 foobar2").to_a.should == ["bar2"]
+ end
+
+ it "supports (?!<\\b) (negative lookbehind with word boundary)" do
+ /(?<!\bfoo)bar./.match("foobar1 1foobar2").to_a.should == ["bar2"]
+ end
+end
diff --git a/spec/ruby/language/regexp/back-references_spec.rb b/spec/ruby/language/regexp/back-references_spec.rb
new file mode 100644
index 0000000000..607f4463fd
--- /dev/null
+++ b/spec/ruby/language/regexp/back-references_spec.rb
@@ -0,0 +1,48 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with back-references" do
+ it "saves match data in the $~ pseudo-global variable" do
+ "hello" =~ /l+/
+ $~.to_a.should == ["ll"]
+ end
+
+ it "saves captures in numbered $[1-9] variables" do
+ "1234567890" =~ /(1)(2)(3)(4)(5)(6)(7)(8)(9)(0)/
+ $~.to_a.should == ["1234567890", "1", "2", "3", "4", "5", "6", "7", "8", "9", "0"]
+ $1.should == "1"
+ $2.should == "2"
+ $3.should == "3"
+ $4.should == "4"
+ $5.should == "5"
+ $6.should == "6"
+ $7.should == "7"
+ $8.should == "8"
+ $9.should == "9"
+ end
+
+ it "will not clobber capture variables across threads" do
+ cap1, cap2, cap3 = nil
+ "foo" =~ /(o+)/
+ cap1 = [$~.to_a, $1]
+ Thread.new do
+ cap2 = [$~.to_a, $1]
+ "bar" =~ /(a)/
+ cap3 = [$~.to_a, $1]
+ end.join
+ cap4 = [$~.to_a, $1]
+ cap1.should == [["oo", "oo"], "oo"]
+ cap2.should == [[], nil]
+ cap3.should == [["a", "a"], "a"]
+ cap4.should == [["oo", "oo"], "oo"]
+ end
+
+ it "supports \<n> (backreference to previous group match)" do
+ /(foo.)\1/.match("foo1foo1").to_a.should == ["foo1foo1", "foo1"]
+ /(foo.)\1/.match("foo1foo2").should be_nil
+ end
+
+ it "resets nested \<n> backreference before match of outer subexpression" do
+ /(a\1?){2}/.match("aaaa").to_a.should == ["aa", "a"]
+ end
+end
diff --git a/spec/ruby/language/regexp/character_classes_spec.rb b/spec/ruby/language/regexp/character_classes_spec.rb
new file mode 100644
index 0000000000..74568a0beb
--- /dev/null
+++ b/spec/ruby/language/regexp/character_classes_spec.rb
@@ -0,0 +1,610 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexp with character classes" do
+ it "supports \\w (word character)" do
+ /\w/.match("a").to_a.should == ["a"]
+ /\w/.match("1").to_a.should == ["1"]
+ /\w/.match("_").to_a.should == ["_"]
+
+ # Non-matches
+ /\w/.match(LanguageSpecs.white_spaces).should be_nil
+ /\w/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
+ /\w/.match("\0").should be_nil
+ end
+
+ it "supports \\W (non-word character)" do
+ /\W+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+ /\W+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+ /\W/.match("\0").to_a.should == ["\0"]
+
+ # Non-matches
+ /\W/.match("a").should be_nil
+ /\W/.match("1").should be_nil
+ /\W/.match("_").should be_nil
+ end
+
+ it "supports \\s (space character)" do
+ /\s+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+
+ # Non-matches
+ /\s/.match("a").should be_nil
+ /\s/.match("1").should be_nil
+ /\s/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
+ /\s/.match("\0").should be_nil
+ end
+
+ it "supports \\S (non-space character)" do
+ /\S/.match("a").to_a.should == ["a"]
+ /\S/.match("1").to_a.should == ["1"]
+ /\S+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+ /\S/.match("\0").to_a.should == ["\0"]
+
+ # Non-matches
+ /\S/.match(LanguageSpecs.white_spaces).should be_nil
+ end
+
+ it "supports \\d (numeric digit)" do
+ /\d/.match("1").to_a.should == ["1"]
+
+ # Non-matches
+ /\d/.match("a").should be_nil
+ /\d/.match(LanguageSpecs.white_spaces).should be_nil
+ /\d/.match(LanguageSpecs.non_alphanum_non_space).should be_nil
+ /\d/.match("\0").should be_nil
+ end
+
+ it "supports \\D (non-digit)" do
+ /\D/.match("a").to_a.should == ["a"]
+ /\D+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+ /\D+/.match(LanguageSpecs.non_alphanum_non_space).to_a.should == [LanguageSpecs.non_alphanum_non_space]
+ /\D/.match("\0").to_a.should == ["\0"]
+
+ # Non-matches
+ /\D/.match("1").should be_nil
+ end
+
+ it "supports [] (character class)" do
+ /[a-z]+/.match("fooBAR").to_a.should == ["foo"]
+ /[\b]/.match("\b").to_a.should == ["\b"] # \b inside character class is backspace
+ end
+
+ it "supports [[:alpha:][:digit:][:etc:]] (predefined character classes)" do
+ /[[:alnum:]]+/.match("a1").to_a.should == ["a1"]
+ /[[:alpha:]]+/.match("Aa1").to_a.should == ["Aa"]
+ /[[:blank:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.blanks]
+ # /[[:cntrl:]]/.match("").to_a.should == [""] # TODO: what should this match?
+ /[[:digit:]]/.match("1").to_a.should == ["1"]
+ # /[[:graph:]]/.match("").to_a.should == [""] # TODO: what should this match?
+ /[[:lower:]]+/.match("Aa1").to_a.should == ["a"]
+ /[[:print:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [" "] # include all of multibyte encoded characters
+ /[[:punct:]]+/.match(LanguageSpecs.punctuations).to_a.should == [LanguageSpecs.punctuations]
+ /[[:space:]]+/.match(LanguageSpecs.white_spaces).to_a.should == [LanguageSpecs.white_spaces]
+ /[[:upper:]]+/.match("123ABCabc").to_a.should == ["ABC"]
+ /[[:xdigit:]]+/.match("xyz0123456789ABCDEFabcdefXYZ").to_a.should == ["0123456789ABCDEFabcdef"]
+
+ # Parsing
+ /[[:lower:][:digit:]A-C]+/.match("a1ABCDEF").to_a.should == ["a1ABC"] # can be composed with other constructs in the character class
+ /[^[:lower:]A-C]+/.match("abcABCDEF123def").to_a.should == ["DEF123"] # negated character class
+ /[:alnum:]+/.match("a:l:n:u:m").to_a.should == ["a:l:n:u:m"] # should behave like regular character class composed of the individual letters
+ /[\[:alnum:]+/.match("[:a:l:n:u:m").to_a.should == ["[:a:l:n:u:m"] # should behave like regular character class composed of the individual letters
+ lambda { eval('/[[:alpha:]-[:digit:]]/') }.should raise_error(SyntaxError) # can't use character class as a start value of range
+ end
+
+ it "matches ASCII characters with [[:ascii:]]" do
+ "\x00".match(/[[:ascii:]]/).to_a.should == ["\x00"]
+ "\x7F".match(/[[:ascii:]]/).to_a.should == ["\x7F"]
+ end
+
+ not_supported_on :opal do
+ it "doesn't match non-ASCII characters with [[:ascii:]]" do
+ /[[:ascii:]]/.match("\u{80}").should be_nil
+ /[[:ascii:]]/.match("\u{9898}").should be_nil
+ end
+ end
+
+ it "matches Unicode letter characters with [[:alnum:]]" do
+ "à".match(/[[:alnum:]]/).to_a.should == ["à"]
+ end
+
+ it "matches Unicode digits with [[:alnum:]]" do
+ "\u{0660}".match(/[[:alnum:]]/).to_a.should == ["\u{0660}"]
+ end
+
+ it "doesn't matches Unicode marks with [[:alnum:]]" do
+ "\u{36F}".match(/[[:alnum:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:alnum:]]" do
+ "\u{16}".match(/[[:alnum:]]/).to_a.should == []
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:alnum:]]" do
+ "\u{3F}".match(/[[:alnum:]]/).to_a.should == []
+ end
+
+ it "matches Unicode letter characters with [[:alpha:]]" do
+ "à".match(/[[:alpha:]]/).to_a.should == ["à"]
+ end
+
+ it "doesn't match Unicode digits with [[:alpha:]]" do
+ "\u{0660}".match(/[[:alpha:]]/).to_a.should == []
+ end
+
+ it "doesn't matches Unicode marks with [[:alpha:]]" do
+ "\u{36F}".match(/[[:alpha:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:alpha:]]" do
+ "\u{16}".match(/[[:alpha:]]/).to_a.should == []
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:alpha:]]" do
+ "\u{3F}".match(/[[:alpha:]]/).to_a.should == []
+ end
+
+ it "matches Unicode space characters with [[:blank:]]" do
+ "\u{1680}".match(/[[:blank:]]/).to_a.should == ["\u{1680}"]
+ end
+
+ it "doesn't match Unicode control characters with [[:blank:]]" do
+ "\u{16}".match(/[[:blank:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:blank:]]" do
+ "\u{3F}".match(/[[:blank:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode letter characters with [[:blank:]]" do
+ "à".match(/[[:blank:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:blank:]]" do
+ "\u{0660}".match(/[[:blank:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:blank:]]" do
+ "\u{36F}".match(/[[:blank:]]/).should be_nil
+ end
+
+ it "doesn't Unicode letter characters with [[:cntrl:]]" do
+ "à".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:cntrl:]]" do
+ "\u{0660}".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:cntrl:]]" do
+ "\u{36F}".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:cntrl:]]" do
+ "\u{3F}".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "matches Unicode control characters with [[:cntrl:]]" do
+ "\u{16}".match(/[[:cntrl:]]/).to_a.should == ["\u{16}"]
+ end
+
+ it "doesn't match Unicode format characters with [[:cntrl:]]" do
+ "\u{2060}".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:cntrl:]]" do
+ "\u{E001}".match(/[[:cntrl:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode letter characters with [[:digit:]]" do
+ "à".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "matches Unicode digits with [[:digit:]]" do
+ "\u{0660}".match(/[[:digit:]]/).to_a.should == ["\u{0660}"]
+ "\u{FF12}".match(/[[:digit:]]/).to_a.should == ["\u{FF12}"]
+ end
+
+ it "doesn't match Unicode marks with [[:digit:]]" do
+ "\u{36F}".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:digit:]]" do
+ "\u{3F}".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:digit:]]" do
+ "\u{16}".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode format characters with [[:digit:]]" do
+ "\u{2060}".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:digit:]]" do
+ "\u{E001}".match(/[[:digit:]]/).should be_nil
+ end
+
+ it "matches Unicode letter characters with [[:graph:]]" do
+ "à".match(/[[:graph:]]/).to_a.should == ["à"]
+ end
+
+ it "matches Unicode digits with [[:graph:]]" do
+ "\u{0660}".match(/[[:graph:]]/).to_a.should == ["\u{0660}"]
+ "\u{FF12}".match(/[[:graph:]]/).to_a.should == ["\u{FF12}"]
+ end
+
+ it "matches Unicode marks with [[:graph:]]" do
+ "\u{36F}".match(/[[:graph:]]/).to_a.should ==["\u{36F}"]
+ end
+
+ it "matches Unicode punctuation characters with [[:graph:]]" do
+ "\u{3F}".match(/[[:graph:]]/).to_a.should == ["\u{3F}"]
+ end
+
+ it "doesn't match Unicode control characters with [[:graph:]]" do
+ "\u{16}".match(/[[:graph:]]/).should be_nil
+ end
+
+ it "match Unicode format characters with [[:graph:]]" do
+ "\u{2060}".match(/[[:graph:]]/).to_a.should == ["\u2060"]
+ end
+
+ it "match Unicode private-use characters with [[:graph:]]" do
+ "\u{E001}".match(/[[:graph:]]/).to_a.should == ["\u{E001}"]
+ end
+
+ it "matches Unicode lowercase letter characters with [[:lower:]]" do
+ "\u{FF41}".match(/[[:lower:]]/).to_a.should == ["\u{FF41}"]
+ "\u{1D484}".match(/[[:lower:]]/).to_a.should == ["\u{1D484}"]
+ "\u{E8}".match(/[[:lower:]]/).to_a.should == ["\u{E8}"]
+ end
+
+ it "doesn't match Unicode uppercase letter characters with [[:lower:]]" do
+ "\u{100}".match(/[[:lower:]]/).should be_nil
+ "\u{130}".match(/[[:lower:]]/).should be_nil
+ "\u{405}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode title-case characters with [[:lower:]]" do
+ "\u{1F88}".match(/[[:lower:]]/).should be_nil
+ "\u{1FAD}".match(/[[:lower:]]/).should be_nil
+ "\u{01C5}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:lower:]]" do
+ "\u{0660}".match(/[[:lower:]]/).should be_nil
+ "\u{FF12}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:lower:]]" do
+ "\u{36F}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:lower:]]" do
+ "\u{3F}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:lower:]]" do
+ "\u{16}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode format characters with [[:lower:]]" do
+ "\u{2060}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:lower:]]" do
+ "\u{E001}".match(/[[:lower:]]/).should be_nil
+ end
+
+ it "matches Unicode lowercase letter characters with [[:print:]]" do
+ "\u{FF41}".match(/[[:print:]]/).to_a.should == ["\u{FF41}"]
+ "\u{1D484}".match(/[[:print:]]/).to_a.should == ["\u{1D484}"]
+ "\u{E8}".match(/[[:print:]]/).to_a.should == ["\u{E8}"]
+ end
+
+ it "matches Unicode uppercase letter characters with [[:print:]]" do
+ "\u{100}".match(/[[:print:]]/).to_a.should == ["\u{100}"]
+ "\u{130}".match(/[[:print:]]/).to_a.should == ["\u{130}"]
+ "\u{405}".match(/[[:print:]]/).to_a.should == ["\u{405}"]
+ end
+
+ it "matches Unicode title-case characters with [[:print:]]" do
+ "\u{1F88}".match(/[[:print:]]/).to_a.should == ["\u{1F88}"]
+ "\u{1FAD}".match(/[[:print:]]/).to_a.should == ["\u{1FAD}"]
+ "\u{01C5}".match(/[[:print:]]/).to_a.should == ["\u{01C5}"]
+ end
+
+ it "matches Unicode digits with [[:print:]]" do
+ "\u{0660}".match(/[[:print:]]/).to_a.should == ["\u{0660}"]
+ "\u{FF12}".match(/[[:print:]]/).to_a.should == ["\u{FF12}"]
+ end
+
+ it "matches Unicode marks with [[:print:]]" do
+ "\u{36F}".match(/[[:print:]]/).to_a.should == ["\u{36F}"]
+ end
+
+ it "matches Unicode punctuation characters with [[:print:]]" do
+ "\u{3F}".match(/[[:print:]]/).to_a.should == ["\u{3F}"]
+ end
+
+ it "doesn't match Unicode control characters with [[:print:]]" do
+ "\u{16}".match(/[[:print:]]/).should be_nil
+ end
+
+ it "match Unicode format characters with [[:print:]]" do
+ "\u{2060}".match(/[[:print:]]/).to_a.should == ["\u{2060}"]
+ end
+
+ it "match Unicode private-use characters with [[:print:]]" do
+ "\u{E001}".match(/[[:print:]]/).to_a.should == ["\u{E001}"]
+ end
+
+
+ it "doesn't match Unicode lowercase letter characters with [[:punct:]]" do
+ "\u{FF41}".match(/[[:punct:]]/).should be_nil
+ "\u{1D484}".match(/[[:punct:]]/).should be_nil
+ "\u{E8}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode uppercase letter characters with [[:punct:]]" do
+ "\u{100}".match(/[[:punct:]]/).should be_nil
+ "\u{130}".match(/[[:punct:]]/).should be_nil
+ "\u{405}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode title-case characters with [[:punct:]]" do
+ "\u{1F88}".match(/[[:punct:]]/).should be_nil
+ "\u{1FAD}".match(/[[:punct:]]/).should be_nil
+ "\u{01C5}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:punct:]]" do
+ "\u{0660}".match(/[[:punct:]]/).should be_nil
+ "\u{FF12}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:punct:]]" do
+ "\u{36F}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "matches Unicode Pc characters with [[:punct:]]" do
+ "\u{203F}".match(/[[:punct:]]/).to_a.should == ["\u{203F}"]
+ end
+
+ it "matches Unicode Pd characters with [[:punct:]]" do
+ "\u{2E17}".match(/[[:punct:]]/).to_a.should == ["\u{2E17}"]
+ end
+
+ it "matches Unicode Ps characters with [[:punct:]]" do
+ "\u{0F3A}".match(/[[:punct:]]/).to_a.should == ["\u{0F3A}"]
+ end
+
+ it "matches Unicode Pe characters with [[:punct:]]" do
+ "\u{2046}".match(/[[:punct:]]/).to_a.should == ["\u{2046}"]
+ end
+
+ it "matches Unicode Pi characters with [[:punct:]]" do
+ "\u{00AB}".match(/[[:punct:]]/).to_a.should == ["\u{00AB}"]
+ end
+
+ it "matches Unicode Pf characters with [[:punct:]]" do
+ "\u{201D}".match(/[[:punct:]]/).to_a.should == ["\u{201D}"]
+ "\u{00BB}".match(/[[:punct:]]/).to_a.should == ["\u{00BB}"]
+ end
+
+ it "matches Unicode Po characters with [[:punct:]]" do
+ "\u{00BF}".match(/[[:punct:]]/).to_a.should == ["\u{00BF}"]
+ end
+
+ it "doesn't match Unicode format characters with [[:punct:]]" do
+ "\u{2060}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:punct:]]" do
+ "\u{E001}".match(/[[:punct:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode lowercase letter characters with [[:space:]]" do
+ "\u{FF41}".match(/[[:space:]]/).should be_nil
+ "\u{1D484}".match(/[[:space:]]/).should be_nil
+ "\u{E8}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode uppercase letter characters with [[:space:]]" do
+ "\u{100}".match(/[[:space:]]/).should be_nil
+ "\u{130}".match(/[[:space:]]/).should be_nil
+ "\u{405}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode title-case characters with [[:space:]]" do
+ "\u{1F88}".match(/[[:space:]]/).should be_nil
+ "\u{1FAD}".match(/[[:space:]]/).should be_nil
+ "\u{01C5}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:space:]]" do
+ "\u{0660}".match(/[[:space:]]/).should be_nil
+ "\u{FF12}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:space:]]" do
+ "\u{36F}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "matches Unicode Zs characters with [[:space:]]" do
+ "\u{205F}".match(/[[:space:]]/).to_a.should == ["\u{205F}"]
+ end
+
+ it "matches Unicode Zl characters with [[:space:]]" do
+ "\u{2028}".match(/[[:space:]]/).to_a.should == ["\u{2028}"]
+ end
+
+ it "matches Unicode Zp characters with [[:space:]]" do
+ "\u{2029}".match(/[[:space:]]/).to_a.should == ["\u{2029}"]
+ end
+
+ it "doesn't match Unicode format characters with [[:space:]]" do
+ "\u{2060}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:space:]]" do
+ "\u{E001}".match(/[[:space:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode lowercase characters with [[:upper:]]" do
+ "\u{FF41}".match(/[[:upper:]]/).should be_nil
+ "\u{1D484}".match(/[[:upper:]]/).should be_nil
+ "\u{E8}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "matches Unicode uppercase characters with [[:upper:]]" do
+ "\u{100}".match(/[[:upper:]]/).to_a.should == ["\u{100}"]
+ "\u{130}".match(/[[:upper:]]/).to_a.should == ["\u{130}"]
+ "\u{405}".match(/[[:upper:]]/).to_a.should == ["\u{405}"]
+ end
+
+ it "doesn't match Unicode title-case characters with [[:upper:]]" do
+ "\u{1F88}".match(/[[:upper:]]/).should be_nil
+ "\u{1FAD}".match(/[[:upper:]]/).should be_nil
+ "\u{01C5}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode digits with [[:upper:]]" do
+ "\u{0660}".match(/[[:upper:]]/).should be_nil
+ "\u{FF12}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:upper:]]" do
+ "\u{36F}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:upper:]]" do
+ "\u{3F}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:upper:]]" do
+ "\u{16}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode format characters with [[:upper:]]" do
+ "\u{2060}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:upper:]]" do
+ "\u{E001}".match(/[[:upper:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode letter characters [^a-fA-F] with [[:xdigit:]]" do
+ "à".match(/[[:xdigit:]]/).should be_nil
+ "g".match(/[[:xdigit:]]/).should be_nil
+ "X".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "matches Unicode letter characters [a-fA-F] with [[:xdigit:]]" do
+ "a".match(/[[:xdigit:]]/).to_a.should == ["a"]
+ "F".match(/[[:xdigit:]]/).to_a.should == ["F"]
+ end
+
+ it "doesn't match Unicode digits [^0-9] with [[:xdigit:]]" do
+ "\u{0660}".match(/[[:xdigit:]]/).should be_nil
+ "\u{FF12}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode marks with [[:xdigit:]]" do
+ "\u{36F}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode punctuation characters with [[:xdigit:]]" do
+ "\u{3F}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:xdigit:]]" do
+ "\u{16}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode format characters with [[:xdigit:]]" do
+ "\u{2060}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:xdigit:]]" do
+ "\u{E001}".match(/[[:xdigit:]]/).should be_nil
+ end
+
+ it "matches Unicode lowercase characters with [[:word:]]" do
+ "\u{FF41}".match(/[[:word:]]/).to_a.should == ["\u{FF41}"]
+ "\u{1D484}".match(/[[:word:]]/).to_a.should == ["\u{1D484}"]
+ "\u{E8}".match(/[[:word:]]/).to_a.should == ["\u{E8}"]
+ end
+
+ it "matches Unicode uppercase characters with [[:word:]]" do
+ "\u{100}".match(/[[:word:]]/).to_a.should == ["\u{100}"]
+ "\u{130}".match(/[[:word:]]/).to_a.should == ["\u{130}"]
+ "\u{405}".match(/[[:word:]]/).to_a.should == ["\u{405}"]
+ end
+
+ it "matches Unicode title-case characters with [[:word:]]" do
+ "\u{1F88}".match(/[[:word:]]/).to_a.should == ["\u{1F88}"]
+ "\u{1FAD}".match(/[[:word:]]/).to_a.should == ["\u{1FAD}"]
+ "\u{01C5}".match(/[[:word:]]/).to_a.should == ["\u{01C5}"]
+ end
+
+ it "matches Unicode decimal digits with [[:word:]]" do
+ "\u{FF10}".match(/[[:word:]]/).to_a.should == ["\u{FF10}"]
+ "\u{096C}".match(/[[:word:]]/).to_a.should == ["\u{096C}"]
+ end
+
+ it "matches Unicode marks with [[:word:]]" do
+ "\u{36F}".match(/[[:word:]]/).to_a.should == ["\u{36F}"]
+ end
+
+ it "match Unicode Nl characters with [[:word:]]" do
+ "\u{16EE}".match(/[[:word:]]/).to_a.should == ["\u{16EE}"]
+ end
+
+ it "doesn't match Unicode No characters with [[:word:]]" do
+ "\u{17F0}".match(/[[:word:]]/).should be_nil
+ end
+ it "doesn't match Unicode punctuation characters with [[:word:]]" do
+ "\u{3F}".match(/[[:word:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode control characters with [[:word:]]" do
+ "\u{16}".match(/[[:word:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode format characters with [[:word:]]" do
+ "\u{2060}".match(/[[:word:]]/).should be_nil
+ end
+
+ it "doesn't match Unicode private-use characters with [[:word:]]" do
+ "\u{E001}".match(/[[:word:]]/).should be_nil
+ end
+
+ it "matches unicode named character properties" do
+ "a1".match(/\p{Alpha}/).to_a.should == ["a"]
+ end
+
+ it "matches unicode abbreviated character properties" do
+ "a1".match(/\p{L}/).to_a.should == ["a"]
+ end
+
+ it "matches unicode script properties" do
+ "a\u06E9b".match(/\p{Arabic}/).to_a.should == ["\u06E9"]
+ end
+
+ it "matches unicode Han properties" do
+ "松本行弘 Ruby".match(/\p{Han}+/u).to_a.should == ["松本行弘"]
+ end
+
+ it "matches unicode Hiragana properties" do
+ "Ruby(ルビー)、まつもとゆきひろ".match(/\p{Hiragana}+/u).to_a.should == ["まつもとゆきひろ"]
+ end
+
+ it "matches unicode Katakana properties" do
+ "Ruby(ルビー)、まつもとゆきひろ".match(/\p{Katakana}+/u).to_a.should == ["ルビ"]
+ end
+
+ it "matches unicode Hangul properties" do
+ "루비(Ruby)".match(/\p{Hangul}+/u).to_a.should == ["루비"]
+ end
+end
diff --git a/spec/ruby/language/regexp/encoding_spec.rb b/spec/ruby/language/regexp/encoding_spec.rb
new file mode 100644
index 0000000000..1f62244a28
--- /dev/null
+++ b/spec/ruby/language/regexp/encoding_spec.rb
@@ -0,0 +1,103 @@
+# -*- encoding: binary -*-
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with encoding modifiers" do
+ it "supports /e (EUC encoding)" do
+ match = /./e.match("\303\251".force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ end
+
+ it "supports /e (EUC encoding) with interpolation" do
+ match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ end
+
+ it "supports /e (EUC encoding) with interpolation /o" do
+ match = /#{/./}/e.match("\303\251".force_encoding(Encoding::EUC_JP))
+ match.to_a.should == ["\303\251".force_encoding(Encoding::EUC_JP)]
+ end
+
+ it 'uses EUC-JP as /e encoding' do
+ /./e.encoding.should == Encoding::EUC_JP
+ end
+
+ it 'preserves EUC-JP as /e encoding through interpolation' do
+ /#{/./}/e.encoding.should == Encoding::EUC_JP
+ end
+
+ it "supports /n (No encoding)" do
+ /./n.match("\303\251").to_a.should == ["\303"]
+ end
+
+ it "supports /n (No encoding) with interpolation" do
+ /#{/./}/n.match("\303\251").to_a.should == ["\303"]
+ end
+
+ it "supports /n (No encoding) with interpolation /o" do
+ /#{/./}/n.match("\303\251").to_a.should == ["\303"]
+ end
+
+ it 'uses US-ASCII as /n encoding if all chars are 7-bit' do
+ /./n.encoding.should == Encoding::US_ASCII
+ end
+
+ it 'uses ASCII-8BIT as /n encoding if not all chars are 7-bit' do
+ /\xFF/n.encoding.should == Encoding::ASCII_8BIT
+ end
+
+ it 'preserves US-ASCII as /n encoding through interpolation if all chars are 7-bit' do
+ /.#{/./}/n.encoding.should == Encoding::US_ASCII
+ end
+
+ it 'preserves ASCII-8BIT as /n encoding through interpolation if all chars are 7-bit' do
+ /\xFF#{/./}/n.encoding.should == Encoding::ASCII_8BIT
+ end
+
+ it "supports /s (Windows_31J encoding)" do
+ match = /./s.match("\303\251".force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ end
+
+ it "supports /s (Windows_31J encoding) with interpolation" do
+ match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ end
+
+ it "supports /s (Windows_31J encoding) with interpolation and /o" do
+ match = /#{/./}/s.match("\303\251".force_encoding(Encoding::Windows_31J))
+ match.to_a.should == ["\303".force_encoding(Encoding::Windows_31J)]
+ end
+
+ it 'uses Windows-31J as /s encoding' do
+ /./s.encoding.should == Encoding::Windows_31J
+ end
+
+ it 'preserves Windows-31J as /s encoding through interpolation' do
+ /#{/./}/s.encoding.should == Encoding::Windows_31J
+ end
+
+ it "supports /u (UTF8 encoding)" do
+ /./u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ end
+
+ it "supports /u (UTF8 encoding) with interpolation" do
+ /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ end
+
+ it "supports /u (UTF8 encoding) with interpolation and /o" do
+ /#{/./}/u.match("\303\251".force_encoding('utf-8')).to_a.should == ["\u{e9}"]
+ end
+
+ it 'uses UTF-8 as /u encoding' do
+ /./u.encoding.should == Encoding::UTF_8
+ end
+
+ it 'preserves UTF-8 as /u encoding through interpolation' do
+ /#{/./}/u.encoding.should == Encoding::UTF_8
+ end
+
+ it "selects last of multiple encoding specifiers" do
+ /foo/ensuensuens.should == /foo/s
+ end
+end
diff --git a/spec/ruby/language/regexp/escapes_spec.rb b/spec/ruby/language/regexp/escapes_spec.rb
new file mode 100644
index 0000000000..50ac22e51e
--- /dev/null
+++ b/spec/ruby/language/regexp/escapes_spec.rb
@@ -0,0 +1,81 @@
+# -*- encoding: binary -*-
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with escape characters" do # backslash escapes inside Regexp literals: control characters, quoted metacharacters, \x and \c
+ it "they're supported" do
+ /\t/.match("\t").to_a.should == ["\t"] # horizontal tab
+ /\v/.match("\v").to_a.should == ["\v"] # vertical tab
+ /\n/.match("\n").to_a.should == ["\n"] # newline
+ /\r/.match("\r").to_a.should == ["\r"] # return
+ /\f/.match("\f").to_a.should == ["\f"] # form feed
+ /\a/.match("\a").to_a.should == ["\a"] # bell
+ /\e/.match("\e").to_a.should == ["\e"] # escape
+
+ # NOTE(review): \nnn octal char (encoded byte value) is not exercised in this example
+ end
+
+ it "support quoting meta-characters via escape sequence" do
+ /\\/.match("\\").to_a.should == ["\\"]
+ /\//.match("/").to_a.should == ["/"]
+ # parentheses, brackets and braces
+ /\(/.match("(").to_a.should == ["("]
+ /\)/.match(")").to_a.should == [")"]
+ /\[/.match("[").to_a.should == ["["]
+ /\]/.match("]").to_a.should == ["]"]
+ /\{/.match("{").to_a.should == ["{"]
+ /\}/.match("}").to_a.should == ["}"]
+ # alternation separator
+ /\|/.match("|").to_a.should == ["|"]
+ # quantifiers and the any-character dot
+ /\?/.match("?").to_a.should == ["?"]
+ /\./.match(".").to_a.should == ["."]
+ /\*/.match("*").to_a.should == ["*"]
+ /\+/.match("+").to_a.should == ["+"]
+ # line anchors
+ /\^/.match("^").to_a.should == ["^"]
+ /\$/.match("$").to_a.should == ["$"]
+ end
+
+ it "allows any character to be escaped" do
+ /\y/.match("y").to_a.should == ["y"] # \y has no special meaning, so it matches a literal "y"
+ end
+
+ it "support \\x (hex characters)" do
+ /\xA/.match("\nxyz").to_a.should == ["\n"] # a single hex digit is accepted
+ /\x0A/.match("\n").to_a.should == ["\n"]
+ /\xAA/.match("\nA").should be_nil # two hex digits form one byte, not \xA followed by "A"
+ /\x0AA/.match("\nA").to_a.should == ["\nA"] # at most two hex digits are consumed
+ /\xAG/.match("\nG").to_a.should == ["\nG"] # "G" is not a hex digit, so the escape ends after "A"
+ # \x without any hex digit is a syntax error
+ lambda { eval('/\xG/') }.should raise_error(SyntaxError)
+
+ # NOTE(review): \x{7HHHHHHH} wide hexadecimal char (character code point value) is not exercised in this example
+ end
+
+ it "support \\c (control characters)" do
+ #/\c \c@\c`/.match("\00\00\00").to_a.should == ["\00\00\00"]
+ /\c#\cc\cC/.match("\03\03\03").to_a.should == ["\03\03\03"]
+ /\c'\cG\cg/.match("\a\a\a").to_a.should == ["\a\a\a"]
+ /\c(\cH\ch/.match("\b\b\b").to_a.should == ["\b\b\b"]
+ /\c)\cI\ci/.match("\t\t\t").to_a.should == ["\t\t\t"]
+ /\c*\cJ\cj/.match("\n\n\n").to_a.should == ["\n\n\n"]
+ /\c+\cK\ck/.match("\v\v\v").to_a.should == ["\v\v\v"]
+ /\c,\cL\cl/.match("\f\f\f").to_a.should == ["\f\f\f"]
+ /\c-\cM\cm/.match("\r\r\r").to_a.should == ["\r\r\r"]
+
+ /\cJ/.match("\r").should be_nil
+
+ # Parsing precedence
+ /\cJ+/.match("\n\n").to_a.should == ["\n\n"] # Quantifiers apply to entire escape sequence
+ /\\cJ/.match("\\cJ").to_a.should == ["\\cJ"]
+ lambda { eval('/[abc\x]/') }.should raise_error(SyntaxError) # \x is treated as an escape sequence even inside a character class
+ # Syntax error
+ lambda { eval('/\c/') }.should raise_error(SyntaxError)
+
+ # \cx control char (character code point value)
+ # \C-x control char (character code point value)
+ # \M-x meta (x|0x80) (character code point value)
+ # \M-\C-x meta control char (character code point value)
+ end
+end
diff --git a/spec/ruby/language/regexp/grouping_spec.rb b/spec/ruby/language/regexp/grouping_spec.rb
new file mode 100644
index 0000000000..443cab7ee0
--- /dev/null
+++ b/spec/ruby/language/regexp/grouping_spec.rb
@@ -0,0 +1,23 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with grouping" do # capturing groups, nesting, unbalanced parentheses and (?: ) non-capturing groups
+ it "support ()" do
+ /(a)/.match("a").to_a.should == ["a", "a"] # whole match first, then capture 1
+ end
+
+ it "allows groups to be nested" do
+ md = /(hay(st)a)ck/.match('haystack')
+ md.to_a.should == ['haystack','haysta', 'st'] # the outer group is listed before the group nested inside it
+ end
+
+ it "raises a SyntaxError when parentheses aren't balanced" do
+ lambda { eval "/(hay(st)ack/" }.should raise_error(SyntaxError) # eval defers parsing of the bad literal until the example runs
+ end
+
+ it "supports (?: ) (non-capturing group)" do
+ /(?:foo)(bar)/.match("foobar").to_a.should == ["foobar", "bar"] # only (bar) produces a capture
+ # Parsing precedence: (?:xdigit:) is an ordinary non-capturing group around the text "xdigit:"
+ /(?:xdigit:)/.match("xdigit:").to_a.should == ["xdigit:"]
+ end
+end
diff --git a/spec/ruby/language/regexp/interpolation_spec.rb b/spec/ruby/language/regexp/interpolation_spec.rb
new file mode 100644
index 0000000000..5536c718f1
--- /dev/null
+++ b/spec/ruby/language/regexp/interpolation_spec.rb
@@ -0,0 +1,58 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with interpolation" do # behaviour of #{...} inside Regexp literals
+
+ it "allows interpolation of strings" do # the string's contents are used as pattern source, so | stays an alternation
+ str = "foo|bar"
+ /#{str}/.should == /foo|bar/
+ end
+
+ it "allows interpolation of literal regexps" do # an interpolated Regexp is embedded as an option-scoped (?-mix:...) group
+ re = /foo|bar/
+ /#{re}/.should == /(?-mix:foo|bar)/
+ end
+
+ it "allows interpolation of any object that responds to to_s" do
+ o = Object.new
+ def o.to_s
+ "object_with_to_s"
+ end
+ /#{o}/.should == /object_with_to_s/
+ end
+
+ it "allows interpolation which mixes modifiers" do # the inner /i is kept as (?i-mx:...) while the outer literal is /m
+ re = /foo/i
+ /#{re} bar/m.should == /(?i-mx:foo) bar/m
+ end
+
+ it "allows interpolation to interact with other Regexp constructs" do # interpolated text may close or open groups and take part in a bracket range
+ str = "foo)|(bar"
+ /(#{str})/.should == /(foo)|(bar)/
+
+ str = "a"
+ /[#{str}-z]/.should == /[a-z]/
+ end
+
+ it "gives precedence to escape sequences over substitution" do # \c takes the following # as its argument, so {str} stays literal text
+ str = "J"
+ /\c#{str}/.to_s.should == '(?-mix:\c#' + '{str})'
+ end
+
+ it "throws RegexpError for malformed interpolation" do # the literals parse, but the resulting pattern "(" is invalid when built
+ s = ""
+ lambda { /(#{s}/ }.should raise_error(RegexpError)
+ s = "("
+ lambda { /#{s}/ }.should raise_error(RegexpError)
+ end
+
+ it "allows interpolation in extended mode" do # under /x the interpolated #comments and whitespace are ignored
+ var = "#comment\n foo #comment\n | bar"
+ (/#{var}/x =~ "foo").should == (/foo|bar/ =~ "foo")
+ end
+
+ it "allows escape sequences in interpolated regexps" do # the \x80 escape is kept as an escape in the combined /n pattern
+ escape_seq = %r{"\x80"}n
+ %r{#{escape_seq}}n.should == /(?-mix:"\x80")/n
+ end
+end
diff --git a/spec/ruby/language/regexp/modifiers_spec.rb b/spec/ruby/language/regexp/modifiers_spec.rb
new file mode 100644
index 0000000000..03dec26f3f
--- /dev/null
+++ b/spec/ruby/language/regexp/modifiers_spec.rb
@@ -0,0 +1,110 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with modifers" do
+ it "supports /i (case-insensitive)" do
+ /foo/i.match("FOO").to_a.should == ["FOO"]
+ end
+
+ it "supports /m (multiline)" do
+ /foo.bar/m.match("foo\nbar").to_a.should == ["foo\nbar"]
+ /foo.bar/.match("foo\nbar").should be_nil
+ end
+
+ it "supports /x (extended syntax)" do
+ /\d +/x.match("abc123").to_a.should == ["123"] # Quantifiers can be separated from the expression they apply to
+ end
+
+ it "supports /o (once)" do
+ 2.times do |i|
+ /#{i}/o.should == /0/
+ end
+ end
+
+ it "invokes substitutions for /o only once" do
+ ScratchPad.record []
+ o = Object.new
+ def o.to_s
+ ScratchPad << :to_s
+ "class_with_to_s"
+ end
+ eval "2.times { /#{o}/o }"
+ ScratchPad.recorded.should == [:to_s]
+ end
+
+ it "supports modifier combinations" do
+ /foo/imox.match("foo").to_a.should == ["foo"]
+ /foo/imoximox.match("foo").to_a.should == ["foo"]
+
+ lambda { eval('/foo/a') }.should raise_error(SyntaxError)
+ end
+
+ it "supports (?imx-imx) (inline modifiers)" do
+ /(?i)foo/.match("FOO").to_a.should == ["FOO"]
+ /foo(?i)/.match("FOO").should be_nil
+ # Interaction with /i
+ /(?-i)foo/i.match("FOO").should be_nil
+ /foo(?-i)/i.match("FOO").to_a.should == ["FOO"]
+ # Multiple uses
+ /foo (?i)bar (?-i)baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
+ /foo (?i)bar (?-i)baz/.match("foo BAR BAZ").should be_nil
+
+ /(?m)./.match("\n").to_a.should == ["\n"]
+ /.(?m)/.match("\n").should be_nil
+ # Interaction with /m
+ /(?-m)./m.match("\n").should be_nil
+ /.(?-m)/m.match("\n").to_a.should == ["\n"]
+ # Multiple uses
+ /. (?m). (?-m)./.match(". \n .").to_a.should == [". \n ."]
+ /. (?m). (?-m)./.match(". \n \n").should be_nil
+
+ /(?x) foo /.match("foo").to_a.should == ["foo"]
+ / foo (?x)/.match("foo").should be_nil
+ # Interaction with /x
+ /(?-x) foo /x.match("foo").should be_nil
+ / foo (?-x)/x.match("foo").to_a.should == ["foo"]
+ # Multiple uses
+ /( foo )(?x)( bar )(?-x)( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", "bar", " baz "]
+ /( foo )(?x)( bar )(?-x)( baz )/.match(" foo barbaz").should be_nil
+
+ # Parsing
+ /(?i-i)foo/.match("FOO").should be_nil
+ /(?ii)foo/.match("FOO").to_a.should == ["FOO"]
+ /(?-)foo/.match("foo").to_a.should == ["foo"]
+ lambda { eval('/(?o)/') }.should raise_error(SyntaxError)
+ end
+
+ it "supports (?imx-imx:expr) (scoped inline modifiers)" do
+ /foo (?i:bar) baz/.match("foo BAR baz").to_a.should == ["foo BAR baz"]
+ /foo (?i:bar) baz/.match("foo BAR BAZ").should be_nil
+ /foo (?-i:bar) baz/i.match("foo BAR BAZ").should be_nil
+
+ /. (?m:.) ./.match(". \n .").to_a.should == [". \n ."]
+ /. (?m:.) ./.match(". \n \n").should be_nil
+ /. (?-m:.) ./m.match("\n \n \n").should be_nil
+
+ /( foo )(?x: bar )( baz )/.match(" foo bar baz ").to_a.should == [" foo bar baz ", " foo ", " baz "]
+ /( foo )(?x: bar )( baz )/.match(" foo barbaz").should be_nil
+ /( foo )(?-x: bar )( baz )/x.match("foo bar baz").to_a.should == ["foo bar baz", "foo", "baz"]
+
+ # Parsing
+ /(?i-i:foo)/.match("FOO").should be_nil
+ /(?ii:foo)/.match("FOO").to_a.should == ["FOO"]
+ /(?-:)foo/.match("foo").to_a.should == ["foo"]
+ lambda { eval('/(?o:)/') }.should raise_error(SyntaxError)
+ end
+
+ it "supports . with /m" do
+ # Basic matching
+ /./m.match("\n").to_a.should == ["\n"]
+ end
+
+ it "supports ASII/Unicode modifiers" do
+ eval('/(?a)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a"]
+ eval('/(?d)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a\u3042"]
+ eval('/(?u)[[:alpha:]]+/').match("a\u3042").to_a.should == ["a\u3042"]
+ eval('/(?a)\w+/').match("a\u3042").to_a.should == ["a"]
+ eval('/(?d)\w+/').match("a\u3042").to_a.should == ["a"]
+ eval('/(?u)\w+/').match("a\u3042").to_a.should == ["a\u3042"]
+ end
+end
diff --git a/spec/ruby/language/regexp/repetition_spec.rb b/spec/ruby/language/regexp/repetition_spec.rb
new file mode 100644
index 0000000000..2fc8a74a47
--- /dev/null
+++ b/spec/ruby/language/regexp/repetition_spec.rb
@@ -0,0 +1,57 @@
+require File.expand_path('../../../spec_helper', __FILE__)
+require File.expand_path('../../fixtures/classes', __FILE__)
+
+describe "Regexps with repetition" do
+ it "supports * (0 or more of previous subexpression)" do
+ /a*/.match("aaa").to_a.should == ["aaa"]
+ /a*/.match("bbb").to_a.should == [""]
+ /<.*>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+ end
+
+ it "supports *? (0 or more of previous subexpression - lazy)" do
+ /a*?/.match("aaa").to_a.should == [""]
+ /<.*?>/.match("<a>foo</a>").to_a.should == ["<a>"]
+ end
+
+ it "supports + (1 or more of previous subexpression)" do
+ /a+/.match("aaa").to_a.should == ["aaa"]
+ /a+/.match("bbb").should be_nil
+ /<.+>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+ end
+
+ it "supports +? (0 or more of previous subexpression - lazy)" do
+ /a+?/.match("aaa").to_a.should == ["a"]
+ /<.+?>/.match("<a>foo</a>").to_a.should == ["<a>"]
+ end
+
+ it "supports {m,n} (m to n of previous subexpression)" do
+ /a{2,4}/.match("aaaaaa").to_a.should == ["aaaa"]
+ /<.{1,}>/.match("<a>foo</a>").to_a.should == ["<a>foo</a>"] # it is greedy
+ end
+
+ it "supports {m,n}? (m to n of previous subexpression) - lazy)" do
+ /<.{1,}?>/.match("<a>foo</a>").to_a.should == ["<a>"]
+ /.([0-9]){3,5}?foo/.match("9876543210foo").to_a.should == ["543210foo", "0"]
+ end
+
+ ruby_version_is ""..."2.4" do
+ it "does not treat {m,n}+ as possessive" do
+ @regexp = eval "/foo(A{0,1}+)Abar/"
+ @regexp.match("fooAAAbar").to_a.should == ["fooAAAbar", "AA"]
+ end
+ end
+
+ ruby_version_is "2.4" do
+ it "does not treat {m,n}+ as possessive" do
+ -> {
+ @regexp = eval "/foo(A{0,1}+)Abar/"
+ }.should complain(/nested repeat operato/)
+ @regexp.match("fooAAAbar").to_a.should == ["fooAAAbar", "AA"]
+ end
+ end
+
+ it "supports ? (0 or 1 of previous subexpression)" do
+ /a?/.match("aaa").to_a.should == ["a"]
+ /a?/.match("bbb").to_a.should == [""]
+ end
+end