From 95c420c4a65ca2e7f3edf27134ad33691959296c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 14 Oct 2019 12:40:50 +0900 Subject: Import StringScanner 1.0.3 (#2553) --- NEWS | 5 + ext/strscan/extconf.rb | 3 +- ext/strscan/strscan.c | 288 +++++++++++++++++++++------ ext/strscan/strscan.gemspec | 9 +- spec/ruby/library/stringscanner/dup_spec.rb | 2 +- spec/ruby/library/stringscanner/scan_spec.rb | 42 +++- test/strscan/test_stringscanner.rb | 240 +++++++++++++++------- 7 files changed, 449 insertions(+), 140 deletions(-) diff --git a/NEWS b/NEWS index d26523352f..a3283a66b1 100644 --- a/NEWS +++ b/NEWS @@ -416,6 +416,11 @@ RubyGems:: * Upgrade to RubyGems 3.1.0.pre1 Bundled from https://github.com/rubygems/rubygems/commit/97b264f0fa248c864b6ee9a23d3ff1cdd217dddb +StringScanner:: + + * Upgrade to 1.0.3. + See https://github.com/ruby/strscan/blob/master/NEWS.md. + === Compatibility issues (excluding feature bug fixes) * Removed unmaintained libraries. diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb index 714fa99fae..f0ecbf85d8 100644 --- a/ext/strscan/extconf.rb +++ b/ext/strscan/extconf.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true require 'mkmf' -$INCFLAGS << " -I$(top_srcdir)" +$INCFLAGS << " -I$(top_srcdir)" if $extmk +have_func("onig_region_memsize", "ruby.h") create_makefile 'strscan' diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c index 77a36fe323..99d6992601 100644 --- a/ext/strscan/strscan.c +++ b/ext/strscan/strscan.c @@ -11,9 +11,18 @@ #include "ruby/ruby.h" #include "ruby/re.h" #include "ruby/encoding.h" -#include "regint.h" -#define STRSCAN_VERSION "0.7.0" +#ifdef RUBY_EXTCONF_H +# include RUBY_EXTCONF_H +#endif + +#ifdef HAVE_ONIG_REGION_MEMSIZE +extern size_t onig_region_memsize(const struct re_registers *regs); +#endif + +#include <stdbool.h> + +#define STRSCAN_VERSION "1.0.3" /* ======================================================================= Data Type Definitions @@ -41,6 +50,9 @@ struct strscanner /* regexp used for last 
scan */ VALUE regex; + + /* anchor mode */ + bool fixed_anchor_p; }; #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) @@ -186,7 +198,11 @@ static size_t strscan_memsize(const void *ptr) { const struct strscanner *p = ptr; - return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs); + size_t size = sizeof(*p) - sizeof(p->regs); +#ifdef HAVE_ONIG_REGION_MEMSIZE + size += onig_region_memsize(&p->regs); +#endif + return size; } static const rb_data_type_t strscanner_type = { @@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass) } /* - * call-seq: StringScanner.new(string, dup = false) + * call-seq: + * StringScanner.new(string, fixed_anchor: false) + * StringScanner.new(string, dup = false) * * Creates a new StringScanner object to scan over the given +string+. + * + * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of + * the string. Otherwise, +\A+ always matches the current position. + * * +dup+ argument is obsolete and not used now. */ static VALUE strscan_initialize(int argc, VALUE *argv, VALUE self) { struct strscanner *p; - VALUE str, need_dup; + VALUE str, options; p = check_strscan(self); - rb_scan_args(argc, argv, "11", &str, &need_dup); + rb_scan_args(argc, argv, "11", &str, &options); + options = rb_check_hash_type(options); + if (!NIL_P(options)) { + VALUE fixed_anchor; + ID keyword_ids[1]; + keyword_ids[0] = rb_intern("fixed_anchor"); + rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor); + if (fixed_anchor == Qundef) { + p->fixed_anchor_p = false; + } + else { + p->fixed_anchor_p = RTEST(fixed_anchor); + } + } + else { + p->fixed_anchor_p = false; + } StringValue(str); p->str = str; @@ -294,7 +332,7 @@ strscan_reset(VALUE self) * terminate * clear * - * Set the scan pointer to the end of the string and clear matching data. + * Sets the scan pointer to the end of the string and clear matching data. 
*/ static VALUE strscan_terminate(VALUE self) @@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self) /* * call-seq: pos=(n) * - * Set the byte position of the scan pointer. + * Sets the byte position of the scan pointer. * * s = StringScanner.new('test string') * s.pos = 7 # -> 7 @@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v) return INT2NUM(i); } +static inline UChar * +match_target(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return (UChar *)S_PBEG(p); + } + else + { + return (UChar *)CURPTR(p); + } +} + +static inline void +set_registers(struct strscanner *p, size_t length) +{ + onig_region_clear(&(p->regs)); + if (p->fixed_anchor_p) { + onig_region_set(&(p->regs), 0, p->curr, p->curr + length); + } + else + { + onig_region_set(&(p->regs), 0, 0, length); + } +} + +static inline void +succ(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + p->curr = p->regs.end[0]; + } + else + { + p->curr += p->regs.end[0]; + } +} + +static inline long +last_match_length(struct strscanner *p) +{ + if (p->fixed_anchor_p) { + return p->regs.end[0] - p->prev; + } + else + { + return p->regs.end[0]; + } +} + +static inline long +adjust_register_position(struct strscanner *p, long position) +{ + if (p->fixed_anchor_p) { + return position; + } + else { + return p->prev + position; + } +} + static VALUE -strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) +strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly) { - regex_t *rb_reg_prepare_re(VALUE re, VALUE str); struct strscanner *p; - regex_t *re; - long ret; - int tmpreg; - Check_Type(regex, T_REGEXP); + if (headonly) { + if (!RB_TYPE_P(pattern, T_REGEXP)) { + StringValue(pattern); + } + } + else { + Check_Type(pattern, T_REGEXP); + } GET_SCANNER(self, p); CLEAR_MATCH_STATUS(p); @@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) return Qnil; } - p->regex = regex; - re = rb_reg_prepare_re(regex, p->str); - 
tmpreg = re != RREGEXP_PTR(regex); - if (!tmpreg) RREGEXP(regex)->usecnt++; + if (RB_TYPE_P(pattern, T_REGEXP)) { + regex_t *rb_reg_prepare_re(VALUE re, VALUE str); + regex_t *re; + long ret; + int tmpreg; + + p->regex = pattern; + re = rb_reg_prepare_re(pattern, p->str); + tmpreg = re != RREGEXP_PTR(pattern); + if (!tmpreg) RREGEXP(pattern)->usecnt++; + + if (headonly) { + ret = onig_match(re, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + &(p->regs), + ONIG_OPTION_NONE); + } + else { + ret = onig_search(re, + match_target(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + (UChar* )CURPTR(p), + (UChar* )(CURPTR(p) + S_RESTLEN(p)), + &(p->regs), + ONIG_OPTION_NONE); + } + if (!tmpreg) RREGEXP(pattern)->usecnt--; + if (tmpreg) { + if (RREGEXP(pattern)->usecnt) { + onig_free(re); + } + else { + onig_free(RREGEXP_PTR(pattern)); + RREGEXP_PTR(pattern) = re; + } + } - if (headonly) { - ret = onig_match(re, (UChar* )CURPTR(p), - (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE); + if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); + if (ret < 0) { + /* not matched */ + return Qnil; + } } else { - ret = onig_search(re, - (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), - (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)), - &(p->regs), ONIG_OPTION_NONE); - } - if (!tmpreg) RREGEXP(regex)->usecnt--; - if (tmpreg) { - if (RREGEXP(regex)->usecnt) { - onig_free(re); + rb_enc_check(p->str, pattern); + if (S_RESTLEN(p) < RSTRING_LEN(pattern)) { + return Qnil; } - else { - onig_free(RREGEXP_PTR(regex)); - RREGEXP_PTR(regex) = re; + if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) { + return Qnil; } - } - - if (ret == -2) rb_raise(ScanError, "regexp buffer overflow"); - if (ret < 0) { - /* not matched */ - return Qnil; + set_registers(p, RSTRING_LEN(pattern)); } MATCHED(p); p->prev = p->curr; + if (succptr) { - p->curr += p->regs.end[0]; - } - if (getstr) { - return 
extract_beg_len(p, p->prev, p->regs.end[0]); + succ(p); } - else { - return INT2FIX(p->regs.end[0]); + { + const long length = last_match_length(p); + if (getstr) { + return extract_beg_len(p, p->prev, length); + } + else { + return INT2FIX(length); + } } } @@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly) * p s.scan(/\w+/) # -> "test" * p s.scan(/\w+/) # -> nil * p s.scan(/\s+/) # -> " " - * p s.scan(/\w+/) # -> "string" + * p s.scan("str") # -> "str" + * p s.scan(/\w+/) # -> "ing" * p s.scan(/./) # -> nil * */ @@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re) * s = StringScanner.new('test string') * p s.match?(/\w+/) # -> 4 * p s.match?(/\w+/) # -> 4 + * p s.match?("test") # -> 4 * p s.match?(/\s+/) # -> nil */ static VALUE @@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re) * p s.skip(/\w+/) # -> 4 * p s.skip(/\w+/) # -> nil * p s.skip(/\s+/) # -> 1 - * p s.skip(/\w+/) # -> 6 + * p s.skip("st") # -> 2 + * p s.skip(/\w+/) # -> 4 * p s.skip(/./) # -> nil * */ @@ -704,7 +835,12 @@ static void adjust_registers_to_matched(struct strscanner *p) { onig_region_clear(&(p->regs)); - onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev)); + if (p->fixed_anchor_p) { + onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr); + } + else { + onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev)); + } } /* @@ -738,8 +874,9 @@ strscan_getch(VALUE self) p->curr += len; MATCHED(p); adjust_registers_to_matched(p); - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, p->regs.end[0])); } /* @@ -772,8 +909,9 @@ strscan_get_byte(VALUE self) p->curr++; MATCHED(p); adjust_registers_to_matched(p); - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, 
p->regs.end[0])); } /* @@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen) } /* - * Set the scan pointer to the previous position. Only one previous position is + * Sets the scan pointer to the previous position. Only one previous position is * remembered, and it changes with each scanning operation. * * s = StringScanner.new('test string') @@ -951,8 +1089,9 @@ strscan_matched(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return extract_range(p, p->prev + p->regs.beg[0], - p->prev + p->regs.end[0]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[0]), + adjust_register_position(p, p->regs.end[0])); } /* @@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx) if (i >= p->regs.num_regs) return Qnil; if (p->regs.beg[i] == -1) return Qnil; - return extract_range(p, p->prev + p->regs.beg[i], - p->prev + p->regs.end[i]); + return extract_range(p, + adjust_register_position(p, p->regs.beg[i]), + adjust_register_position(p, p->regs.end[i])); } /* @@ -1098,8 +1238,9 @@ strscan_captures(VALUE self) new_ary = rb_ary_new2(num_regs); for (i = 1; i < num_regs; i++) { - VALUE str = extract_range(p, p->prev + p->regs.beg[i], - p->prev + p->regs.end[i]); + VALUE str = extract_range(p, + adjust_register_position(p, p->regs.beg[i]), + adjust_register_position(p, p->regs.end[i])); rb_ary_push(new_ary, str); } @@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return extract_range(p, 0, p->prev + p->regs.beg[0]); + return extract_range(p, + 0, + adjust_register_position(p, p->regs.beg[0])); } /* @@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self) GET_SCANNER(self, p); if (! MATCHED_P(p)) return Qnil; - return extract_range(p, p->prev + p->regs.end[0], S_LEN(p)); + return extract_range(p, + adjust_register_position(p, p->regs.end[0]), + S_LEN(p)); } /* @@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p) return rb_str_dump(str); } +/* + * call-seq: + * scanner.fixed_anchor? 
-> true or false + * + * Whether +scanner+ uses fixed anchor mode or not. + * + * If fixed anchor mode is used, +\A+ always matches the beginning of + * the string. Otherwise, +\A+ always matches the current position. + */ +static VALUE +strscan_fixed_anchor_p(VALUE self) +{ + struct strscanner *p; + p = check_strscan(self); + return p->fixed_anchor_p ? Qtrue : Qfalse; +} + /* ======================================================================= Ruby Interface ======================================================================= */ @@ -1488,4 +1650,6 @@ Init_strscan(void) rb_define_method(StringScanner, "restsize", strscan_restsize, 0); rb_define_method(StringScanner, "inspect", strscan_inspect, 0); + + rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0); } diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec index eefe8fbf2c..4759c6c860 100644 --- a/ext/strscan/strscan.gemspec +++ b/ext/strscan/strscan.gemspec @@ -1,19 +1,20 @@ # frozen_string_literal: true Gem::Specification.new do |s| s.name = "strscan" - s.version = '1.0.0' + s.version = '1.0.3' s.summary = "Provides lexical scanning operations on a String." s.description = "Provides lexical scanning operations on a String." 
s.require_path = %w{lib} - s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c ext/strscan/regenc.h ext/strscan/regint.h} + s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c} s.extensions = %w{ext/strscan/extconf.rb} s.required_ruby_version = ">= 2.4.0" - s.authors = ["Minero Aoki"] - s.email = [nil] + s.authors = ["Minero Aoki", "Sutou Kouhei"] + s.email = [nil, "kou@cozmixng.org"] s.homepage = "https://github.com/ruby/strscan" s.license = "BSD-2-Clause" s.add_development_dependency "rake-compiler" + s.add_development_dependency "benchmark-driver" end diff --git a/spec/ruby/library/stringscanner/dup_spec.rb b/spec/ruby/library/stringscanner/dup_spec.rb index 3b426f138e..0fc52a1477 100644 --- a/spec/ruby/library/stringscanner/dup_spec.rb +++ b/spec/ruby/library/stringscanner/dup_spec.rb @@ -12,7 +12,7 @@ describe "StringScanner#dup" do s.string.should == @string end - it "copies the passed StringSCanner's position to self" do + it "copies the passed StringScanner's position to self" do @orig_s.pos = 5 s = @orig_s.dup s.pos.should eql(5) diff --git a/spec/ruby/library/stringscanner/scan_spec.rb b/spec/ruby/library/stringscanner/scan_spec.rb index 8b9960e6e6..2269abd6b3 100644 --- a/spec/ruby/library/stringscanner/scan_spec.rb +++ b/spec/ruby/library/stringscanner/scan_spec.rb @@ -50,10 +50,48 @@ describe "StringScanner#scan" do @s.scan(/./).should be_nil end - it "raises a TypeError if pattern isn't a Regexp" do - -> { @s.scan("aoeu") }.should raise_error(TypeError) + ruby_version_is ""..."2.7" do + it "raises a TypeError if pattern is a String" do + -> { @s.scan("aoeu") }.should raise_error(TypeError) + end + end + + ruby_version_is "2.7" do + it "treats String as the pattern itself" do + @s.scan("this").should be_nil + @s.scan("This").should == "This" + end + end + + it "raises a TypeError if pattern isn't a Regexp nor String" do -> { @s.scan(5) }.should raise_error(TypeError) -> { @s.scan(:test) }.should raise_error(TypeError) -> { 
@s.scan(mock('x')) }.should raise_error(TypeError) end end + +describe "StringScanner#scan with fixed_anchor: true" do + before :each do + @s = StringScanner.new("This\nis\na\ntest", fixed_anchor: true) + end + + ruby_version_is "2.7" do + it "returns the matched string" do + @s.scan(/\w+/).should == "This" + @s.scan(/.../m).should == "\nis" + @s.scan(//).should == "" + @s.scan(/\s+/).should == "\n" + end + + it "treats ^ as matching from the beginning of line" do + @s.scan(/\w+\n/).should == "This\n" + @s.scan(/^\w/).should == "i" + @s.scan(/^\w/).should be_nil + end + + it "treats \\A as matching from the beginning of string" do + @s.scan(/\A\w/).should == "T" + @s.scan(/\A\w/).should be_nil + end + end +end diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb index 3423f9cfed..5e798028b7 100644 --- a/test/strscan/test_stringscanner.rb +++ b/test/strscan/test_stringscanner.rb @@ -8,15 +8,19 @@ require 'strscan' require 'test/unit' class TestStringScanner < Test::Unit::TestCase + def create_string_scanner(string, *args) + StringScanner.new(string, *args) + end + def test_s_new - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_instance_of StringScanner, s assert_equal false, s.eos? assert_equal false, s.tainted? str = 'test string'.dup str.taint - s = StringScanner.new(str, false) + s = create_string_scanner(str, false) assert_instance_of StringScanner, s assert_equal false, s.eos? assert_same str, s.string @@ -24,7 +28,7 @@ class TestStringScanner < Test::Unit::TestCase str = 'test string'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) assert_equal true, s.string.tainted? 
end @@ -48,7 +52,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_dup - s = StringScanner.new('test string') + s = create_string_scanner('test string') d = s.dup assert_equal s.inspect, d.inspect assert_equal s.string, d.string @@ -56,7 +60,7 @@ class TestStringScanner < Test::Unit::TestCase assert_equal s.matched?, d.matched? assert_equal s.eos?, d.eos? - s = StringScanner.new('test string') + s = create_string_scanner('test string') s.scan(/test/) d = s.dup assert_equal s.inspect, d.inspect @@ -65,7 +69,7 @@ class TestStringScanner < Test::Unit::TestCase assert_equal s.matched?, d.matched? assert_equal s.eos?, d.eos? - s = StringScanner.new('test string') + s = create_string_scanner('test string') s.scan(/test/) s.scan(/NOT MATCH/) d = s.dup @@ -75,7 +79,7 @@ class TestStringScanner < Test::Unit::TestCase assert_equal s.matched?, d.matched? assert_equal s.eos?, d.eos? - s = StringScanner.new('test string') + s = create_string_scanner('test string') s.terminate d = s.dup assert_equal s.inspect, d.inspect @@ -98,7 +102,7 @@ class TestStringScanner < Test::Unit::TestCase def test_inspect str = 'test string'.dup str.taint - s = StringScanner.new(str, false) + s = create_string_scanner(str, false) assert_instance_of String, s.inspect assert_equal s.inspect, s.inspect assert_equal '#<StringScanner 0/11 @ "test ...">', s.inspect.sub(/StringScanner_C/, 'StringScanner') @@ -106,12 +110,12 @@ class TestStringScanner < Test::Unit::TestCase assert_equal '#<StringScanner 1/11 "t" @ "est s...">', s.inspect.sub(/StringScanner_C/, 'StringScanner') assert_equal true, s.inspect.tainted? - s = StringScanner.new("\n") + s = create_string_scanner("\n") assert_equal '#<StringScanner 0/1 @ "\n">', s.inspect end def test_eos? - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_equal false, s.eos? assert_equal false, s.eos? s.scan(/\w+/) @@ -124,14 +128,14 @@ class TestStringScanner < Test::Unit::TestCase s.scan(/\w+/) assert_equal true, s.eos? 
- s = StringScanner.new('test'.dup) + s = create_string_scanner('test'.dup) s.scan(/te/) s.string.replace '' assert_equal true, s.eos? end def test_bol? - s = StringScanner.new("a\nbbb\n\ncccc\nddd\r\neee") + s = create_string_scanner("a\nbbb\n\ncccc\nddd\r\neee") assert_equal true, s.bol? assert_equal true, s.bol? s.scan(/a/) @@ -161,7 +165,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_string - s = StringScanner.new('test') + s = create_string_scanner('test') assert_equal 'test', s.string s.string = 'a' assert_equal 'a', s.string @@ -173,7 +177,7 @@ class TestStringScanner < Test::Unit::TestCase def test_string_set_is_equal name = 'tenderlove' - s = StringScanner.new(name) + s = create_string_scanner(name) assert_equal name.object_id, s.string.object_id s.string = name @@ -181,7 +185,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_string_append - s = StringScanner.new('tender'.dup) + s = create_string_scanner('tender'.dup) s << 'love' assert_equal 'tenderlove', s.string @@ -191,7 +195,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_pos - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_equal 0, s.pos s.get_byte assert_equal 1, s.pos @@ -202,7 +206,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_pos_unicode - s = StringScanner.new("abcädeföghi") + s = create_string_scanner("abcädeföghi") assert_equal 0, s.charpos assert_equal "abcä", s.scan_until(/ä/) assert_equal 4, s.charpos @@ -213,7 +217,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_concat - s = StringScanner.new('a'.dup) + s = create_string_scanner('a'.dup) s.scan(/a/) s.concat 'b' assert_equal false, s.eos? @@ -226,7 +230,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_scan - s = StringScanner.new('stra strb strc', true) + s = create_string_scanner('stra strb strc', true) tmp = s.scan(/\w+/) assert_equal 'stra', tmp assert_equal false, tmp.tainted? 
@@ -248,7 +252,7 @@ class TestStringScanner < Test::Unit::TestCase str = 'stra strb strc'.dup str.taint - s = StringScanner.new(str, false) + s = create_string_scanner(str, false) tmp = s.scan(/\w+/) assert_equal 'stra', tmp assert_equal true, tmp.tainted? @@ -267,7 +271,7 @@ class TestStringScanner < Test::Unit::TestCase assert_nil s.scan(/\w+/) assert_nil s.scan(/\w+/) - s = StringScanner.new('test'.dup) + s = create_string_scanner('test'.dup) s.scan(/te/) # This assumes #string does not duplicate string, # but it is implementation specific issue. @@ -277,13 +281,29 @@ class TestStringScanner < Test::Unit::TestCase assert_equal nil, s.scan(/test/) # [ruby-bugs:4361] - s = StringScanner.new("") + s = create_string_scanner("") assert_equal "", s.scan(//) assert_equal "", s.scan(//) end + def test_scan_string + s = create_string_scanner('stra strb strc') + assert_equal 'str', s.scan('str') + assert_equal 'str', s[0] + assert_equal 3, s.pos + assert_equal false, s.tainted? + assert_equal 'a ', s.scan('a ') + + str = 'stra strb strc'.dup + str.taint + s = create_string_scanner(str, false) + matched = s.scan('str') + assert_equal 'str', matched + assert_equal true, matched.tainted? + end + def test_skip - s = StringScanner.new('stra strb strc', true) + s = create_string_scanner('stra strb strc', true) assert_equal 4, s.skip(/\w+/) assert_equal 1, s.skip(/\s+/) assert_equal 4, s.skip(/\w+/) @@ -293,19 +313,32 @@ class TestStringScanner < Test::Unit::TestCase assert_nil s.skip(/\s+/) assert_equal true, s.eos? 
- s = StringScanner.new('test'.dup) + s = create_string_scanner('test'.dup) s.scan(/te/) s.string.replace '' assert_equal nil, s.skip(/./) # [ruby-bugs:4361] - s = StringScanner.new("") + s = create_string_scanner("") assert_equal 0, s.skip(//) assert_equal 0, s.skip(//) end + def test_skip_with_begenning_of_string_anchor_match + s = create_string_scanner("a\nb") + assert_equal 2, s.skip(/a\n/) + assert_equal 1, s.skip(/\Ab/) + end + + def test_skip_with_begenning_of_line_anchor_match + s = create_string_scanner("a\nbc") + assert_equal 2, s.skip(/a\n/) + assert_equal 1, s.skip(/^b/) + assert_equal 1, s.skip(/^c/) + end + def test_getch - s = StringScanner.new('abcde') + s = create_string_scanner('abcde') assert_equal 'a', s.getch assert_equal 'b', s.getch assert_equal 'c', s.getch @@ -315,24 +348,24 @@ class TestStringScanner < Test::Unit::TestCase str = 'abc'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) assert_equal true, s.getch.tainted? assert_equal true, s.getch.tainted? assert_equal true, s.getch.tainted? assert_nil s.getch - s = StringScanner.new("\244\242".dup.force_encoding("euc-jp")) + s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) assert_equal "\244\242".dup.force_encoding("euc-jp"), s.getch assert_nil s.getch - s = StringScanner.new('test'.dup) + s = create_string_scanner('test'.dup) s.scan(/te/) s.string.replace '' assert_equal nil, s.getch end def test_get_byte - s = StringScanner.new('abcde') + s = create_string_scanner('abcde') assert_equal 'a', s.get_byte assert_equal 'b', s.get_byte assert_equal 'c', s.get_byte @@ -343,32 +376,34 @@ class TestStringScanner < Test::Unit::TestCase str = 'abc'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) assert_equal true, s.get_byte.tainted? assert_equal true, s.get_byte.tainted? assert_equal true, s.get_byte.tainted? 
assert_nil s.get_byte - s = StringScanner.new("\244\242".dup.force_encoding("euc-jp")) + s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) assert_equal "\244".dup.force_encoding("euc-jp"), s.get_byte assert_equal "\242".dup.force_encoding("euc-jp"), s.get_byte assert_nil s.get_byte - s = StringScanner.new('test'.dup) + s = create_string_scanner('test'.dup) s.scan(/te/) s.string.replace '' assert_equal nil, s.get_byte end def test_matched - s = StringScanner.new('stra strb strc') + s = create_string_scanner('stra strb strc') s.scan(/\w+/) assert_equal 'stra', s.matched assert_equal false, s.matched.tainted? s.scan(/\s+/) assert_equal ' ', s.matched + s.scan('st') + assert_equal 'st', s.matched s.scan(/\w+/) - assert_equal 'strb', s.matched + assert_equal 'rb', s.matched s.scan(/\s+/) assert_equal ' ', s.matched s.scan(/\w+/) @@ -378,7 +413,7 @@ class TestStringScanner < Test::Unit::TestCase s.getch assert_nil s.matched - s = StringScanner.new('stra strb strc') + s = create_string_scanner('stra strb strc') s.getch assert_equal 's', s.matched assert_equal false, s.matched.tainted? @@ -389,14 +424,14 @@ class TestStringScanner < Test::Unit::TestCase str = 'test'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) s.scan(/\w+/) assert_equal true, s.matched.tainted? assert_equal true, s.matched.tainted? end def test_AREF - s = StringScanner.new('stra strb strc') + s = create_string_scanner('stra strb strc') s.scan(/\w+/) assert_nil s[-2] @@ -447,13 +482,13 @@ class TestStringScanner < Test::Unit::TestCase assert_nil s[0] - s = StringScanner.new("\244\242".dup.force_encoding("euc-jp")) + s = create_string_scanner("\244\242".dup.force_encoding("euc-jp")) s.getch assert_equal "\244\242".dup.force_encoding("euc-jp"), s[0] str = 'test'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) s.scan(/(t)(e)(s)(t)/) assert_equal true, s[0].tainted? assert_equal true, s[1].tainted? 
@@ -461,7 +496,7 @@ class TestStringScanner < Test::Unit::TestCase assert_equal true, s[3].tainted? assert_equal true, s[4].tainted? - s = StringScanner.new("foo bar baz") + s = create_string_scanner("foo bar baz") s.scan(/(?<a>\w+) (?<b>\w+) (\w+)/) assert_equal 'foo', s[1] assert_equal 'bar', s[2] @@ -476,14 +511,14 @@ class TestStringScanner < Test::Unit::TestCase end def test_pre_match - s = StringScanner.new('a b c d e') + s = create_string_scanner('a b c d e') s.scan(/\w/) assert_equal '', s.pre_match assert_equal false, s.pre_match.tainted? s.skip(/\s/) assert_equal 'a', s.pre_match assert_equal false, s.pre_match.tainted? - s.scan(/\w/) + s.scan('b') assert_equal 'a ', s.pre_match s.scan_until(/c/) assert_equal 'a b ', s.pre_match @@ -498,7 +533,7 @@ class TestStringScanner < Test::Unit::TestCase str = 'test string'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) s.scan(/\w+/) assert_equal true, s.pre_match.tainted? s.scan(/\s+/) @@ -508,12 +543,12 @@ class TestStringScanner < Test::Unit::TestCase end def test_post_match - s = StringScanner.new('a b c d e') + s = create_string_scanner('a b c d e') s.scan(/\w/) assert_equal ' b c d e', s.post_match s.skip(/\s/) assert_equal 'b c d e', s.post_match - s.scan(/\w/) + s.scan('b') assert_equal ' c d e', s.post_match s.scan_until(/c/) assert_equal ' d e', s.post_match @@ -532,7 +567,7 @@ class TestStringScanner < Test::Unit::TestCase str = 'test string'.dup str.taint - s = StringScanner.new(str) + s = create_string_scanner(str) s.scan(/\w+/) assert_equal true, s.post_match.tainted? s.scan(/\s+/) @@ -542,7 +577,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_terminate - s = StringScanner.new('ssss') + s = create_string_scanner('ssss') s.getch s.terminate assert_equal true, s.eos? 
@@ -551,7 +586,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_reset - s = StringScanner.new('ssss') + s = create_string_scanner('ssss') s.getch s.reset assert_equal 0, s.pos @@ -563,7 +598,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_matched_size - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_nil s.matched_size s.scan(/test/) assert_equal 4, s.matched_size @@ -576,7 +611,7 @@ class TestStringScanner < Test::Unit::TestCase s.terminate assert_nil s.matched_size - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_nil s.matched_size s.scan(/test/) assert_equal 4, s.matched_size @@ -585,31 +620,45 @@ class TestStringScanner < Test::Unit::TestCase end def test_encoding - ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp")) + ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp")) assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding) end + def test_encoding_string + str = "\xA1\xA2".dup.force_encoding("euc-jp") + ss = create_string_scanner(str) + assert_equal(str.dup, ss.scan(str.dup)) + end + + def test_invalid_encoding_string + str = "\xA1\xA2".dup.force_encoding("euc-jp") + ss = create_string_scanner(str) + assert_raise(Encoding::CompatibilityError) do + ss.scan(str.encode("UTF-8")) + end + end + def test_generic_regexp - ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp")) + ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp")) t = ss.scan(/./) assert_equal("\xa1\xa2".dup.force_encoding("euc-jp"), t) end def test_set_pos - s = StringScanner.new("test string") + s = create_string_scanner("test string") s.pos = 7 assert_equal("ring", s.rest) end def test_match_p - s = StringScanner.new("test string") + s = create_string_scanner("test string") assert_equal(4, s.match?(/\w+/)) assert_equal(4, s.match?(/\w+/)) assert_equal(nil, s.match?(/\s+/)) end def test_check - s = StringScanner.new("Foo Bar Baz") + s = 
create_string_scanner("Foo Bar Baz") assert_equal("Foo", s.check(/Foo/)) assert_equal(0, s.pos) assert_equal("Foo", s.matched) @@ -618,7 +667,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_scan_full - s = StringScanner.new("Foo Bar Baz") + s = create_string_scanner("Foo Bar Baz") assert_equal(4, s.scan_full(/Foo /, false, false)) assert_equal(0, s.pos) assert_equal(nil, s.scan_full(/Baz/, false, false)) @@ -634,7 +683,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_exist_p - s = StringScanner.new("test string") + s = create_string_scanner("test string") assert_equal(3, s.exist?(/s/)) assert_equal(0, s.pos) s.scan(/test/) @@ -643,8 +692,15 @@ class TestStringScanner < Test::Unit::TestCase assert_equal(nil, s.exist?(/e/)) end + def test_exist_p_string + s = create_string_scanner("test string") + assert_raise(TypeError) do + s.exist?(" ") + end + end + def test_skip_until - s = StringScanner.new("Foo Bar Baz") + s = create_string_scanner("Foo Bar Baz") assert_equal(3, s.skip_until(/Foo/)) assert_equal(3, s.pos) assert_equal(4, s.skip_until(/Bar/)) @@ -653,7 +709,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_check_until - s = StringScanner.new("Foo Bar Baz") + s = create_string_scanner("Foo Bar Baz") assert_equal("Foo", s.check_until(/Foo/)) assert_equal(0, s.pos) assert_equal("Foo Bar", s.check_until(/Bar/)) @@ -662,7 +718,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_search_full - s = StringScanner.new("Foo Bar Baz") + s = create_string_scanner("Foo Bar Baz") assert_equal(8, s.search_full(/Bar /, false, false)) assert_equal(0, s.pos) assert_equal("Foo Bar ", s.search_full(/Bar /, false, true)) @@ -674,7 +730,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_peek - s = StringScanner.new("test string") + s = create_string_scanner("test string") assert_equal("test st", s.peek(7)) assert_equal("test st", s.peek(7)) s.scan(/test/) @@ -685,7 +741,7 @@ class TestStringScanner < 
Test::Unit::TestCase end def test_unscan - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_equal("test", s.scan(/\w+/)) s.unscan assert_equal("te", s.scan(/../)) @@ -694,7 +750,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_rest - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_equal("test string", s.rest) s.scan(/test/) assert_equal(" string", s.rest) @@ -704,7 +760,7 @@ class TestStringScanner < Test::Unit::TestCase end def test_rest_size - s = StringScanner.new('test string') + s = create_string_scanner('test string') assert_equal(11, s.rest_size) s.scan(/test/) assert_equal(7, s.rest_size) @@ -714,13 +770,13 @@ class TestStringScanner < Test::Unit::TestCase end def test_inspect2 - s = StringScanner.new('test string test') + s = create_string_scanner('test string test') s.scan(/test strin/) assert_equal('#<StringScanner 10/16 "...strin" @ "g tes...">', s.inspect) end def test_aref_without_regex - s = StringScanner.new('abc') + s = create_string_scanner('abc') s.get_byte assert_nil(s[:c]) assert_nil(s["c"]) @@ -730,13 +786,14 @@ class TestStringScanner < Test::Unit::TestCase end def test_size - s = StringScanner.new("Fri Dec 12 1975 14:39") + s = create_string_scanner("Fri Dec 12 1975 14:39") s.scan(/(\w+) (\w+) (\d+) /) assert_equal(4, s.size) end def test_captures - s = StringScanner.new("Fri Dec 12 1975 14:39") + s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39") + s.scan("Timestamp: ") s.scan(/(\w+) (\w+) (\d+) /) assert_equal(["Fri", "Dec", "12"], s.captures) s.scan(/(\w+) (\w+) (\d+) /) @@ -744,10 +801,53 @@ class TestStringScanner < Test::Unit::TestCase end def test_values_at - s = StringScanner.new("Fri Dec 12 1975 14:39") + s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39") + s.scan("Timestamp: ") s.scan(/(\w+) (\w+) (\d+) /) assert_equal(["Fri Dec 12 ", "12", nil, "Dec"], s.values_at(0, -1, 5, 2)) s.scan(/(\w+) (\w+) (\d+) /) assert_nil(s.values_at(0, -1, 5, 2)) end + + def 
test_fixed_anchor_true + assert_equal(true, StringScanner.new("a", fixed_anchor: true).fixed_anchor?) + end + + def test_fixed_anchor_false + assert_equal(false, StringScanner.new("a").fixed_anchor?) + assert_equal(false, StringScanner.new("a", true).fixed_anchor?) + assert_equal(false, StringScanner.new("a", false).fixed_anchor?) + assert_equal(false, StringScanner.new("a", {}).fixed_anchor?) + assert_equal(false, StringScanner.new("a", fixed_anchor: nil).fixed_anchor?) + assert_equal(false, StringScanner.new("a", fixed_anchor: false).fixed_anchor?) + end +end + +class TestStringScannerFixedAnchor < TestStringScanner + def create_string_scanner(string, *args) + StringScanner.new(string, fixed_anchor: true) + end + + def test_skip_with_begenning_of_string_anchor_match + s = create_string_scanner("a") + assert_equal 1, s.skip(/\Aa/) + end + + def test_skip_with_begenning_of_string_anchor_not_match + s = create_string_scanner("a\nb") + assert_equal 2, s.skip(/a\n/) + assert_nil s.skip(/\Ab/) + end + + def test_skip_with_begenning_of_line_anchor_match + s = create_string_scanner("a\nb") + assert_equal 2, s.skip(/a\n/) + assert_equal 1, s.skip(/^b/) + end + + def test_skip_with_begenning_of_line_anchor_not_match + s = create_string_scanner("ab") + assert_equal 1, s.skip(/a/) + assert_nil s.skip(/^b/) + end end -- cgit v1.2.3