aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sutou Kouhei <kou@cozmixng.org> 2019-10-14 12:40:50 +0900
committer: GitHub <noreply@github.com> 2019-10-14 12:40:50 +0900
commit: 95c420c4a65ca2e7f3edf27134ad33691959296c (patch)
tree: 844e207d891996ecf7c60950d8a1e652be6d1938
parent: 6fa3492362dc91cfec7eb4fd55918791da5a34fb (diff)
download: ruby-95c420c4a65ca2e7f3edf27134ad33691959296c.tar.gz
Import StringScanner 1.0.3 (#2553)
-rw-r--r-- NEWS 5
-rw-r--r-- ext/strscan/extconf.rb 3
-rw-r--r-- ext/strscan/strscan.c 288
-rw-r--r-- ext/strscan/strscan.gemspec 9
-rw-r--r-- spec/ruby/library/stringscanner/dup_spec.rb 2
-rw-r--r-- spec/ruby/library/stringscanner/scan_spec.rb 42
-rw-r--r-- test/strscan/test_stringscanner.rb 240
7 files changed, 449 insertions, 140 deletions
diff --git a/NEWS b/NEWS
index d26523352f..a3283a66b1 100644
--- a/NEWS
+++ b/NEWS
@@ -416,6 +416,11 @@ RubyGems::
* Upgrade to RubyGems 3.1.0.pre1
Bundled from https://github.com/rubygems/rubygems/commit/97b264f0fa248c864b6ee9a23d3ff1cdd217dddb
+StringScanner::
+
+ * Upgrade to 1.0.3.
+ See https://github.com/ruby/strscan/blob/master/NEWS.md.
+
=== Compatibility issues (excluding feature bug fixes)
* Removed unmaintained libraries.
diff --git a/ext/strscan/extconf.rb b/ext/strscan/extconf.rb
index 714fa99fae..f0ecbf85d8 100644
--- a/ext/strscan/extconf.rb
+++ b/ext/strscan/extconf.rb
@@ -1,4 +1,5 @@
# frozen_string_literal: true
require 'mkmf'
-$INCFLAGS << " -I$(top_srcdir)"
+$INCFLAGS << " -I$(top_srcdir)" if $extmk
+have_func("onig_region_memsize", "ruby.h")
create_makefile 'strscan'
diff --git a/ext/strscan/strscan.c b/ext/strscan/strscan.c
index 77a36fe323..99d6992601 100644
--- a/ext/strscan/strscan.c
+++ b/ext/strscan/strscan.c
@@ -11,9 +11,18 @@
#include "ruby/ruby.h"
#include "ruby/re.h"
#include "ruby/encoding.h"
-#include "regint.h"
-#define STRSCAN_VERSION "0.7.0"
+#ifdef RUBY_EXTCONF_H
+# include RUBY_EXTCONF_H
+#endif
+
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+extern size_t onig_region_memsize(const struct re_registers *regs);
+#endif
+
+#include <stdbool.h>
+
+#define STRSCAN_VERSION "1.0.3"
/* =======================================================================
Data Type Definitions
@@ -41,6 +50,9 @@ struct strscanner
/* regexp used for last scan */
VALUE regex;
+
+ /* anchor mode */
+ bool fixed_anchor_p;
};
#define MATCHED_P(s) ((s)->flags & FLAG_MATCHED)
@@ -186,7 +198,11 @@ static size_t
strscan_memsize(const void *ptr)
{
const struct strscanner *p = ptr;
- return sizeof(*p) - sizeof(p->regs) + onig_region_memsize(&p->regs);
+ size_t size = sizeof(*p) - sizeof(p->regs);
+#ifdef HAVE_ONIG_REGION_MEMSIZE
+ size += onig_region_memsize(&p->regs);
+#endif
+ return size;
}
static const rb_data_type_t strscanner_type = {
@@ -208,19 +224,41 @@ strscan_s_allocate(VALUE klass)
}
/*
- * call-seq: StringScanner.new(string, dup = false)
+ * call-seq:
+ * StringScanner.new(string, fixed_anchor: false)
+ * StringScanner.new(string, dup = false)
*
* Creates a new StringScanner object to scan over the given +string+.
+ *
+ * If +fixed_anchor+ is +true+, +\A+ always matches the beginning of
+ * the string. Otherwise, +\A+ always matches the current position.
+ *
* The +dup+ argument is obsolete and is no longer used.
*/
static VALUE
strscan_initialize(int argc, VALUE *argv, VALUE self)
{
struct strscanner *p;
- VALUE str, need_dup;
+ VALUE str, options;
p = check_strscan(self);
- rb_scan_args(argc, argv, "11", &str, &need_dup);
+ rb_scan_args(argc, argv, "11", &str, &options);
+ options = rb_check_hash_type(options);
+ if (!NIL_P(options)) {
+ VALUE fixed_anchor;
+ ID keyword_ids[1];
+ keyword_ids[0] = rb_intern("fixed_anchor");
+ rb_get_kwargs(options, keyword_ids, 0, 1, &fixed_anchor);
+ if (fixed_anchor == Qundef) {
+ p->fixed_anchor_p = false;
+ }
+ else {
+ p->fixed_anchor_p = RTEST(fixed_anchor);
+ }
+ }
+ else {
+ p->fixed_anchor_p = false;
+ }
StringValue(str);
p->str = str;
@@ -294,7 +332,7 @@ strscan_reset(VALUE self)
* terminate
* clear
*
- * Set the scan pointer to the end of the string and clear matching data.
+ * Sets the scan pointer to the end of the string and clears matching data.
*/
static VALUE
strscan_terminate(VALUE self)
@@ -425,7 +463,7 @@ strscan_get_charpos(VALUE self)
/*
* call-seq: pos=(n)
*
- * Set the byte position of the scan pointer.
+ * Sets the byte position of the scan pointer.
*
* s = StringScanner.new('test string')
* s.pos = 7 # -> 7
@@ -446,16 +484,79 @@ strscan_set_pos(VALUE self, VALUE v)
return INT2NUM(i);
}
+static inline UChar *
+match_target(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ return (UChar *)S_PBEG(p);
+ }
+ else
+ {
+ return (UChar *)CURPTR(p);
+ }
+}
+
+static inline void
+set_registers(struct strscanner *p, size_t length)
+{
+ onig_region_clear(&(p->regs));
+ if (p->fixed_anchor_p) {
+ onig_region_set(&(p->regs), 0, p->curr, p->curr + length);
+ }
+ else
+ {
+ onig_region_set(&(p->regs), 0, 0, length);
+ }
+}
+
+static inline void
+succ(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ p->curr = p->regs.end[0];
+ }
+ else
+ {
+ p->curr += p->regs.end[0];
+ }
+}
+
+static inline long
+last_match_length(struct strscanner *p)
+{
+ if (p->fixed_anchor_p) {
+ return p->regs.end[0] - p->prev;
+ }
+ else
+ {
+ return p->regs.end[0];
+ }
+}
+
+static inline long
+adjust_register_position(struct strscanner *p, long position)
+{
+ if (p->fixed_anchor_p) {
+ return position;
+ }
+ else {
+ return p->prev + position;
+ }
+}
+
static VALUE
-strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
+strscan_do_scan(VALUE self, VALUE pattern, int succptr, int getstr, int headonly)
{
- regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
struct strscanner *p;
- regex_t *re;
- long ret;
- int tmpreg;
- Check_Type(regex, T_REGEXP);
+ if (headonly) {
+ if (!RB_TYPE_P(pattern, T_REGEXP)) {
+ StringValue(pattern);
+ }
+ }
+ else {
+ Check_Type(pattern, T_REGEXP);
+ }
GET_SCANNER(self, p);
CLEAR_MATCH_STATUS(p);
@@ -463,49 +564,76 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
return Qnil;
}
- p->regex = regex;
- re = rb_reg_prepare_re(regex, p->str);
- tmpreg = re != RREGEXP_PTR(regex);
- if (!tmpreg) RREGEXP(regex)->usecnt++;
+ if (RB_TYPE_P(pattern, T_REGEXP)) {
+ regex_t *rb_reg_prepare_re(VALUE re, VALUE str);
+ regex_t *re;
+ long ret;
+ int tmpreg;
+
+ p->regex = pattern;
+ re = rb_reg_prepare_re(pattern, p->str);
+ tmpreg = re != RREGEXP_PTR(pattern);
+ if (!tmpreg) RREGEXP(pattern)->usecnt++;
+
+ if (headonly) {
+ ret = onig_match(re,
+ match_target(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ (UChar* )CURPTR(p),
+ &(p->regs),
+ ONIG_OPTION_NONE);
+ }
+ else {
+ ret = onig_search(re,
+ match_target(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ (UChar* )CURPTR(p),
+ (UChar* )(CURPTR(p) + S_RESTLEN(p)),
+ &(p->regs),
+ ONIG_OPTION_NONE);
+ }
+ if (!tmpreg) RREGEXP(pattern)->usecnt--;
+ if (tmpreg) {
+ if (RREGEXP(pattern)->usecnt) {
+ onig_free(re);
+ }
+ else {
+ onig_free(RREGEXP_PTR(pattern));
+ RREGEXP_PTR(pattern) = re;
+ }
+ }
- if (headonly) {
- ret = onig_match(re, (UChar* )CURPTR(p),
- (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p), &(p->regs), ONIG_OPTION_NONE);
+ if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
+ if (ret < 0) {
+ /* not matched */
+ return Qnil;
+ }
}
else {
- ret = onig_search(re,
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- (UChar* )CURPTR(p), (UChar* )(CURPTR(p) + S_RESTLEN(p)),
- &(p->regs), ONIG_OPTION_NONE);
- }
- if (!tmpreg) RREGEXP(regex)->usecnt--;
- if (tmpreg) {
- if (RREGEXP(regex)->usecnt) {
- onig_free(re);
+ rb_enc_check(p->str, pattern);
+ if (S_RESTLEN(p) < RSTRING_LEN(pattern)) {
+ return Qnil;
}
- else {
- onig_free(RREGEXP_PTR(regex));
- RREGEXP_PTR(regex) = re;
+ if (memcmp(CURPTR(p), RSTRING_PTR(pattern), RSTRING_LEN(pattern)) != 0) {
+ return Qnil;
}
- }
-
- if (ret == -2) rb_raise(ScanError, "regexp buffer overflow");
- if (ret < 0) {
- /* not matched */
- return Qnil;
+ set_registers(p, RSTRING_LEN(pattern));
}
MATCHED(p);
p->prev = p->curr;
+
if (succptr) {
- p->curr += p->regs.end[0];
- }
- if (getstr) {
- return extract_beg_len(p, p->prev, p->regs.end[0]);
+ succ(p);
}
- else {
- return INT2FIX(p->regs.end[0]);
+ {
+ const long length = last_match_length(p);
+ if (getstr) {
+ return extract_beg_len(p, p->prev, length);
+ }
+ else {
+ return INT2FIX(length);
+ }
}
}
@@ -520,7 +648,8 @@ strscan_do_scan(VALUE self, VALUE regex, int succptr, int getstr, int headonly)
* p s.scan(/\w+/) # -> "test"
* p s.scan(/\w+/) # -> nil
* p s.scan(/\s+/) # -> " "
- * p s.scan(/\w+/) # -> "string"
+ * p s.scan("str") # -> "str"
+ * p s.scan(/\w+/) # -> "ing"
* p s.scan(/./) # -> nil
*
*/
@@ -539,6 +668,7 @@ strscan_scan(VALUE self, VALUE re)
* s = StringScanner.new('test string')
* p s.match?(/\w+/) # -> 4
* p s.match?(/\w+/) # -> 4
+ * p s.match?("test") # -> 4
* p s.match?(/\s+/) # -> nil
*/
static VALUE
@@ -560,7 +690,8 @@ strscan_match_p(VALUE self, VALUE re)
* p s.skip(/\w+/) # -> 4
* p s.skip(/\w+/) # -> nil
* p s.skip(/\s+/) # -> 1
- * p s.skip(/\w+/) # -> 6
+ * p s.skip("st") # -> 2
+ * p s.skip(/\w+/) # -> 4
* p s.skip(/./) # -> nil
*
*/
@@ -704,7 +835,12 @@ static void
adjust_registers_to_matched(struct strscanner *p)
{
onig_region_clear(&(p->regs));
- onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
+ if (p->fixed_anchor_p) {
+ onig_region_set(&(p->regs), 0, (int)p->prev, (int)p->curr);
+ }
+ else {
+ onig_region_set(&(p->regs), 0, 0, (int)(p->curr - p->prev));
+ }
}
/*
@@ -738,8 +874,9 @@ strscan_getch(VALUE self)
p->curr += len;
MATCHED(p);
adjust_registers_to_matched(p);
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -772,8 +909,9 @@ strscan_get_byte(VALUE self)
p->curr++;
MATCHED(p);
adjust_registers_to_matched(p);
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -826,7 +964,7 @@ strscan_peep(VALUE self, VALUE vlen)
}
/*
- * Set the scan pointer to the previous position. Only one previous position is
+ * Sets the scan pointer to the previous position. Only one previous position is
* remembered, and it changes with each scanning operation.
*
* s = StringScanner.new('test string')
@@ -951,8 +1089,9 @@ strscan_matched(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, p->prev + p->regs.beg[0],
- p->prev + p->regs.end[0]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[0]),
+ adjust_register_position(p, p->regs.end[0]));
}
/*
@@ -1048,8 +1187,9 @@ strscan_aref(VALUE self, VALUE idx)
if (i >= p->regs.num_regs) return Qnil;
if (p->regs.beg[i] == -1) return Qnil;
- return extract_range(p, p->prev + p->regs.beg[i],
- p->prev + p->regs.end[i]);
+ return extract_range(p,
+ adjust_register_position(p, p->regs.beg[i]),
+ adjust_register_position(p, p->regs.end[i]));
}
/*
@@ -1098,8 +1238,9 @@ strscan_captures(VALUE self)
new_ary = rb_ary_new2(num_regs);
for (i = 1; i < num_regs; i++) {
- VALUE str = extract_range(p, p->prev + p->regs.beg[i],
- p->prev + p->regs.end[i]);
+ VALUE str = extract_range(p,
+ adjust_register_position(p, p->regs.beg[i]),
+ adjust_register_position(p, p->regs.end[i]));
rb_ary_push(new_ary, str);
}
@@ -1154,7 +1295,9 @@ strscan_pre_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, 0, p->prev + p->regs.beg[0]);
+ return extract_range(p,
+ 0,
+ adjust_register_position(p, p->regs.beg[0]));
}
/*
@@ -1173,7 +1316,9 @@ strscan_post_match(VALUE self)
GET_SCANNER(self, p);
if (! MATCHED_P(p)) return Qnil;
- return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
+ return extract_range(p,
+ adjust_register_position(p, p->regs.end[0]),
+ S_LEN(p));
}
/*
@@ -1302,6 +1447,23 @@ inspect2(struct strscanner *p)
return rb_str_dump(str);
}
+/*
+ * call-seq:
+ * scanner.fixed_anchor? -> true or false
+ *
+ * Returns whether +scanner+ uses fixed anchor mode.
+ *
+ * If fixed anchor mode is used, +\A+ always matches the beginning of
+ * the string. Otherwise, +\A+ always matches the current position.
+ */
+static VALUE
+strscan_fixed_anchor_p(VALUE self)
+{
+ struct strscanner *p;
+ p = check_strscan(self);
+ return p->fixed_anchor_p ? Qtrue : Qfalse;
+}
+
/* =======================================================================
Ruby Interface
======================================================================= */
@@ -1488,4 +1650,6 @@ Init_strscan(void)
rb_define_method(StringScanner, "restsize", strscan_restsize, 0);
rb_define_method(StringScanner, "inspect", strscan_inspect, 0);
+
+ rb_define_method(StringScanner, "fixed_anchor?", strscan_fixed_anchor_p, 0);
}
diff --git a/ext/strscan/strscan.gemspec b/ext/strscan/strscan.gemspec
index eefe8fbf2c..4759c6c860 100644
--- a/ext/strscan/strscan.gemspec
+++ b/ext/strscan/strscan.gemspec
@@ -1,19 +1,20 @@
# frozen_string_literal: true
Gem::Specification.new do |s|
s.name = "strscan"
- s.version = '1.0.0'
+ s.version = '1.0.3'
s.summary = "Provides lexical scanning operations on a String."
s.description = "Provides lexical scanning operations on a String."
s.require_path = %w{lib}
- s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c ext/strscan/regenc.h ext/strscan/regint.h}
+ s.files = %w{ext/strscan/extconf.rb ext/strscan/strscan.c}
s.extensions = %w{ext/strscan/extconf.rb}
s.required_ruby_version = ">= 2.4.0"
- s.authors = ["Minero Aoki"]
- s.email = [nil]
+ s.authors = ["Minero Aoki", "Sutou Kouhei"]
+ s.email = [nil, "kou@cozmixng.org"]
s.homepage = "https://github.com/ruby/strscan"
s.license = "BSD-2-Clause"
s.add_development_dependency "rake-compiler"
+ s.add_development_dependency "benchmark-driver"
end
diff --git a/spec/ruby/library/stringscanner/dup_spec.rb b/spec/ruby/library/stringscanner/dup_spec.rb
index 3b426f138e..0fc52a1477 100644
--- a/spec/ruby/library/stringscanner/dup_spec.rb
+++ b/spec/ruby/library/stringscanner/dup_spec.rb
@@ -12,7 +12,7 @@ describe "StringScanner#dup" do
s.string.should == @string
end
- it "copies the passed StringSCanner's position to self" do
+ it "copies the passed StringScanner's position to self" do
@orig_s.pos = 5
s = @orig_s.dup
s.pos.should eql(5)
diff --git a/spec/ruby/library/stringscanner/scan_spec.rb b/spec/ruby/library/stringscanner/scan_spec.rb
index 8b9960e6e6..2269abd6b3 100644
--- a/spec/ruby/library/stringscanner/scan_spec.rb
+++ b/spec/ruby/library/stringscanner/scan_spec.rb
@@ -50,10 +50,48 @@ describe "StringScanner#scan" do
@s.scan(/./).should be_nil
end
- it "raises a TypeError if pattern isn't a Regexp" do
- -> { @s.scan("aoeu") }.should raise_error(TypeError)
+ ruby_version_is ""..."2.7" do
+ it "raises a TypeError if pattern is a String" do
+ -> { @s.scan("aoeu") }.should raise_error(TypeError)
+ end
+ end
+
+ ruby_version_is "2.7" do
+ it "treats String as the pattern itself" do
+ @s.scan("this").should be_nil
+ @s.scan("This").should == "This"
+ end
+ end
+
+ it "raises a TypeError if pattern isn't a Regexp nor String" do
-> { @s.scan(5) }.should raise_error(TypeError)
-> { @s.scan(:test) }.should raise_error(TypeError)
-> { @s.scan(mock('x')) }.should raise_error(TypeError)
end
end
+
+describe "StringScanner#scan with fixed_anchor: true" do
+ before :each do
+ @s = StringScanner.new("This\nis\na\ntest", fixed_anchor: true)
+ end
+
+ ruby_version_is "2.7" do
+ it "returns the matched string" do
+ @s.scan(/\w+/).should == "This"
+ @s.scan(/.../m).should == "\nis"
+ @s.scan(//).should == ""
+ @s.scan(/\s+/).should == "\n"
+ end
+
+ it "treats ^ as matching from the beginning of line" do
+ @s.scan(/\w+\n/).should == "This\n"
+ @s.scan(/^\w/).should == "i"
+ @s.scan(/^\w/).should be_nil
+ end
+
+ it "treats \\A as matching from the beginning of string" do
+ @s.scan(/\A\w/).should == "T"
+ @s.scan(/\A\w/).should be_nil
+ end
+ end
+end
diff --git a/test/strscan/test_stringscanner.rb b/test/strscan/test_stringscanner.rb
index 3423f9cfed..5e798028b7 100644
--- a/test/strscan/test_stringscanner.rb
+++ b/test/strscan/test_stringscanner.rb
@@ -8,15 +8,19 @@ require 'strscan'
require 'test/unit'
class TestStringScanner < Test::Unit::TestCase
+ def create_string_scanner(string, *args)
+ StringScanner.new(string, *args)
+ end
+
def test_s_new
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_equal false, s.tainted?
str = 'test string'.dup
str.taint
- s = StringScanner.new(str, false)
+ s = create_string_scanner(str, false)
assert_instance_of StringScanner, s
assert_equal false, s.eos?
assert_same str, s.string
@@ -24,7 +28,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
assert_equal true, s.string.tainted?
end
@@ -48,7 +52,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_dup
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
d = s.dup
assert_equal s.inspect, d.inspect
assert_equal s.string, d.string
@@ -56,7 +60,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
s.scan(/test/)
d = s.dup
assert_equal s.inspect, d.inspect
@@ -65,7 +69,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
s.scan(/test/)
s.scan(/NOT MATCH/)
d = s.dup
@@ -75,7 +79,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal s.matched?, d.matched?
assert_equal s.eos?, d.eos?
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
s.terminate
d = s.dup
assert_equal s.inspect, d.inspect
@@ -98,7 +102,7 @@ class TestStringScanner < Test::Unit::TestCase
def test_inspect
str = 'test string'.dup
str.taint
- s = StringScanner.new(str, false)
+ s = create_string_scanner(str, false)
assert_instance_of String, s.inspect
assert_equal s.inspect, s.inspect
assert_equal '#<StringScanner 0/11 @ "test ...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
@@ -106,12 +110,12 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal '#<StringScanner 1/11 "t" @ "est s...">', s.inspect.sub(/StringScanner_C/, 'StringScanner')
assert_equal true, s.inspect.tainted?
- s = StringScanner.new("\n")
+ s = create_string_scanner("\n")
assert_equal '#<StringScanner 0/1 @ "\n">', s.inspect
end
def test_eos?
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_equal false, s.eos?
assert_equal false, s.eos?
s.scan(/\w+/)
@@ -124,14 +128,14 @@ class TestStringScanner < Test::Unit::TestCase
s.scan(/\w+/)
assert_equal true, s.eos?
- s = StringScanner.new('test'.dup)
+ s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal true, s.eos?
end
def test_bol?
- s = StringScanner.new("a\nbbb\n\ncccc\nddd\r\neee")
+ s = create_string_scanner("a\nbbb\n\ncccc\nddd\r\neee")
assert_equal true, s.bol?
assert_equal true, s.bol?
s.scan(/a/)
@@ -161,7 +165,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_string
- s = StringScanner.new('test')
+ s = create_string_scanner('test')
assert_equal 'test', s.string
s.string = 'a'
assert_equal 'a', s.string
@@ -173,7 +177,7 @@ class TestStringScanner < Test::Unit::TestCase
def test_string_set_is_equal
name = 'tenderlove'
- s = StringScanner.new(name)
+ s = create_string_scanner(name)
assert_equal name.object_id, s.string.object_id
s.string = name
@@ -181,7 +185,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_string_append
- s = StringScanner.new('tender'.dup)
+ s = create_string_scanner('tender'.dup)
s << 'love'
assert_equal 'tenderlove', s.string
@@ -191,7 +195,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pos
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_equal 0, s.pos
s.get_byte
assert_equal 1, s.pos
@@ -202,7 +206,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pos_unicode
- s = StringScanner.new("abcädeföghi")
+ s = create_string_scanner("abcädeföghi")
assert_equal 0, s.charpos
assert_equal "abcä", s.scan_until(/ä/)
assert_equal 4, s.charpos
@@ -213,7 +217,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_concat
- s = StringScanner.new('a'.dup)
+ s = create_string_scanner('a'.dup)
s.scan(/a/)
s.concat 'b'
assert_equal false, s.eos?
@@ -226,7 +230,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_scan
- s = StringScanner.new('stra strb strc', true)
+ s = create_string_scanner('stra strb strc', true)
tmp = s.scan(/\w+/)
assert_equal 'stra', tmp
assert_equal false, tmp.tainted?
@@ -248,7 +252,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'stra strb strc'.dup
str.taint
- s = StringScanner.new(str, false)
+ s = create_string_scanner(str, false)
tmp = s.scan(/\w+/)
assert_equal 'stra', tmp
assert_equal true, tmp.tainted?
@@ -267,7 +271,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s.scan(/\w+/)
assert_nil s.scan(/\w+/)
- s = StringScanner.new('test'.dup)
+ s = create_string_scanner('test'.dup)
s.scan(/te/)
# This assumes #string does not duplicate string,
# but that is an implementation-specific detail.
@@ -277,13 +281,29 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal nil, s.scan(/test/)
# [ruby-bugs:4361]
- s = StringScanner.new("")
+ s = create_string_scanner("")
assert_equal "", s.scan(//)
assert_equal "", s.scan(//)
end
+ def test_scan_string
+ s = create_string_scanner('stra strb strc')
+ assert_equal 'str', s.scan('str')
+ assert_equal 'str', s[0]
+ assert_equal 3, s.pos
+ assert_equal false, s.tainted?
+ assert_equal 'a ', s.scan('a ')
+
+ str = 'stra strb strc'.dup
+ str.taint
+ s = create_string_scanner(str, false)
+ matched = s.scan('str')
+ assert_equal 'str', matched
+ assert_equal true, matched.tainted?
+ end
+
def test_skip
- s = StringScanner.new('stra strb strc', true)
+ s = create_string_scanner('stra strb strc', true)
assert_equal 4, s.skip(/\w+/)
assert_equal 1, s.skip(/\s+/)
assert_equal 4, s.skip(/\w+/)
@@ -293,19 +313,32 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s.skip(/\s+/)
assert_equal true, s.eos?
- s = StringScanner.new('test'.dup)
+ s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.skip(/./)
# [ruby-bugs:4361]
- s = StringScanner.new("")
+ s = create_string_scanner("")
assert_equal 0, s.skip(//)
assert_equal 0, s.skip(//)
end
+ def test_skip_with_begenning_of_string_anchor_match
+ s = create_string_scanner("a\nb")
+ assert_equal 2, s.skip(/a\n/)
+ assert_equal 1, s.skip(/\Ab/)
+ end
+
+ def test_skip_with_begenning_of_line_anchor_match
+ s = create_string_scanner("a\nbc")
+ assert_equal 2, s.skip(/a\n/)
+ assert_equal 1, s.skip(/^b/)
+ assert_equal 1, s.skip(/^c/)
+ end
+
def test_getch
- s = StringScanner.new('abcde')
+ s = create_string_scanner('abcde')
assert_equal 'a', s.getch
assert_equal 'b', s.getch
assert_equal 'c', s.getch
@@ -315,24 +348,24 @@ class TestStringScanner < Test::Unit::TestCase
str = 'abc'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
assert_equal true, s.getch.tainted?
assert_equal true, s.getch.tainted?
assert_equal true, s.getch.tainted?
assert_nil s.getch
- s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
+ s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
assert_equal "\244\242".dup.force_encoding("euc-jp"), s.getch
assert_nil s.getch
- s = StringScanner.new('test'.dup)
+ s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.getch
end
def test_get_byte
- s = StringScanner.new('abcde')
+ s = create_string_scanner('abcde')
assert_equal 'a', s.get_byte
assert_equal 'b', s.get_byte
assert_equal 'c', s.get_byte
@@ -343,32 +376,34 @@ class TestStringScanner < Test::Unit::TestCase
str = 'abc'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
assert_equal true, s.get_byte.tainted?
assert_equal true, s.get_byte.tainted?
assert_equal true, s.get_byte.tainted?
assert_nil s.get_byte
- s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
+ s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
assert_equal "\244".dup.force_encoding("euc-jp"), s.get_byte
assert_equal "\242".dup.force_encoding("euc-jp"), s.get_byte
assert_nil s.get_byte
- s = StringScanner.new('test'.dup)
+ s = create_string_scanner('test'.dup)
s.scan(/te/)
s.string.replace ''
assert_equal nil, s.get_byte
end
def test_matched
- s = StringScanner.new('stra strb strc')
+ s = create_string_scanner('stra strb strc')
s.scan(/\w+/)
assert_equal 'stra', s.matched
assert_equal false, s.matched.tainted?
s.scan(/\s+/)
assert_equal ' ', s.matched
+ s.scan('st')
+ assert_equal 'st', s.matched
s.scan(/\w+/)
- assert_equal 'strb', s.matched
+ assert_equal 'rb', s.matched
s.scan(/\s+/)
assert_equal ' ', s.matched
s.scan(/\w+/)
@@ -378,7 +413,7 @@ class TestStringScanner < Test::Unit::TestCase
s.getch
assert_nil s.matched
- s = StringScanner.new('stra strb strc')
+ s = create_string_scanner('stra strb strc')
s.getch
assert_equal 's', s.matched
assert_equal false, s.matched.tainted?
@@ -389,14 +424,14 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.matched.tainted?
assert_equal true, s.matched.tainted?
end
def test_AREF
- s = StringScanner.new('stra strb strc')
+ s = create_string_scanner('stra strb strc')
s.scan(/\w+/)
assert_nil s[-2]
@@ -447,13 +482,13 @@ class TestStringScanner < Test::Unit::TestCase
assert_nil s[0]
- s = StringScanner.new("\244\242".dup.force_encoding("euc-jp"))
+ s = create_string_scanner("\244\242".dup.force_encoding("euc-jp"))
s.getch
assert_equal "\244\242".dup.force_encoding("euc-jp"), s[0]
str = 'test'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
s.scan(/(t)(e)(s)(t)/)
assert_equal true, s[0].tainted?
assert_equal true, s[1].tainted?
@@ -461,7 +496,7 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal true, s[3].tainted?
assert_equal true, s[4].tainted?
- s = StringScanner.new("foo bar baz")
+ s = create_string_scanner("foo bar baz")
s.scan(/(?<a>\w+) (?<b>\w+) (\w+)/)
assert_equal 'foo', s[1]
assert_equal 'bar', s[2]
@@ -476,14 +511,14 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_pre_match
- s = StringScanner.new('a b c d e')
+ s = create_string_scanner('a b c d e')
s.scan(/\w/)
assert_equal '', s.pre_match
assert_equal false, s.pre_match.tainted?
s.skip(/\s/)
assert_equal 'a', s.pre_match
assert_equal false, s.pre_match.tainted?
- s.scan(/\w/)
+ s.scan('b')
assert_equal 'a ', s.pre_match
s.scan_until(/c/)
assert_equal 'a b ', s.pre_match
@@ -498,7 +533,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.pre_match.tainted?
s.scan(/\s+/)
@@ -508,12 +543,12 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_post_match
- s = StringScanner.new('a b c d e')
+ s = create_string_scanner('a b c d e')
s.scan(/\w/)
assert_equal ' b c d e', s.post_match
s.skip(/\s/)
assert_equal 'b c d e', s.post_match
- s.scan(/\w/)
+ s.scan('b')
assert_equal ' c d e', s.post_match
s.scan_until(/c/)
assert_equal ' d e', s.post_match
@@ -532,7 +567,7 @@ class TestStringScanner < Test::Unit::TestCase
str = 'test string'.dup
str.taint
- s = StringScanner.new(str)
+ s = create_string_scanner(str)
s.scan(/\w+/)
assert_equal true, s.post_match.tainted?
s.scan(/\s+/)
@@ -542,7 +577,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_terminate
- s = StringScanner.new('ssss')
+ s = create_string_scanner('ssss')
s.getch
s.terminate
assert_equal true, s.eos?
@@ -551,7 +586,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_reset
- s = StringScanner.new('ssss')
+ s = create_string_scanner('ssss')
s.getch
s.reset
assert_equal 0, s.pos
@@ -563,7 +598,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_matched_size
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_nil s.matched_size
s.scan(/test/)
assert_equal 4, s.matched_size
@@ -576,7 +611,7 @@ class TestStringScanner < Test::Unit::TestCase
s.terminate
assert_nil s.matched_size
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_nil s.matched_size
s.scan(/test/)
assert_equal 4, s.matched_size
@@ -585,31 +620,45 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_encoding
- ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp"))
+ ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp"))
assert_equal(Encoding::EUC_JP, ss.scan(/./e).encoding)
end
+ def test_encoding_string
+ str = "\xA1\xA2".dup.force_encoding("euc-jp")
+ ss = create_string_scanner(str)
+ assert_equal(str.dup, ss.scan(str.dup))
+ end
+
+ def test_invalid_encoding_string
+ str = "\xA1\xA2".dup.force_encoding("euc-jp")
+ ss = create_string_scanner(str)
+ assert_raise(Encoding::CompatibilityError) do
+ ss.scan(str.encode("UTF-8"))
+ end
+ end
+
def test_generic_regexp
- ss = StringScanner.new("\xA1\xA2".dup.force_encoding("euc-jp"))
+ ss = create_string_scanner("\xA1\xA2".dup.force_encoding("euc-jp"))
t = ss.scan(/./)
assert_equal("\xa1\xa2".dup.force_encoding("euc-jp"), t)
end
def test_set_pos
- s = StringScanner.new("test string")
+ s = create_string_scanner("test string")
s.pos = 7
assert_equal("ring", s.rest)
end
def test_match_p
- s = StringScanner.new("test string")
+ s = create_string_scanner("test string")
assert_equal(4, s.match?(/\w+/))
assert_equal(4, s.match?(/\w+/))
assert_equal(nil, s.match?(/\s+/))
end
def test_check
- s = StringScanner.new("Foo Bar Baz")
+ s = create_string_scanner("Foo Bar Baz")
assert_equal("Foo", s.check(/Foo/))
assert_equal(0, s.pos)
assert_equal("Foo", s.matched)
@@ -618,7 +667,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_scan_full
- s = StringScanner.new("Foo Bar Baz")
+ s = create_string_scanner("Foo Bar Baz")
assert_equal(4, s.scan_full(/Foo /, false, false))
assert_equal(0, s.pos)
assert_equal(nil, s.scan_full(/Baz/, false, false))
@@ -634,7 +683,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_exist_p
- s = StringScanner.new("test string")
+ s = create_string_scanner("test string")
assert_equal(3, s.exist?(/s/))
assert_equal(0, s.pos)
s.scan(/test/)
@@ -643,8 +692,15 @@ class TestStringScanner < Test::Unit::TestCase
assert_equal(nil, s.exist?(/e/))
end
+ def test_exist_p_string
+ s = create_string_scanner("test string")
+ assert_raise(TypeError) do
+ s.exist?(" ")
+ end
+ end
+
def test_skip_until
- s = StringScanner.new("Foo Bar Baz")
+ s = create_string_scanner("Foo Bar Baz")
assert_equal(3, s.skip_until(/Foo/))
assert_equal(3, s.pos)
assert_equal(4, s.skip_until(/Bar/))
@@ -653,7 +709,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_check_until
- s = StringScanner.new("Foo Bar Baz")
+ s = create_string_scanner("Foo Bar Baz")
assert_equal("Foo", s.check_until(/Foo/))
assert_equal(0, s.pos)
assert_equal("Foo Bar", s.check_until(/Bar/))
@@ -662,7 +718,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_search_full
- s = StringScanner.new("Foo Bar Baz")
+ s = create_string_scanner("Foo Bar Baz")
assert_equal(8, s.search_full(/Bar /, false, false))
assert_equal(0, s.pos)
assert_equal("Foo Bar ", s.search_full(/Bar /, false, true))
@@ -674,7 +730,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_peek
- s = StringScanner.new("test string")
+ s = create_string_scanner("test string")
assert_equal("test st", s.peek(7))
assert_equal("test st", s.peek(7))
s.scan(/test/)
@@ -685,7 +741,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_unscan
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_equal("test", s.scan(/\w+/))
s.unscan
assert_equal("te", s.scan(/../))
@@ -694,7 +750,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_rest
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_equal("test string", s.rest)
s.scan(/test/)
assert_equal(" string", s.rest)
@@ -704,7 +760,7 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_rest_size
- s = StringScanner.new('test string')
+ s = create_string_scanner('test string')
assert_equal(11, s.rest_size)
s.scan(/test/)
assert_equal(7, s.rest_size)
@@ -714,13 +770,13 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_inspect2
- s = StringScanner.new('test string test')
+ s = create_string_scanner('test string test')
s.scan(/test strin/)
assert_equal('#<StringScanner 10/16 "...strin" @ "g tes...">', s.inspect)
end
def test_aref_without_regex
- s = StringScanner.new('abc')
+ s = create_string_scanner('abc')
s.get_byte
assert_nil(s[:c])
assert_nil(s["c"])
@@ -730,13 +786,14 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_size
- s = StringScanner.new("Fri Dec 12 1975 14:39")
+ s = create_string_scanner("Fri Dec 12 1975 14:39")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(4, s.size)
end
def test_captures
- s = StringScanner.new("Fri Dec 12 1975 14:39")
+ s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39")
+ s.scan("Timestamp: ")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(["Fri", "Dec", "12"], s.captures)
s.scan(/(\w+) (\w+) (\d+) /)
@@ -744,10 +801,53 @@ class TestStringScanner < Test::Unit::TestCase
end
def test_values_at
- s = StringScanner.new("Fri Dec 12 1975 14:39")
+ s = create_string_scanner("Timestamp: Fri Dec 12 1975 14:39")
+ s.scan("Timestamp: ")
s.scan(/(\w+) (\w+) (\d+) /)
assert_equal(["Fri Dec 12 ", "12", nil, "Dec"], s.values_at(0, -1, 5, 2))
s.scan(/(\w+) (\w+) (\d+) /)
assert_nil(s.values_at(0, -1, 5, 2))
end
+
+ def test_fixed_anchor_true
+ assert_equal(true, StringScanner.new("a", fixed_anchor: true).fixed_anchor?)
+ end
+
+ def test_fixed_anchor_false
+ assert_equal(false, StringScanner.new("a").fixed_anchor?)
+ assert_equal(false, StringScanner.new("a", true).fixed_anchor?)
+ assert_equal(false, StringScanner.new("a", false).fixed_anchor?)
+ assert_equal(false, StringScanner.new("a", {}).fixed_anchor?)
+ assert_equal(false, StringScanner.new("a", fixed_anchor: nil).fixed_anchor?)
+ assert_equal(false, StringScanner.new("a", fixed_anchor: false).fixed_anchor?)
+ end
+end
+
+class TestStringScannerFixedAnchor < TestStringScanner
+ def create_string_scanner(string, *args)
+ StringScanner.new(string, fixed_anchor: true)
+ end
+
+ def test_skip_with_begenning_of_string_anchor_match
+ s = create_string_scanner("a")
+ assert_equal 1, s.skip(/\Aa/)
+ end
+
+ def test_skip_with_begenning_of_string_anchor_not_match
+ s = create_string_scanner("a\nb")
+ assert_equal 2, s.skip(/a\n/)
+ assert_nil s.skip(/\Ab/)
+ end
+
+ def test_skip_with_begenning_of_line_anchor_match
+ s = create_string_scanner("a\nb")
+ assert_equal 2, s.skip(/a\n/)
+ assert_equal 1, s.skip(/^b/)
+ end
+
+ def test_skip_with_begenning_of_line_anchor_not_match
+ s = create_string_scanner("ab")
+ assert_equal 1, s.skip(/a/)
+ assert_nil s.skip(/^b/)
+ end
end