diff options
author | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-11-28 03:09:13 +0000 |
---|---|---|
committer | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-11-28 03:09:13 +0000 |
commit | 09542e3269f5b915b45eb5c03fcb8c61452d751e (patch) | |
tree | 91eb489927947f75436f5bd366896772c6b0c71c | |
parent | 5fb312bd18a09ac50e8c7d707cb87893609ce9fc (diff) | |
download | ruby-09542e3269f5b915b45eb5c03fcb8c61452d751e.tar.gz |
* regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8 characters that have U+00FE or invalid characters.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7398 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | regparse.c | 20 | ||||
-rw-r--r-- | test/ruby/test_regexp.rb | 12 |
3 files changed, 26 insertions, 11 deletions
@@ -1,3 +1,8 @@ +Sun Nov 28 12:08:15 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp> + + * regparse.c, test/ruby/test_regexp.rb: fixed problem with UTF-8 + characters that have U+00FE or invalid characters. + Sun Nov 28 12:07:04 2004 Kazuo Saito <ksaito@uranus.dti.ne.jp> * regexec.c, test/ruby/test_regexp.rb: fixed segmentation falut diff --git a/regparse.c b/regparse.c index e6fea8e68a..16792ee9eb 100644 --- a/regparse.c +++ b/regparse.c @@ -3631,6 +3631,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, case CCS_RANGE: if (intype == *type) { if (intype == CCV_SB) { + if (*vs > 0xff || v > 0xff) + return ONIGERR_INVALID_WIDE_CHAR_VALUE; + if (*vs > v) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) goto ccs_range_end; @@ -3646,14 +3649,8 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, } else { #if 0 - if (intype == CCV_CODE_POINT && *type == CCV_SB && - ONIGENC_IS_CONTINUOUS_SB_MB(env->enc)) { - bitset_set_range(cc->bs, (int )*vs, 0x7f); - r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )0x80, v); - if (r < 0) return r; - } -#else if (intype == CCV_CODE_POINT && *type == CCV_SB) { +#endif if (*vs > v) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) goto ccs_range_end; @@ -3663,10 +3660,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); if (r < 0) return r; +#if 0 } -#endif else return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; +#endif } ccs_range_end: *state = CCS_COMPLETE; @@ -3826,7 +3824,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, break; case TK_CODE_POINT: - v = tok->u.code; + v = (OnigCodePoint )tok->u.code; in_israw = 1; val_entry: len = ONIGENC_CODE_TO_MBCLEN(env->enc, v); @@ -3952,7 +3950,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CC_AND: /* && */ { if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB, + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } @@ -3992,7 +3990,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } if (state == CCS_VALUE) { - r = next_state_val(cc, &vs, 0, &val_israw, 0, CCV_SB, + r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type, &val_type, &state, env); if (r != 0) goto err; } diff --git a/test/ruby/test_regexp.rb b/test/ruby/test_regexp.rb index 889fbb4b2d..dbfe4d7aeb 100644 --- a/test/ruby/test_regexp.rb +++ b/test/ruby/test_regexp.rb @@ -12,4 +12,16 @@ class TestRegexp < Test::Unit::TestCase def test_ruby_dev_24887 assert_equal("a".gsub(/a\Z/, ""), "") end + + def test_yoshidam_net_20041111_1 + s = "[\xC2\xA0-\xC3\xBE]" + assert_match(Regexp.new(s, nil, "u"), "\xC3\xBE") + end + + def test_yoshidam_net_20041111_2 + assert_raise(RegexpError) do + s = "[\xFF-\xFF]" + Regexp.new(s, nil, "u") + end + end end |