diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-01-20 08:29:24 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2003-01-20 08:29:24 +0000 |
commit | d242ae60d8d8bccb8e209130d3bdb1cc41333b8c (patch) | |
tree | e1cc1f856aac1c3bda4897c7a244b43039316f3d /regex.c | |
parent | 181edd12a09a76e2443c702ff2e17777fe8cfeb5 (diff) | |
download | ruby-d242ae60d8d8bccb8e209130d3bdb1cc41333b8c.tar.gz |
* regex.c (is_in_list): should work well with UTF-8.
* regex.c (re_match_exec): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3363 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regex.c')
-rw-r--r-- | regex.c | 44 |
1 files changed, 33 insertions, 11 deletions
@@ -698,7 +698,18 @@ set_list_bits(c1, c2, b) } static int -is_in_list(c, b) +is_in_list_sbc(c, b) + unsigned long c; + const unsigned char *b; +{ + unsigned short size; + + size = *b++; + return ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH); +} + +static int +is_in_list_mbc(c, b) unsigned long c; const unsigned char *b; { @@ -706,9 +717,6 @@ is_in_list(c, b) unsigned short i, j; size = *b++; - if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) { - return 1; - } b += size + 2; size = EXTRACT_UNSIGNED(&b[-2]); if (size == 0) return 0; @@ -727,6 +735,14 @@ is_in_list(c, b) return 0; } +static int +is_in_list(c, b) + unsigned long c; + const unsigned char *b; +{ + return is_in_list_sbc(c, b) || is_in_list_mbc(c, b); +} + static void print_partial_compiled_pattern(start, end) unsigned char *start; @@ -3815,19 +3831,25 @@ re_match_exec(bufp, string_arg, size, pos, beg, regs) int cc, c; PREFETCH; - cc = c = (unsigned char)*d++; + c = (unsigned char)*d++; if (ismbchar(c)) { if (d + mbclen(c) - 1 <= dend) { + cc = c; MBC2WC(c, d); + not = is_in_list_mbc(c, p); + if (!not) { + part = not = is_in_list_sbc(cc, p); + } + } else { + not = is_in_list_sbc(c, p); } } - else if (TRANSLATE_P()) - cc = c = (unsigned char)translate[c]; - - not = is_in_list(c, p); - if (!not && cc != c) { - part = not = is_in_list(cc, p); + else { + if (TRANSLATE_P()) + c = (unsigned char)translate[c]; + not = is_in_list_sbc(c, p); } + if (*(p - 1) == (unsigned char)charset_not) { not = !not; } |