about | summary | refs | log | tree | commit | diff | stats
path: root/regexec.c
diff options
context:
space:
mode:
author: TSUYUSATO Kitsune <make.just.on@gmail.com> 2022-10-04 11:20:49 +0900
committer: Yusuke Endoh <mame@ruby-lang.org> 2022-11-09 23:21:26 +0900
commit: 8c9e4d37a3be68574e0a84d2321d694b3f87793f (patch)
tree: 63f7f8fc7b535b293117de622f76d52a781b5b24 /regexec.c
parent: f07dea16e3f70fbc33c51dbc73ef0f33f965e010 (diff)
download: ruby-8c9e4d37a3be68574e0a84d2321d694b3f87793f.tar.gz
Fix look-around like operators and cclass
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 68
1 files changed, 25 insertions, 43 deletions
diff --git a/regexec.c b/regexec.c
index 70fd1a63a6..e098cc734a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -285,9 +285,13 @@ int count_num_cache_opcode(regex_t* reg)
p += SIZE_BITSET; break;
case OP_CCLASS_MB:
case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, p); p += len; break;
case OP_CCLASS_MIX:
case OP_CCLASS_MIX_NOT:
- GET_LENGTH_INC(len, p); p += len; break;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(len, p);
+ p += len;
+ break;
case OP_ANYCHAR:
case OP_ANYCHAR_ML:
@@ -373,33 +377,17 @@ int count_num_cache_opcode(regex_t* reg)
case OP_PUSH_POS:
case OP_POP_POS:
- break;
case OP_PUSH_POS_NOT:
- p += SIZE_RELADDR; break;
case OP_FAIL_POS:
- break;
case OP_PUSH_STOP_BT:
case OP_POP_STOP_BT:
- return NUM_CACHE_OPCODE_FAIL;
case OP_LOOK_BEHIND:
- /* GET_LENGTH_INC(len, p); break; */
- return NUM_CACHE_OPCODE_FAIL;
case OP_PUSH_LOOK_BEHIND_NOT:
- // Since optimization assumes a string offset does not back,
- // we cannot optimize look-behind opcodes.
- /*
- GET_RELADDR_INC(addr, p);
- GET_LENGTH_INC(len, p);
- break;
- */
- return NUM_CACHE_OPCODE_FAIL;
case OP_FAIL_LOOK_BEHIND_NOT:
- return NUM_CACHE_OPCODE_FAIL;
case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END:
- break;
case OP_ABSENT:
- p += SIZE_RELADDR; break;
+ return NUM_CACHE_OPCODE_FAIL;
case OP_CALL:
case OP_RETURN:
@@ -427,6 +415,7 @@ int count_num_cache_opcode(regex_t* reg)
void init_cache_index_table(regex_t* reg, UChar **table)
{
+ UChar** tstart = table;
UChar* pbegin;
UChar* p = reg->p;
UChar* pend = p + reg->used;
@@ -472,11 +461,14 @@ void init_cache_index_table(regex_t* reg, UChar **table)
case OP_CCLASS:
case OP_CCLASS_NOT:
p += SIZE_BITSET; break;
- case OP_CCLASS_MB:
case OP_CCLASS_MB_NOT:
+ GET_LENGTH_INC(len, p); p += len; break;
case OP_CCLASS_MIX:
case OP_CCLASS_MIX_NOT:
- GET_LENGTH_INC(len, p); p += len; break;
+ p += SIZE_BITSET;
+ GET_LENGTH_INC(len, p);
+ p += len;
+ break;
case OP_ANYCHAR:
case OP_ANYCHAR_ML:
@@ -564,33 +556,17 @@ void init_cache_index_table(regex_t* reg, UChar **table)
case OP_PUSH_POS:
case OP_POP_POS:
- break;
case OP_PUSH_POS_NOT:
- p += SIZE_RELADDR; break;
case OP_FAIL_POS:
- break;
case OP_PUSH_STOP_BT:
case OP_POP_STOP_BT:
- return;
case OP_LOOK_BEHIND:
- /* GET_LENGTH_INC(len, p); break; */
- return;
case OP_PUSH_LOOK_BEHIND_NOT:
- // Since optimization assumes a string offset does not back,
- // we cannot optimize look-behind opcodes.
- /*
- GET_RELADDR_INC(addr, p);
- GET_LENGTH_INC(len, p);
- break;
- */
- return;
case OP_FAIL_LOOK_BEHIND_NOT:
- return;
case OP_PUSH_ABSENT_POS:
case OP_ABSENT_END:
- break;
case OP_ABSENT:
- p += SIZE_RELADDR; break;
+ return;
case OP_CALL:
case OP_RETURN:
@@ -1096,13 +1072,19 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define DO_CACHE_MATCH_OPT(enable,p,num_cache_table,table,pos,match_cache) do {\
if (enable) {\
int cache_index = find_cache_index_table((table), (num_cache_table), (p));\
- int key = (num_cache_table) * (pos) + cache_index;\
- int index = key >> 3;\
- int mask = 1 << (key & 7);\
- if ((match_cache)[index] & mask) {\
- goto fail;\
+ if (cache_index >= 0) {\
+ int key = (num_cache_table) * (pos) + cache_index;\
+ int index = key >> 3;\
+ int mask = 1 << (key & 7);\
+ if ((match_cache)[index] & mask) {\
+ /*fprintf(stderr, "Use cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\
+ goto fail;\
+ }\
+ /*fprintf(stderr, "Add cache (pos: %d, p: %p, pc: %d, cache index: %d, key: %d, index: %d, mask: %d)\n", pos, p, (int)(p - pstart), cache_index, key, index, mask);*/\
+ (match_cache)[index] |= mask;\
+ } else {\
+ /*fprintf(stderr, "Miss cache (pos: %d, p: %p, pc: %d, cache index: %d)\n", pos, p, (int)(p - pstart), cache_index);*/\
}\
- (match_cache)[index] |= mask;\
}\
} while (0)