diff options
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 375 |
1 files changed, 224 insertions, 151 deletions
@@ -242,9 +242,6 @@ The `Regexp#match` optimization by using a cache. "cache opcode" A cachable opcode (e.g. `OP_PUSH`, `OP_REPEAT`, etc). It is corresponding to some cache points. -`OP_NULL_CHECK_START` and `OP_NULL_CHECK_END_MEMST` are exceptions. -They are counted as cache opcodes, but they have no corresponding cache points. -They are used for resetting a match cache buffer when a null loop is detected. "cache point" A cachable point on matching. @@ -258,7 +255,7 @@ We encode a match cache point to an integer value by the following equation: "match cache point" = "position on input string" * "total number of cache points" + "cache point" "match cache buffer" -A bit-array for recording (caching) match cache points once arrived. +A bit-array for memoizing (recording) match cache points once backtracked. */ /* count the total number of cache opcodes for allocating a match cache buffer. */ @@ -272,7 +269,7 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) OnigEncoding enc = reg->enc; MemNumType current_repeat_mem = -1; long num_cache_opcodes = 0; - long num_cache_opcodes_at_null_check_start = -1; + int lookaround_nesting = 0; while (p < pend) { switch (*p++) { @@ -326,7 +323,7 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) break; case OP_ANYCHAR_STAR: case OP_ANYCHAR_ML_STAR: - num_cache_opcodes++;break; + num_cache_opcodes++; break; case OP_ANYCHAR_STAR_PEEK_NEXT: case OP_ANYCHAR_ML_STAR_PEEK_NEXT: p++; num_cache_opcodes++; break; @@ -370,7 +367,12 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) case OP_MEMORY_END_PUSH_REC: case OP_MEMORY_END: case OP_MEMORY_END_REC: - p += SIZE_MEMNUM; break; + p += SIZE_MEMNUM; + // A memory (capture) in look-around is found. + if (lookaround_nesting != 0) { + goto impossible; + } + break; case OP_KEEP: break; @@ -422,37 +424,60 @@ count_num_cache_opcodes(const regex_t* reg, long* num_cache_opcodes_ptr) goto impossible; case OP_NULL_CHECK_START: p += SIZE_MEMNUM; - if (num_cache_opcodes_at_null_check_start != -1) { - // A nested OP_NULL_CHECK_START is not yet supported. - goto impossible; - } - num_cache_opcodes_at_null_check_start = num_cache_opcodes; break; case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST_PUSH: p += SIZE_MEMNUM; - num_cache_opcodes_at_null_check_start = -1; break; case OP_NULL_CHECK_END_MEMST: p += SIZE_MEMNUM; - // OP_NULL_CHECK_START and OP_NULL_CHECK_END_MEMST are counted as cache opcodes. - if (num_cache_opcodes_at_null_check_start < num_cache_opcodes) { - num_cache_opcodes += 2; - } - num_cache_opcodes_at_null_check_start = -1; break; case OP_PUSH_POS: - case OP_POP_POS: + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. + goto impossible; + } + lookaround_nesting++; + break; case OP_PUSH_POS_NOT: + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. + goto impossible; + } + p += SIZE_RELADDR; + lookaround_nesting++; + break; + case OP_PUSH_LOOK_BEHIND_NOT: + if (lookaround_nesting < 0) { + // A look-around nested in a atomic grouping is found. + goto impossible; + } + p += SIZE_RELADDR; + p += SIZE_LENGTH; + lookaround_nesting++; + break; + case OP_POP_POS: case OP_FAIL_POS: - goto impossible; + case OP_FAIL_LOOK_BEHIND_NOT: + lookaround_nesting--; + break; case OP_PUSH_STOP_BT: + if (lookaround_nesting != 0) { + // A nested atomic grouping is found. + goto impossible; + } + lookaround_nesting++; + lookaround_nesting *= -1; + break; case OP_POP_STOP_BT: + lookaround_nesting *= -1; + lookaround_nesting--; break; case OP_LOOK_BEHIND: - case OP_PUSH_LOOK_BEHIND_NOT: - case OP_FAIL_LOOK_BEHIND_NOT: + p += SIZE_LENGTH; + break; + case OP_PUSH_ABSENT_POS: case OP_ABSENT_END: case OP_ABSENT: @@ -506,19 +531,34 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num MemNumType current_repeat_mem = -1; long cache_point = 0; long num_cache_points_at_repeat = 0; + int lookaround_nesting = 0; + const OnigCacheOpcode* cache_opcodes_begin = cache_opcodes; OnigCacheOpcode* cache_opcodes_at_repeat = NULL; - UChar* null_check_start_addr = NULL; - OnigCacheOpcode* cache_opcodes_at_null_check_start = NULL; -# define INC_CACHE_OPCODES do {\ +# define INC_CACHE_OPCODES do {\ cache_opcodes->addr = pbegin;\ cache_opcodes->cache_point = cache_point - num_cache_points_at_repeat;\ cache_opcodes->outer_repeat_mem = current_repeat_mem;\ cache_opcodes->num_cache_points_at_outer_repeat = num_cache_points_at_repeat;\ cache_opcodes->num_cache_points_in_outer_repeat = 0;\ - cache_point++;\ + cache_opcodes->lookaround_nesting = lookaround_nesting;\ + cache_point += lookaround_nesting > 0 ? 2 : 1;\ cache_opcodes++;\ - } while (0) + } while (0) + +# define INC_LOOKAROUND_NESTING lookaround_nesting++ +# define DEC_LOOKAROUND_NESTING do {\ + UChar* match_addr = p - 1;\ + OnigCacheOpcode* cache_opcodes_in_lookaround = cache_opcodes - 1;\ + while (\ + cache_opcodes_in_lookaround >= cache_opcodes_begin &&\ + cache_opcodes_in_lookaround->lookaround_nesting == lookaround_nesting\ + ) {\ + cache_opcodes_in_lookaround->match_addr = match_addr;\ + cache_opcodes_in_lookaround--;\ + }\ + lookaround_nesting--;\ + } while (0) while (p < pend) { pbegin = p; @@ -620,7 +660,11 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num case OP_MEMORY_END_PUSH_REC: case OP_MEMORY_END: case OP_MEMORY_END_REC: - p += SIZE_MEMNUM; break; + p += SIZE_MEMNUM; + if (lookaround_nesting != 0) { + goto unexpected_bytecode_error; + } + break; case OP_KEEP: break; @@ -679,46 +723,45 @@ init_cache_opcodes(const regex_t* reg, OnigCacheOpcode* cache_opcodes, long* num case OP_REPEAT_INC_NG_SG: goto unexpected_bytecode_error; case OP_NULL_CHECK_START: - if (cache_opcodes_at_null_check_start != NULL) { - goto unexpected_bytecode_error; - } p += SIZE_MEMNUM; - null_check_start_addr = pbegin; - cache_opcodes_at_null_check_start = cache_opcodes; break; case OP_NULL_CHECK_END: case OP_NULL_CHECK_END_MEMST_PUSH: p += SIZE_MEMNUM; - cache_opcodes_at_null_check_start = NULL; break; case OP_NULL_CHECK_END_MEMST: p += SIZE_MEMNUM; - { - if (cache_opcodes_at_null_check_start < cache_opcodes) { - *cache_opcodes = *(cache_opcodes - 1); - cache_opcodes->addr = pbegin; - cache_opcodes++; - long num_cache_opcodes_in_null_check = (cache_opcodes - cache_opcodes_at_null_check_start); - xmemmove(cache_opcodes_at_null_check_start + 1, cache_opcodes_at_null_check_start, sizeof(OnigCacheOpcode) * num_cache_opcodes_in_null_check); - cache_opcodes_at_null_check_start->addr = null_check_start_addr; - cache_opcodes++; - } - cache_opcodes_at_null_check_start = NULL; - } break; case OP_PUSH_POS: - case OP_POP_POS: + INC_LOOKAROUND_NESTING; + break; case OP_PUSH_POS_NOT: + p += SIZE_RELADDR; + INC_LOOKAROUND_NESTING; + break; + case OP_PUSH_LOOK_BEHIND_NOT: + p += SIZE_RELADDR; + p += SIZE_LENGTH; + INC_LOOKAROUND_NESTING; + break; + case OP_POP_POS: case OP_FAIL_POS: - goto unexpected_bytecode_error; + case OP_FAIL_LOOK_BEHIND_NOT: + DEC_LOOKAROUND_NESTING; + break; case OP_PUSH_STOP_BT: + INC_LOOKAROUND_NESTING; + lookaround_nesting *= -1; + break; case OP_POP_STOP_BT: + lookaround_nesting *= -1; + DEC_LOOKAROUND_NESTING; break; case OP_LOOK_BEHIND: - case OP_PUSH_LOOK_BEHIND_NOT: - case OP_FAIL_LOOK_BEHIND_NOT: - case OP_PUSH_ABSENT_POS: + p += SIZE_LENGTH; + break; + case OP_ABSENT_END: case OP_ABSENT: goto unexpected_bytecode_error; @@ -933,31 +976,33 @@ onig_region_copy(OnigRegion* to, const OnigRegion* from) /* stack type */ /* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 +#define STK_ALT 0x0001 +#define STK_LOOK_BEHIND_NOT 0x0002 +#define STK_POS_NOT 0x0003 /* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 +#define STK_MEM_START 0x0100 +#define STK_MEM_END 0x8200 +#define STK_REPEAT_INC 0x0300 +#define STK_STATE_CHECK_MARK 0x1000 /* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0700 -#define STK_CALL_FRAME 0x0800 -#define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ -#define STK_ABSENT_POS 0x0b00 /* for absent */ -#define STK_ABSENT 0x0c00 /* absent inner loop marker */ +#define STK_NULL_CHECK_START 0x3000 +#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_MEM_END_MARK 0x8400 +#define STK_POS 0x0500 /* used when POP-POS */ +#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0700 +#define STK_CALL_FRAME 0x0800 +#define STK_RETURN 0x0900 +#define STK_VOID 0x0a00 /* for fill a blank */ +#define STK_ABSENT_POS 0x0b00 /* for absent */ +#define STK_ABSENT 0x0c00 /* absent inner loop marker */ +#define STK_MATCH_CACHE_POINT 0x0d00 /* for the match cache optimization */ +#define STK_ATOMIC_MATCH_CACHE_POINT 0x0e00 /* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff -#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ +#define STK_MASK_POP_USED 0x00ff +#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ #ifdef USE_MATCH_CACHE #define MATCH_ARG_INIT_MATCH_CACHE(msa) do {\ @@ -1387,6 +1432,15 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_INC;\ } while(0) +#define STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask) do {\ + STACK_ENSURE(1);\ + stk->type = STK_MATCH_CACHE_POINT;\ + stk->null_check = stk == stk_base ? 0 : (stk-1)->null_check;\ + stk->u.match_cache_point.index = (match_cache_point_index);\ + stk->u.match_cache_point.mask = (match_cache_point_mask);\ + STACK_INC;\ +} while(0) + #ifdef ONIG_DEBUG # define STACK_BASE_CHECK(p, at) \ @@ -1398,6 +1452,42 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, # define STACK_BASE_CHECK(p, at) #endif +#ifdef ONIG_DEBUG_MATCH_CACHE +# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) fprintf(stderr, "MATCH CACHE: memoize (index=%ld mask=%d)\n", stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask); +#else +# define MATCH_CACHE_DEBUG_MEMOIZE(stkp) ((void) 0) +#endif + +#ifdef USE_MATCH_CACHE +# define INC_NUM_FAILS msa->num_fails++ +# define MEMOIZE_MATCH_CACHE_POINT do {\ + if (stk->type == STK_MATCH_CACHE_POINT) {\ + msa->match_cache_buf[stk->u.match_cache_point.index] |= stk->u.match_cache_point.mask;\ + MATCH_CACHE_DEBUG_MEMOIZE(stk);\ + } else if (stk->type == STK_ATOMIC_MATCH_CACHE_POINT) {\ + memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\ + MATCH_CACHE_DEBUG_MEMOIZE(stkp);\ + }\ + } while(0) +# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stkp) do {\ + if (stkp->type == STK_MATCH_CACHE_POINT) {\ + stkp->type = STK_VOID;\ + memoize_extended_match_cache_point(msa->match_cache_buf, stkp->u.match_cache_point.index, stkp->u.match_cache_point.mask);\ + MATCH_CACHE_DEBUG_MEMOIZE(stkp);\ + }\ + } while(0) +# define MEMOIZE_ATOMIC_MATCH_CACHE_POINT do {\ + if (stk->type == STK_MATCH_CACHE_POINT) {\ + memoize_extended_match_cache_point(msa->match_cache_buf, stk->u.match_cache_point.index, stk->u.match_cache_point.mask);\ + MATCH_CACHE_DEBUG_MEMOIZE(stkp);\ + }\ + } while(0) +#else +# define INC_NUM_FAILS ((void) 0) +# define MEMOIZE_MATCH_CACHE_POINT ((void) 0) +# define MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT ((void) 0) +#endif + #define STACK_POP_ONE do {\ stk--;\ STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ @@ -1411,6 +1501,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ ELSE_IF_STATE_CHECK_MARK(stk);\ + MEMOIZE_MATCH_CACHE_POINT;\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ @@ -1423,6 +1514,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ + MEMOIZE_MATCH_CACHE_POINT;\ }\ break;\ default:\ @@ -1442,6 +1534,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ ELSE_IF_STATE_CHECK_MARK(stk);\ + MEMOIZE_MATCH_CACHE_POINT;\ }\ break;\ }\ @@ -1463,7 +1556,11 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ }\ + else if (IS_TO_VOID_TARGET(stk)) {\ + INC_NUM_FAILS;\ + }\ ELSE_IF_STATE_CHECK_MARK(stk);\ + MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(stk);\ }\ } while(0) @@ -1520,12 +1617,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, k--;\ STACK_BASE_CHECK(k, "STACK_POS_END"); \ if (IS_TO_VOID_TARGET(k)) {\ + INC_NUM_FAILS;\ k->type = STK_VOID;\ }\ else if (k->type == STK_POS) {\ k->type = STK_VOID;\ break;\ }\ + MEMOIZE_LOOKAROUND_MATCH_CACHE_POINT(k);\ }\ } while(0) @@ -1535,12 +1634,28 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, k--;\ STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ if (IS_TO_VOID_TARGET(k)) {\ + INC_NUM_FAILS;\ k->type = STK_VOID;\ }\ else if (k->type == STK_STOP_BT) {\ k->type = STK_VOID;\ break;\ }\ + else if (k->type == STK_MATCH_CACHE_POINT) {\ + k->type = STK_ATOMIC_MATCH_CACHE_POINT;\ + }\ + }\ +} while(0) + +#define STACK_STOP_BT_FAIL do {\ + while (1) {\ + stk--;\ + STACK_BASE_CHECK(stk, "STACK_STOP_BT_END"); \ + if (stk->type == STK_STOP_BT) {\ + stk->type = STK_VOID;\ + break;\ + }\ + MEMOIZE_ATOMIC_MATCH_CACHE_POINT;\ }\ } while(0) @@ -1579,7 +1694,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, }\ } while(0) -#define STACK_NULL_CHECK_MEMST(isnull,ischange,id,s,reg) do {\ +#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ OnigStackType* k = STACK_AT((stk-1)->null_check)+1;\ while (1) {\ k--;\ @@ -1596,14 +1711,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end, while (k < stk) {\ if (k->type == STK_MEM_START) {\ if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; (ischange) = 1; break;\ + (isnull) = 0; break;\ }\ if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; (ischange) = 1; break;\ + (isnull) = 0; break;\ }\ else if (endp != s) {\ (isnull) = -1; /* empty, but position changed */ \ @@ -2008,6 +2123,7 @@ stack_type_str(int stack_type) case STK_VOID: return "Void "; case STK_ABSENT_POS: return "AbsPos"; case STK_ABSENT: return "Absent"; + case STK_MATCH_CACHE_POINT: return "MCache"; default: return " "; } } @@ -2029,7 +2145,7 @@ bsearch_cache_opcodes(const OnigCacheOpcode *cache_opcodes, long num_cache_opcod } static long -find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk) +find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, const UChar* p, const OnigStackType *stk, const OnigStackIndex *repeat_stk, const OnigCacheOpcode **cache_opcode_ptr) { long m; const OnigCacheOpcode* cache_opcode; @@ -2048,6 +2164,7 @@ find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_ca } cache_opcode = &cache_opcodes[m]; + *cache_opcode_ptr = &cache_opcodes[m]; cache_point = cache_opcode->cache_point; if (cache_opcode->outer_repeat_mem == -1) { return cache_point; @@ -2079,54 +2196,23 @@ find_cache_point(regex_t* reg, const OnigCacheOpcode* cache_opcodes, long num_ca cache_point; } -static void -reset_match_cache_on_null_check(regex_t* reg, const UChar* pstart, const UChar* pend, long pos, uint8_t* match_cache_buf, const OnigCacheOpcode* cache_opcodes, long num_cache_opcodes, long num_cache_points, const OnigStackType *stk, const OnigStackIndex *repeat_stk) -{ - long cache_point_start, cache_point_end; - long match_cache_point_start, match_cache_point_end; - long match_cache_point_start_index, match_cache_point_end_index; - uint8_t match_cache_point_start_bit, match_cache_point_end_bit; - -#ifdef ONIG_DEBUG_MATCH_CACHE - fprintf(stderr, "MATCH CACHE: reset start=%p end=%p pos=%ld cache_opcodes=%p num_cache_opcodes=%ld\n", pstart, pend, pos, cache_opcodes, num_cache_opcodes); -#endif - - cache_point_start = find_cache_point(reg, cache_opcodes, num_cache_opcodes, pstart, stk, repeat_stk); - cache_point_end = find_cache_point(reg, cache_opcodes, num_cache_opcodes, pend, stk, repeat_stk); - // Skip to reset if `pstart` or `pend` is not found. - // This situation occurs when there is no cache point - // between OP_NULL_CHECK_START and OP_NULL_CHECK_END_MEMST. - if (cache_point_start == -1 || cache_point_end == -1) return; - - match_cache_point_start = pos * num_cache_points + cache_point_start; - match_cache_point_end = pos * num_cache_points + cache_point_end; - match_cache_point_start_index = match_cache_point_start >> 3; - match_cache_point_end_index = match_cache_point_end >> 3; - match_cache_point_start_bit = match_cache_point_start & 7; - match_cache_point_end_bit = match_cache_point_end & 7; - -#ifdef ONIG_DEBUG_MATCH_CACHE - fprintf(stderr, "MATCH CACHE: reset start %ld (%ld index=%ld bit=%d)\n", match_cache_point_start, cache_point_start, match_cache_point_start_index, match_cache_point_start_bit); - fprintf(stderr, "MATCH CACHE: reset end %ld (%ld index=%ld bit=%d)\n", match_cache_point_end, cache_point_end, match_cache_point_end_index, match_cache_point_end_bit); -#endif +static int check_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) { + if (match_cache_point_mask & 0x80) { + return (match_cache_buf[match_cache_point_index + 1] & 0x01) > 0; + } else { + return (match_cache_buf[match_cache_point_index] & (match_cache_point_mask << 1)) > 0; + } +} - if (match_cache_point_start_index == match_cache_point_end_index) { - match_cache_buf[match_cache_point_start_index] &= - (((1 << (8 - match_cache_point_end_bit - 1)) - 1) << (match_cache_point_end_bit + 1)) | - ((1 << match_cache_point_start_bit) - 1); +static void memoize_extended_match_cache_point(uint8_t *match_cache_buf, long match_cache_point_index, uint8_t match_cache_point_mask) { + match_cache_buf[match_cache_point_index] |= match_cache_point_mask; + if (match_cache_point_mask & 0x80) { + match_cache_buf[match_cache_point_index + 1] |= 0x01; } else { - if (match_cache_point_start_bit) { - match_cache_buf[match_cache_point_start_index] &= (1 << (match_cache_point_start_bit - 1)) - 1; - match_cache_point_start_index++; - } - if (match_cache_point_start_index < match_cache_point_end_index) { - xmemset(&match_cache_buf[match_cache_point_start_index], 0, match_cache_point_end_index - match_cache_point_start_index); - if (match_cache_point_end_bit) { - match_cache_buf[match_cache_point_end_index] &= (((1 << (8 - match_cache_point_end_bit - 1)) - 1) << (match_cache_point_end_bit + 1)); - } - } + match_cache_buf[match_cache_point_index] |= match_cache_point_mask << 1; } } + #endif /* USE_MATCH_CACHE */ /* match data(str - end) from position (sstart). */ @@ -2447,7 +2533,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, # define CHECK_MATCH_CACHE do {\ if (msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) {\ - long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk);\ + const OnigCacheOpcode *cache_opcode;\ + long cache_point = find_cache_point(reg, msa->cache_opcodes, msa->num_cache_opcodes, pbegin, stk_base, repeat_stk, &cache_opcode);\ if (cache_point >= 0) {\ long match_cache_point = msa->num_cache_points * (long)(s - str) + cache_point;\ long match_cache_point_index = match_cache_point >> 3;\ @@ -2455,9 +2542,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MATCH_CACHE_DEBUG;\ if (msa->match_cache_buf[match_cache_point_index] & match_cache_point_mask) {\ MATCH_CACHE_DEBUG_HIT;\ - goto fail;\ + if (cache_opcode->lookaround_nesting == 0) goto fail;\ + else if (cache_opcode->lookaround_nesting < 0) {\ + if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ + STACK_STOP_BT_FAIL;\ + goto fail;\ + } else goto fail;\ + } else {\ + if (check_extended_match_cache_point(msa->match_cache_buf, match_cache_point_index, match_cache_point_mask)) {\ + p = cache_opcode->match_addr;\ + MOP_OUT;\ + JUMP;\ + } else goto fail;\ + }\ }\ - msa->match_cache_buf[match_cache_point_index] |= match_cache_point_mask;\ + STACK_PUSH_MATCH_CACHE_POINT(match_cache_point_index, match_cache_point_mask);\ }\ }\ } while (0) @@ -3538,10 +3637,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; - int ischanged = 0; // set 1 when a loop is empty but memory status is changed. GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, ischanged, mem, s, reg); + STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); if (isnull) { # ifdef ONIG_DEBUG_MATCH fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n", @@ -3550,31 +3648,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (isnull == -1) goto fail; goto null_check_found; } -# ifdef USE_MATCH_CACHE - if (ischanged && msa->match_cache_status == MATCH_CACHE_STATUS_ENABLED) { - RelAddrType rel; - OnigUChar *null_check_start; - OnigUChar *null_check_end = pbegin; - MemNumType mem; - UChar* tmp = p; - switch (*tmp++) { - case OP_JUMP: - case OP_PUSH: - GET_RELADDR_INC(rel, tmp); - null_check_start = tmp + rel; - break; - case OP_REPEAT_INC: - case OP_REPEAT_INC_NG: - GET_MEMNUM_INC(mem, tmp); - // Skip OP_REPEAT (or OP_REPEAT_NG) - null_check_start = STACK_AT(repeat_stk[mem])->u.repeat.pcode; - break; - default: - goto unexpected_bytecode_error; - } - reset_match_cache_on_null_check(reg, null_check_start, null_check_end, (long)(s - str), msa->match_cache_buf, msa->cache_opcodes, msa->num_cache_opcodes, msa->num_cache_points, stk_base, repeat_stk); - } -# endif } MOP_OUT; JUMP; @@ -4002,7 +4075,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "MATCH CACHE: #cache points = %ld\n", msa->num_cache_points); fprintf(stderr, "MATCH CACHE: cache opcodes (%p):\n", msa->cache_opcodes); for (int i = 0; i < msa->num_cache_opcodes; i++) { - fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat); + fprintf(stderr, "MATCH CACHE: [%p] cache_point=%ld outer_repeat_mem=%d num_cache_opcodes_at_outer_repeat=%ld num_cache_opcodes_in_outer_repeat=%ld lookaround_nesting=%d match_addr=%p\n", msa->cache_opcodes[i].addr, msa->cache_opcodes[i].cache_point, msa->cache_opcodes[i].outer_repeat_mem, msa->cache_opcodes[i].num_cache_points_at_outer_repeat, msa->cache_opcodes[i].num_cache_points_in_outer_repeat, msa->cache_opcodes[i].lookaround_nesting, msa->cache_opcodes[i].match_addr); } #endif } |