about | summary | refs | log | tree | commit | diff | stats
path: root/regexec.c
diff options
context:
space:
mode:
author: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-05-23 01:32:08 +0000
committer: matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-05-23 01:32:08 +0000
commit: 6ee2e542394a14a70acd12b3278b2a9fc5b861fc (patch)
tree: 1341a885b61229d97f5ba2ad7196100f120d81d4 /regexec.c
parent: 2cdb1c33763d3dda6691ee4b7ffc53e684bd528e (diff)
download: ruby-6ee2e542394a14a70acd12b3278b2a9fc5b861fc.tar.gz
* oniguruma.h: updated to Oniguruma 5.7.0.
* regsyntax.c, unicode.c: new files along with Oniguruma 5.x.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12376 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 531
1 files changed, 231 insertions, 300 deletions
diff --git a/regexec.c b/regexec.c
index ba2a1b1cd4..2a568fb1da 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -76,7 +76,7 @@ history_root_free(OnigRegion* r)
}
static OnigCaptureTreeNode*
-history_node_new()
+history_node_new(void)
{
OnigCaptureTreeNode* node;
@@ -233,7 +233,7 @@ onig_region_init(OnigRegion* region)
}
extern OnigRegion*
-onig_region_new()
+onig_region_new(void)
{
OnigRegion* r;
@@ -297,47 +297,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
#define INVALID_STACK_INDEX -1
-typedef long StackIndex;
-
-typedef struct _StackType {
- unsigned int type;
- union {
- struct {
- UChar *pcode; /* byte code position */
- UChar *pstr; /* string position */
- UChar *pstr_prev; /* previous char position of pstr */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- unsigned int state_check;
-#endif
- } state;
- struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- UChar *pcode; /* byte code position (head of repeated target) */
- int num; /* repeat id */
- } repeat;
- struct {
- StackIndex si; /* index of stack */
- } repeat_inc;
- struct {
- int num; /* memory num */
- UChar *pstr; /* start/end position */
- /* Following information is setted, if this stack type is MEM-START */
- StackIndex start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex end; /* prev. info (for backtrack "(...)*" ) */
- } mem;
- struct {
- int num; /* null check id */
- UChar *pstr; /* start position */
- } null_check;
-#ifdef USE_SUBEXP_CALL
- struct {
- UChar *ret_addr; /* byte code position */
- int num; /* null check id */
- UChar *pstr; /* string position */
- } call_frame;
-#endif
- } u;
-} StackType;
/* stack type */
/* used by normal-POP */
@@ -365,24 +324,22 @@ typedef struct _StackType {
#define STK_MASK_TO_VOID_TARGET 0x10ff
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-typedef struct {
- void* stack_p;
- int stack_n;
- OnigOptionType options;
- OnigRegion* region;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- void* state_check_buff;
- int state_check_buff_size;
-#endif
-} MatchArg;
-
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
+ (msa).best_len = ONIG_MISMATCH;\
+} while (0)
+#else
#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start) do {\
- (msa).stack_p = (void* )0;\
- (msa).options = (arg_option);\
- (msa).region = (arg_region);\
- (msa).start = (arg_start);\
+ (msa).stack_p = (void* )0;\
+ (msa).options = (arg_option);\
+ (msa).region = (arg_region);\
+ (msa).start = (arg_start);\
} while (0)
+#endif
#ifdef USE_COMBINATION_EXPLOSION_CHECK
@@ -428,15 +385,15 @@ typedef struct {
#define STACK_INIT(alloc_addr, ptr_num, stack_num) do {\
if (msa->stack_p) {\
alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num));\
- stk_alloc = (StackType* )(msa->stack_p);\
+ stk_alloc = (OnigStackType* )(msa->stack_p);\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
else {\
alloc_addr = (char* )xalloca(sizeof(char*) * (ptr_num)\
- + sizeof(StackType) * (stack_num));\
- stk_alloc = (StackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
+ + sizeof(OnigStackType) * (stack_num));\
+ stk_alloc = (OnigStackType* )(alloc_addr + sizeof(char*) * (ptr_num));\
stk_base = stk_alloc;\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
@@ -466,11 +423,11 @@ onig_set_match_stack_limit_size(unsigned int size)
}
static int
-stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
- StackType** arg_stk, StackType* stk_alloc, MatchArg* msa)
+stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
+ OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
{
unsigned int n;
- StackType *x, *stk_base, *stk_end, *stk;
+ OnigStackType *x, *stk_base, *stk_end, *stk;
stk_base = *arg_stk_base;
stk_end = *arg_stk_end;
@@ -478,12 +435,12 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
n = stk_end - stk_base;
if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
- x = (StackType* )xmalloc(sizeof(StackType) * n * 2);
+ x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
if (IS_NULL(x)) {
STACK_SAVE;
return ONIGERR_MEMORY;
}
- xmemcpy(x, stk_base, n * sizeof(StackType));
+ xmemcpy(x, stk_base, n * sizeof(OnigStackType));
n *= 2;
}
else {
@@ -494,7 +451,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
else
n = MatchStackLimitSize;
}
- x = (StackType* )xrealloc(stk_base, sizeof(StackType) * n);
+ x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
if (IS_NULL(x)) {
STACK_SAVE;
return ONIGERR_MEMORY;
@@ -830,7 +787,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_STOP_BT_END do {\
- StackType *k = stk;\
+ OnigStackType *k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
@@ -845,7 +802,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_NULL_CHECK(isnull,id,s) do {\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
@@ -860,7 +817,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
@@ -880,7 +837,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
} while(0)
#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
@@ -920,7 +877,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
@@ -986,7 +943,7 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
#define STACK_RETURN(addr) do {\
int level = 0;\
- StackType* k = stk;\
+ OnigStackType* k = stk;\
while (1) {\
k--;\
STACK_BASE_CHECK(k, "STACK_RETURN"); \
@@ -1009,25 +966,25 @@ stack_double(StackType** arg_stk_base, StackType** arg_stk_end,
}\
} while(0)
-#define STRING_CMP_IC(ambig_flag,s1,ps2,len) do {\
- if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_IC(case_fold_flag,s1,ps2,len) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
goto fail; \
} while(0)
-static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
+static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
UChar* s1, UChar** ps2, int mblen)
{
- UChar buf1[ONIGENC_MBC_NORMALIZE_MAXLEN];
- UChar buf2[ONIGENC_MBC_NORMALIZE_MAXLEN];
- UChar *p1, *p2, *end, *s2, *end2;
+ UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ UChar *p1, *p2, *end1, *s2, *end2;
int len1, len2;
s2 = *ps2;
- end = s1 + mblen;
+ end1 = s1 + mblen;
end2 = s2 + mblen;
- while (s1 < end) {
- len1 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s1, end, buf1);
- len2 = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s2, end2, buf2);
+ while (s1 < end1) {
+ len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, end1, buf1);
+ len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, end2, buf2);
if (len1 != len2) return 0;
p1 = buf1;
p2 = buf2;
@@ -1051,31 +1008,36 @@ static int string_cmp_ic(OnigEncoding enc, int ambig_flag,
}\
} while(0)
-#define STRING_CMP_VALUE_IC(ambig_flag,s1,ps2,len,is_fail) do {\
- if (string_cmp_ic(encode, ambig_flag, s1, ps2, len) == 0) \
+#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,is_fail) do {\
+ if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len) == 0) \
is_fail = 1; \
else \
is_fail = 0; \
} while(0)
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
-#define IS_EMPTY_STR (str == end)
-
-#define DATA_ENSURE(n) \
- if (s + (n) > end) goto fail
-
+#define IS_EMPTY_STR (str == end)
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#define DATA_ENSURE_CHECK1 (s < right_range)
+#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
+#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+#else
+#define DATA_ENSURE_CHECK1 (s < end)
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+#define DATA_ENSURE(n) if (s + (n) > end) goto fail
+#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+
#ifdef USE_CAPTURE_HISTORY
static int
-make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp,
- StackType* stk_top, UChar* str, regex_t* reg)
+make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
+ OnigStackType* stk_top, UChar* str, regex_t* reg)
{
int n, r;
OnigCaptureTreeNode* child;
- StackType* k = *kp;
+ OnigStackType* k = *kp;
while (k < stk_top) {
if (k->type == STK_MEM_START) {
@@ -1124,13 +1086,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
}
static int backref_match_at_nested_level(regex_t* reg
- , StackType* top, StackType* stk_base
- , int ignore_case, int ambig_flag
+ , OnigStackType* top, OnigStackType* stk_base
+ , int ignore_case, int case_fold_flag
, int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
int level;
- StackType* k;
+ OnigStackType* k;
level = 0;
k = top;
@@ -1152,7 +1114,7 @@ static int backref_match_at_nested_level(regex_t* reg
ss = *s;
if (ignore_case != 0) {
- if (string_cmp_ic(reg->enc, ambig_flag,
+ if (string_cmp_ic(reg->enc, case_fold_flag,
pstart, &ss, (int )(pend - pstart)) == 0)
return 0; /* or goto next_mem; */
}
@@ -1181,68 +1143,6 @@ static int backref_match_at_nested_level(regex_t* reg
#endif /* USE_BACKREF_AT_LEVEL */
-#ifdef RUBY_PLATFORM
-
-typedef struct {
- int state;
- regex_t* reg;
- MatchArg* msa;
- StackType* stk_base;
-} TrapEnsureArg;
-
-static VALUE
-trap_ensure(VALUE arg)
-{
- TrapEnsureArg* ta = (TrapEnsureArg* )arg;
-
- if (ta->state == 0) { /* trap_exec() is not normal return */
- ONIG_STATE_DEC_THREAD(ta->reg);
- if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
- xfree(ta->stk_base);
-
- MATCH_ARG_FREE(*(ta->msa));
- }
-
- return Qnil;
-}
-
-static VALUE
-trap_exec(VALUE arg)
-{
- TrapEnsureArg* ta;
-
- rb_trap_exec();
-
- ta = (TrapEnsureArg* )arg;
- ta->state = 1; /* normal return */
- return Qnil;
-}
-
-extern void
-onig_exec_trap(regex_t* reg, MatchArg* msa, StackType* stk_base)
-{
- VALUE arg;
- TrapEnsureArg ta;
-
- ta.state = 0;
- ta.reg = reg;
- ta.msa = msa;
- ta.stk_base = stk_base;
- arg = (VALUE )(&ta);
- rb_ensure(trap_exec, arg, trap_ensure, arg);
-}
-
-#define CHECK_INTERRUPT_IN_MATCH_AT do {\
- if (rb_trap_pending) {\
- if (! rb_prohibit_interrupt) {\
- onig_exec_trap(reg, msa, stk_base);\
- }\
- }\
-} while (0)
-#else
-#define CHECK_INTERRUPT_IN_MATCH_AT
-#endif /* RUBY_PLATFORM */
-
#ifdef ONIG_DEBUG_STATISTICS
#define USE_TIMEOFDAY
@@ -1286,29 +1186,14 @@ static int MaxStackDepth = 0;
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while (0)
-#ifdef RUBY_PLATFORM
-
-/*
- * :nodoc:
- */
-static VALUE onig_stat_print()
-{
- onig_print_statistics(stderr);
- return Qnil;
-}
-#endif
-
-extern void onig_statistics_init()
+extern void
+onig_statistics_init(void)
{
int i;
for (i = 0; i < 256; i++) {
OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
}
MaxStackDepth = 0;
-
-#ifdef RUBY_PLATFORM
- rb_define_global_function("onig_stat_print", onig_stat_print, 0);
-#endif
}
extern void
@@ -1406,8 +1291,11 @@ typedef struct {
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
static int
-match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
- UChar* sprev, MatchArg* msa)
+match_at(regex_t* reg, const UChar* str, const UChar* end,
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ const UChar* right_range,
+#endif
+ const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
{
static UChar FinishCode[] = { OP_FINISH };
@@ -1417,15 +1305,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
RelAddrType addr;
OnigOptionType option = reg->options;
OnigEncoding encode = reg->enc;
- OnigAmbigType ambig_flag = reg->ambig_flag;
+ OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
UChar *s, *q, *sbegin;
UChar *p = reg->p;
char *alloca_base;
- StackType *stk_alloc, *stk_base, *stk, *stk_end;
- StackType *stkp; /* used as any purpose. */
- StackIndex si;
- StackIndex *repeat_stk;
- StackIndex *mem_start_stk, *mem_end_stk;
+ OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
+ OnigStackType *stkp; /* used as any purpose. */
+ OnigStackIndex si;
+ OnigStackIndex *repeat_stk;
+ OnigStackIndex *mem_start_stk, *mem_end_stk;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int scv;
unsigned char* state_check_buff = msa->state_check_buff;
@@ -1436,9 +1324,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STACK_INIT(alloca_base, n, INIT_MATCH_STACK_SIZE);
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
- repeat_stk = (StackIndex* )alloca_base;
+ repeat_stk = (OnigStackIndex* )alloca_base;
- mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);
+ mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
mem_end_stk = mem_start_stk + num_mem;
mem_start_stk--; /* for index start from 1,
mem_start_stk[1]..mem_start_stk[num_mem] */
@@ -1472,7 +1360,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
else { xmemcpy(bp, "\"", 1); bp += 1; }
*bp = 0;
- fputs(buf, stderr);
+ fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
onig_print_compiled_byte_code(stderr, p, NULL, encode);
fprintf(stderr, "\n");
@@ -1484,8 +1372,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_END: STAT_OP_IN(OP_END);
n = s - sstart;
if (n > best_len) {
- OnigRegion* region = msa->region;
+ OnigRegion* region;
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(option)) {
+ if (n > msa->best_len) {
+ msa->best_len = n;
+ msa->best_s = (UChar* )sstart;
+ }
+ else
+ goto end_best_len;
+ }
+#endif
best_len = n;
+ region = msa->region;
if (region) {
#ifdef USE_POSIX_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
@@ -1561,6 +1460,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#endif
} /* if (region) */
} /* n > best_len */
+
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ end_best_len:
+#endif
STAT_OP_OUT;
if (IS_FIND_CONDITION(option)) {
@@ -1568,7 +1471,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
- if (IS_FIND_LONGEST(option) && s < end) {
+ if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
goto fail; /* for retry */
}
}
@@ -1592,30 +1495,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
- UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
DATA_ENSURE(1);
ss = s;
sp = p;
- exact1_ic_retry:
- len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
if (*p != *q) {
-#if 1
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- s = ss;
- p = sp;
- goto exact1_ic_retry;
- }
- else
- goto fail;
-#else
goto fail;
-#endif
}
p++; q++;
}
@@ -1693,7 +1587,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
- UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
@@ -1704,25 +1598,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
ss = s;
sp = p;
- exactn_ic_retry:
- len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
+ len = ONIGENC_MBC_CASE_FOLD(encode,
+ /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
+ case_fold_flag,
+ &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) {
-#if 1
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- s = ss;
- p = sp;
- goto exactn_ic_retry;
- }
- else
- goto fail;
-#else
- goto fail;
-#endif
- }
+ if (*p != *q) goto fail;
p++; q++;
}
}
@@ -1898,7 +1781,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
UChar *ss;
int mb_len = enc_len(encode, s);
- if (s + mb_len > end) {
+ if (! DATA_ENSURE_CHECK(mb_len)) {
DATA_ENSURE(1);
s = (UChar* )end;
p += tlen;
@@ -1978,7 +1861,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_STAR: STAT_OP_IN(OP_ANYCHAR_STAR);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s);
DATA_ENSURE(n);
@@ -1990,7 +1873,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_ML_STAR: STAT_OP_IN(OP_ANYCHAR_ML_STAR);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
n = enc_len(encode, s);
if (n > 1) {
@@ -2007,7 +1890,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_STAR_PEEK_NEXT: STAT_OP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
@@ -2022,7 +1905,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
break;
case OP_ANYCHAR_ML_STAR_PEEK_NEXT:STAT_OP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
@@ -2044,7 +1927,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
case OP_STATE_CHECK_ANYCHAR_STAR: STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
@@ -2062,7 +1945,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
STAT_OP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
GET_STATE_CHECK_NUM_INC(mem, p);
- while (s < end) {
+ while (DATA_ENSURE_CHECK1) {
STATE_CHECK_VAL(scv, mem);
if (scv) goto fail;
@@ -2121,7 +2004,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_NOT_WORD_BOUND: STAT_OP_IN(OP_NOT_WORD_BOUND);
if (ON_STR_BEGIN(s)) {
- if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end))
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
}
else if (ON_STR_END(s)) {
@@ -2139,7 +2022,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
#ifdef USE_WORD_BEGIN_END
case OP_WORD_BEGIN: STAT_OP_IN(OP_WORD_BEGIN);
- if (DATA_ENSURE_CHECK(1) && ONIGENC_IS_MBC_WORD(encode, s, end)) {
+ if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
STAT_OP_OUT;
continue;
@@ -2257,7 +2140,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_START: STAT_OP_IN(OP_MEMORY_START);
GET_MEMNUM_INC(mem, p);
- mem_start_stk[mem] = (StackIndex )((void* )s);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )s);
STAT_OP_OUT;
continue;
break;
@@ -2271,7 +2154,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_END: STAT_OP_IN(OP_MEMORY_END);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
STAT_OP_OUT;
continue;
break;
@@ -2288,13 +2171,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_MEMORY_END_REC: STAT_OP_IN(OP_MEMORY_END_REC);
GET_MEMNUM_INC(mem, p);
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem] = (OnigStackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
if (BIT_STATUS_AT(reg->bt_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
STACK_PUSH_MEM_END_MARK(mem);
STAT_OP_OUT;
@@ -2368,7 +2251,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
n = pend - pstart;
DATA_ENSURE(n);
sprev = s;
- STRING_CMP_IC(ambig_flag, pstart, &s, n);
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
@@ -2440,7 +2323,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
DATA_ENSURE(n);
sprev = s;
swork = s;
- STRING_CMP_VALUE_IC(ambig_flag, pstart, &swork, n, is_fail);
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
while (sprev + (len = enc_len(encode, sprev)) < s)
@@ -2467,8 +2350,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
GET_LENGTH_INC(tlen, p);
sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic, ambig_flag
- , (int )level, (int )tlen, p, &s, end)) {
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic
+ , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
while (sprev + (len = enc_len(encode, sprev)) < s)
sprev += len;
@@ -2649,7 +2532,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, const UChar* sstart,
case OP_PUSH_OR_JUMP_EXACT1: STAT_OP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
- if (*p == *s && DATA_ENSURE_CHECK(1)) {
+ if (*p == *s && DATA_ENSURE_CHECK1) {
p++;
STACK_PUSH_ALT(p + addr, s, sprev);
STAT_OP_OUT;
@@ -2945,33 +2828,18 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
}
static int
-str_lower_case_match(OnigEncoding enc, int ambig_flag,
+str_lower_case_match(OnigEncoding enc, int case_fold_flag,
const UChar* t, const UChar* tend,
const UChar* p, const UChar* end)
{
int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
- const UChar* tsave;
- const UChar* psave;
-
- tsave = t;
- psave = p;
+ UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- retry:
while (t < tend) {
- lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
+ lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
- if (*t++ != *q++) {
- if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
- ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
- t = tsave;
- p = psave;
- goto retry;
- }
- else
- return 0;
- }
+ if (*t++ != *q++) return 0;
lowlen--;
}
}
@@ -2980,7 +2848,7 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
}
static UChar*
-slow_search_ic(OnigEncoding enc, int ambig_flag,
+slow_search_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
{
@@ -2994,7 +2862,8 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
s = (UChar* )text;
while (s < end) {
- if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+ if (str_lower_case_match(enc, case_fold_flag, target, target_end,
+ s, text_end))
return s;
s += enc_len(enc, s);
@@ -3036,7 +2905,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
}
static UChar*
-slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
+slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* adjust_text,
const UChar* text_end, const UChar* text_start)
@@ -3051,7 +2920,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- if (str_lower_case_match(enc, ambig_flag,
+ if (str_lower_case_match(enc, case_fold_flag,
target, target_end, s, text_end))
return s;
@@ -3242,7 +3111,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
{
int r;
UChar *prev;
- MatchArg msa;
+ OnigMatchArg msa;
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
@@ -3289,7 +3158,11 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
if (r == 0) {
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end, at, prev, &msa);
+ r = match_at(reg, str, end,
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ end,
+#endif
+ at, prev, &msa);
}
MATCH_ARG_FREE(msa);
@@ -3325,7 +3198,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_ic(reg->enc, reg->ambig_flag,
+ p = slow_search_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end, p, end, range);
break;
@@ -3446,7 +3319,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
break;
case ONIG_OPTIMIZE_EXACT_IC:
- p = slow_search_backward_ic(reg->enc, reg->ambig_flag,
+ p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
break;
@@ -3535,8 +3408,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
{
int r;
UChar *s, *prev;
- MatchArg msa;
+ OnigMatchArg msa;
const UChar *orig_start = start;
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ const UChar *orig_range = range;
+#endif
#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
start:
@@ -3580,16 +3456,57 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
-#define MATCH_AND_RETURN_CHECK \
+
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#else
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+#define MATCH_AND_RETURN_CHECK(none) \
+ r = match_at(reg, str, end, s, prev, &msa);\
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ if (! IS_FIND_LONGEST(reg->options)) {\
+ goto match;\
+ }\
+ }\
+ else goto finish; /* error */ \
+ }
+#else
+#define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
- if (r >= 0) goto match;\
- goto finish; /* error */ \
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
}
+#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+
/* anchor optimize: resume search range */
if (reg->anchor != 0 && str < end) {
- UChar* semi_end;
+ UChar *min_semi_end, *max_semi_end;
if (reg->anchor & ANCHOR_BEGIN_POSITION) {
/* search start-position only */
@@ -3615,49 +3532,50 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else if (reg->anchor & ANCHOR_END_BUF) {
- semi_end = (UChar* )end;
+ min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigDistance )(semi_end - str) < reg->anchor_dmin)
+ if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
goto mismatch_no_msa;
if (range > start) {
- if ((OnigDistance )(semi_end - start) > reg->anchor_dmax) {
- start = semi_end - reg->anchor_dmax;
+ if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
+ start = min_semi_end - reg->anchor_dmax;
if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
else { /* match with empty at end */
start = onigenc_get_prev_char_head(reg->enc, str, end);
}
}
- if ((OnigDistance )(semi_end - (range - 1)) < reg->anchor_dmin) {
- range = semi_end - reg->anchor_dmin + 1;
+ if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
+ range = max_semi_end - reg->anchor_dmin + 1;
}
if (start >= range) goto mismatch_no_msa;
}
else {
- if ((OnigDistance )(semi_end - range) > reg->anchor_dmax) {
- range = semi_end - reg->anchor_dmax;
+ if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
+ range = min_semi_end - reg->anchor_dmax;
}
- if ((OnigDistance )(semi_end - start) < reg->anchor_dmin) {
- start = semi_end - reg->anchor_dmin;
+ if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
+ start = max_semi_end - reg->anchor_dmin;
start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
- if (range > start) goto mismatch_no_msa;
}
+ if (range > start) goto mismatch_no_msa;
}
}
else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1);
+ max_semi_end = (UChar* )end;
if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
- semi_end = pre_end;
- if (semi_end > str && start <= semi_end) {
+ min_semi_end = pre_end;
+ if (min_semi_end > str && start <= min_semi_end) {
goto end_buf;
}
}
else {
- semi_end = (UChar* )end;
+ min_semi_end = (UChar* )end;
goto end_buf;
}
}
@@ -3679,10 +3597,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_ARG_INIT(msa, option, region, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
- msa.state_check_buff = (void* )0;
- msa.state_check_buff_size = 0;
+ msa.state_check_buff = (void* )0;
+ msa.state_check_buff_size = 0; /* NO NEED, for valgrind */
#endif
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(end);
goto mismatch;
}
goto mismatch_no_msa;
@@ -3733,7 +3651,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
prev = low_prev;
}
while (s <= high) {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
}
@@ -3746,7 +3664,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
do {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
@@ -3761,16 +3679,21 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
s += enc_len(reg->enc, s);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_range);
}
}
else { /* backward search */
+#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+ if (orig_start < end)
+ orig_start += enc_len(reg->enc, orig_start); /* is upper range */
+#endif
+
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
@@ -3793,7 +3716,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
while (s >= low) {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
}
} while (s >= range);
@@ -3821,12 +3744,20 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do {
prev = onigenc_get_prev_char_head(reg->enc, str, s);
- MATCH_AND_RETURN_CHECK;
+ MATCH_AND_RETURN_CHECK(orig_start);
s = prev;
} while (s >= range);
}
mismatch:
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (IS_FIND_LONGEST(reg->options)) {
+ if (msa.best_len >= 0) {
+ s = msa.best_s;
+ goto match;
+ }
+ }
+#endif
r = ONIG_MISMATCH;
finish:
@@ -3877,10 +3808,10 @@ onig_get_options(regex_t* reg)
return reg->options;
}
-extern OnigAmbigType
-onig_get_ambig_flag(regex_t* reg)
+extern OnigCaseFoldType
+onig_get_case_fold_flag(regex_t* reg)
{
- return reg->ambig_flag;
+ return reg->case_fold_flag;
}
extern OnigSyntaxType*