about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-09-15 16:18:41 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-09-15 16:18:41 +0000
commit d2a5354255bab4d74c710ee8ff21f43998f33095 (patch)
tree 1ec7bb66dc38e3d46bc69892fa6d18184386ec72
parent d198d64e0464c141f70c49880bf511ac3dcd1162 (diff)
download ruby-d2a5354255bab4d74c710ee8ff21f43998f33095.tar.gz
* reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@47598 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 4
-rw-r--r-- enc/big5.c 6
-rw-r--r-- enc/euc_jp.c 7
-rw-r--r-- enc/iso_8859_1.c 2
-rw-r--r-- enc/iso_8859_10.c 4
-rw-r--r-- enc/iso_8859_13.c 4
-rw-r--r-- enc/iso_8859_14.c 4
-rw-r--r-- enc/iso_8859_15.c 4
-rw-r--r-- enc/iso_8859_16.c 4
-rw-r--r-- enc/iso_8859_2.c 2
-rw-r--r-- enc/iso_8859_3.c 4
-rw-r--r-- enc/iso_8859_4.c 4
-rw-r--r-- enc/iso_8859_5.c 4
-rw-r--r-- enc/iso_8859_7.c 4
-rw-r--r-- enc/iso_8859_9.c 4
-rw-r--r-- enc/koi8_r.c 4
-rw-r--r-- enc/koi8_u.c 4
-rw-r--r-- enc/shift_jis.c 7
-rw-r--r-- enc/unicode.c 1
-rw-r--r-- enc/us_ascii.c 6
-rw-r--r-- enc/utf_16_32.h 2
-rw-r--r-- enc/utf_16be.c 4
-rw-r--r-- enc/utf_16le.c 4
-rw-r--r-- enc/utf_8.c 4
-rw-r--r-- enc/windows_1251.c 4
-rw-r--r-- include/ruby/oniguruma.h 4
-rw-r--r-- regcomp.c 127
-rw-r--r-- regenc.c 64
-rw-r--r-- regenc.h 14
-rw-r--r-- regexec.c 7
-rw-r--r-- regint.h 6
-rw-r--r-- regparse.c 299
-rw-r--r-- regparse.h 4
33 files changed, 390 insertions, 236 deletions
diff --git a/ChangeLog b/ChangeLog
index e52a75bb4b..659f69a170 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Tue Sep 16 01:06:40 2014 NARUSE, Yui <naruse@ruby-lang.org>
+
+ * reg*.c: Merge Onigmo 5.15.0 38a870960aa7370051a3544
+
Mon Sep 15 16:21:10 2014 Eric Wong <e@80x24.org>
* io.c (struct io_advise_struct): 32 => 24 bytes on 64-bit
diff --git a/enc/big5.c b/enc/big5.c
index 9d7738d8f9..27315c4ba9 100644
--- a/enc/big5.c
+++ b/enc/big5.c
@@ -167,19 +167,19 @@ big5_mbc_enc_len0(const UChar* p, const UChar* e, int tridx, const int tbl[])
static int
big5_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
{
- return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5);
+ return big5_mbc_enc_len0(p, e, 0, EncLen_BIG5);
}
static int
big5_hkscs_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
{
- return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS);
+ return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_HKSCS);
}
static int
big5_uao_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
{
- return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO);
+ return big5_mbc_enc_len0(p, e, 2, EncLen_BIG5_UAO);
}
static OnigCodePoint
diff --git a/enc/euc_jp.c b/enc/euc_jp.c
index 8ee24bffdb..61bb8ba65a 100644
--- a/enc/euc_jp.c
+++ b/enc/euc_jp.c
@@ -293,7 +293,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -504,13 +504,14 @@ static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
UChar *s = p, *e = end;
- const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
+ const struct enc_property *prop =
+ onig_jis_property((const char* )s, (unsigned int )(e - s));
if (!prop) {
return onigenc_minimum_property_name_to_ctype(enc, s, e);
}
- return (int)prop->ctype;
+ return (int )prop->ctype;
}
static int
diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c
index 92dc14f978..088b427d1c 100644
--- a/enc/iso_8859_1.c
+++ b/enc/iso_8859_1.c
@@ -29,8 +29,6 @@
#include "regenc.h"
-#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
-
#define ENC_IS_ISO_8859_1_CTYPE(code,ctype) \
((EncISO_8859_1_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c
index ec20a15baa..ab71a5adcf 100644
--- a/enc/iso_8859_10.c
+++ b/enc/iso_8859_10.c
@@ -208,7 +208,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -219,7 +219,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c
index 4d7b328818..11b3dda1c9 100644
--- a/enc/iso_8859_13.c
+++ b/enc/iso_8859_13.c
@@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c
index 1271c8a7a6..2939e89b7b 100644
--- a/enc/iso_8859_14.c
+++ b/enc/iso_8859_14.c
@@ -210,7 +210,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -221,7 +221,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c
index 451033e158..fdb7ca12d7 100644
--- a/enc/iso_8859_15.c
+++ b/enc/iso_8859_15.c
@@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c
index 5234cf0e7f..5e53f3b6d0 100644
--- a/enc/iso_8859_16.c
+++ b/enc/iso_8859_16.c
@@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c
index f4cb9100df..94613e661b 100644
--- a/enc/iso_8859_2.c
+++ b/enc/iso_8859_2.c
@@ -29,8 +29,6 @@
#include "regenc.h"
-#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
-
#define ENC_ISO_8859_2_TO_LOWER_CASE(c) EncISO_8859_2_ToLowerCaseTable[c]
#define ENC_IS_ISO_8859_2_CTYPE(code,ctype) \
((EncISO_8859_2_CtypeTable[code] & CTYPE_TO_BIT(ctype)) != 0)
diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c
index 85572574b8..863a575020 100644
--- a/enc/iso_8859_3.c
+++ b/enc/iso_8859_3.c
@@ -204,7 +204,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -215,7 +215,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c
index 771a2cf6e7..48b999e756 100644
--- a/enc/iso_8859_4.c
+++ b/enc/iso_8859_4.c
@@ -206,7 +206,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -217,7 +217,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c
index 4ee27b10d8..e71a488c4c 100644
--- a/enc/iso_8859_5.c
+++ b/enc/iso_8859_5.c
@@ -194,7 +194,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -205,7 +205,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}
diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c
index aa82f880f9..8d07cb6310 100644
--- a/enc/iso_8859_7.c
+++ b/enc/iso_8859_7.c
@@ -190,7 +190,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -201,7 +201,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}
diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c
index 0adafa3ed4..211ba3b2f3 100644
--- a/enc/iso_8859_9.c
+++ b/enc/iso_8859_9.c
@@ -197,7 +197,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, f, arg);
}
@@ -208,7 +208,7 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 1,
+ numberof(CaseFoldMap), CaseFoldMap, 1,
flag, p, end, items);
}
diff --git a/enc/koi8_r.c b/enc/koi8_r.c
index 8ec48747f8..85fa72287e 100644
--- a/enc/koi8_r.c
+++ b/enc/koi8_r.c
@@ -183,7 +183,7 @@ koi8_r_apply_all_case_fold(OnigCaseFoldType flag,
void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -193,7 +193,7 @@ koi8_r_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}
diff --git a/enc/koi8_u.c b/enc/koi8_u.c
index 0e51b6eb80..0ae449ca21 100644
--- a/enc/koi8_u.c
+++ b/enc/koi8_u.c
@@ -187,7 +187,7 @@ koi8_u_apply_all_case_fold(OnigCaseFoldType flag,
void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -197,7 +197,7 @@ koi8_u_get_case_fold_codes_by_str(OnigCaseFoldType flag,
OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}
diff --git a/enc/shift_jis.c b/enc/shift_jis.c
index 530415b87c..cbd3f02051 100644
--- a/enc/shift_jis.c
+++ b/enc/shift_jis.c
@@ -278,7 +278,7 @@ apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -493,13 +493,14 @@ static int
property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
UChar *s = p, *e = end;
- const struct enc_property *prop = onig_jis_property((const char *)s, (unsigned int)(e-s));
+ const struct enc_property *prop =
+ onig_jis_property((const char* )s, (unsigned int )(e - s));
if (!prop) {
return onigenc_minimum_property_name_to_ctype(enc, s, e);
}
- return (int)prop->ctype;
+ return (int )prop->ctype;
}
static int
diff --git a/enc/unicode.c b/enc/unicode.c
index 2575762ada..f0ef89880f 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -141,7 +141,6 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
#include "enc/unicode/name2ctype.h"
-#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
#define CODE_RANGES_NUM numberof(CodeRanges)
extern int
diff --git a/enc/us_ascii.c b/enc/us_ascii.c
index 1b47778391..18d0685040 100644
--- a/enc/us_ascii.c
+++ b/enc/us_ascii.c
@@ -3,9 +3,9 @@
static int
us_ascii_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc)
{
- if (*p & 0x80)
- return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
- return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
+ if (*p & 0x80)
+ return ONIGENC_CONSTRUCT_MBCLEN_INVALID();
+ return ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1);
}
OnigEncodingDefine(us_ascii, US_ASCII) = {
diff --git a/enc/utf_16_32.h b/enc/utf_16_32.h
index da58d1b23c..b232767ee3 100644
--- a/enc/utf_16_32.h
+++ b/enc/utf_16_32.h
@@ -1,5 +1,5 @@
#include "regenc.h"
/* dummy for unsupported, statefull encoding */
-#define ENC_DUMMY_UNICODE(name) ENC_REPLICATE(name, name "BE")
+#define ENC_DUMMY_UNICODE(name) ENC_DUMMY(name)
ENC_DUMMY_UNICODE("UTF-16");
ENC_DUMMY_UNICODE("UTF-32");
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index 3af8359caf..a61ae00863 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -29,10 +29,6 @@
#include "regenc.h"
-#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
-#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
-#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
-
#if 0
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
diff --git a/enc/utf_16le.c b/enc/utf_16le.c
index 453c771cc5..7d176e710e 100644
--- a/enc/utf_16le.c
+++ b/enc/utf_16le.c
@@ -29,10 +29,6 @@
#include "regenc.h"
-#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
-#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
-#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
-
#if 0
static const int EncLen_UTF16[] = {
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
diff --git a/enc/utf_8.c b/enc/utf_8.c
index dae1f3a1bc..b8f38e9d58 100644
--- a/enc/utf_8.c
+++ b/enc/utf_8.c
@@ -367,7 +367,7 @@ code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
static int
mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
- const UChar* end, UChar* fold, OnigEncoding enc)
+ const UChar* end, UChar* fold, OnigEncoding enc)
{
const UChar* p = *pp;
@@ -395,7 +395,7 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
static int
get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
- const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
+ const OnigCodePoint* ranges[], OnigEncoding enc ARG_UNUSED)
{
*sb_out = 0x80;
return onigenc_unicode_ctype_code_range(ctype, ranges);
diff --git a/enc/windows_1251.c b/enc/windows_1251.c
index 73060962c3..191d631b88 100644
--- a/enc/windows_1251.c
+++ b/enc/windows_1251.c
@@ -167,7 +167,7 @@ cp1251_apply_all_case_fold(OnigCaseFoldType flag,
OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc ARG_UNUSED)
{
return onigenc_apply_all_case_fold_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, f, arg);
}
@@ -176,7 +176,7 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
- sizeof(CaseFoldMap)/sizeof(OnigPairCaseFoldCodes), CaseFoldMap, 0,
+ numberof(CaseFoldMap), CaseFoldMap, 0,
flag, p, end, items);
}
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index acb7365a88..2530f61427 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -39,8 +39,8 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
-#define ONIGURUMA_VERSION_MINOR 14
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_MINOR 15
+#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
diff --git a/regcomp.c b/regcomp.c
index 676bee26cc..c1698ea1dc 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -330,9 +330,10 @@ static int compile_tree(Node* node, regex_t* reg);
(op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
static int
-select_str_opcode(int mb_len, OnigDistance str_len, int ignore_case)
+select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
{
int op;
+ OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
if (ignore_case) {
switch (str_len) {
@@ -434,11 +435,11 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}
static int
-add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
len = SIZE_OPCODE;
@@ -446,15 +447,15 @@ add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op))
len += SIZE_LENGTH;
- len += mb_len * (int )str_len;
+ len += (int )byte_len;
return len;
}
static int
-add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
+add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
regex_t* reg, int ignore_case)
{
- int op = select_str_opcode(mb_len, str_len, ignore_case);
+ int op = select_str_opcode(mb_len, byte_len, ignore_case);
add_opcode(reg, op);
if (op == OP_EXACTMBN)
@@ -462,12 +463,12 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
if (IS_NEED_STR_LEN_OP_EXACT(op)) {
if (op == OP_EXACTN_IC)
- add_length(reg, mb_len * str_len);
+ add_length(reg, byte_len);
else
- add_length(reg, str_len);
+ add_length(reg, byte_len / mb_len);
}
- add_bytes(reg, s, mb_len * str_len);
+ add_bytes(reg, s, byte_len);
return 0;
}
@@ -475,7 +476,7 @@ add_compile_string(UChar* s, int mb_len, OnigDistance str_len,
static int
compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig;
+ int rlen, r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev;
StrNode* sn;
@@ -489,24 +490,24 @@ compile_length_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, sn->end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
rlen = 0;
for (; p < sn->end; ) {
len = enclen(enc, p, sn->end);
- if (len == prev_len) {
- slen++;
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
p += len;
}
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
rlen += r;
return rlen;
}
@@ -523,7 +524,7 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg)
static int
compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig;
+ int r, len, prev_len, blen, ambig;
OnigEncoding enc = reg->enc;
UChar *p, *prev, *end;
StrNode* sn;
@@ -538,25 +539,25 @@ compile_string_node(Node* node, regex_t* reg)
p = prev = sn->s;
prev_len = enclen(enc, p, end);
p += prev_len;
- slen = 1;
+ blen = prev_len;
for (; p < end; ) {
len = enclen(enc, p, end);
- if (len == prev_len) {
- slen++;
+ if (len == prev_len || ambig) {
+ blen += len;
}
else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string(prev, prev_len, blen, reg, ambig);
if (r) return r;
prev = p;
- slen = 1;
+ blen = len;
prev_len = len;
}
p += len;
}
- return add_compile_string(prev, prev_len, slen, reg, ambig);
+ return add_compile_string(prev, prev_len, blen, reg, ambig);
}
static int
@@ -2591,6 +2592,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
return 0;
}
else {
+ if (IS_NOT_NULL(xc->mbuf)) return 0;
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
int is_word;
if (NCTYPE(y)->ascii_range)
@@ -3311,7 +3313,7 @@ next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
qn->next_head_exact = n;
}
#endif
- /* automatic possessivation a*b ==> (?>a*)b */
+ /* automatic possessification a*b ==> (?>a*)b */
if (qn->lower <= 1) {
int ttype = NTYPE(qn->target);
if (IS_NODE_TYPE_SIMPLE(ttype)) {
@@ -3433,26 +3435,39 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
}
static int
+is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
+ int slen)
+{
+ int i;
+
+ for (i = 0; i < item_num; i++) {
+ if (items[i].byte_len != slen) {
+ return 1;
+ }
+ if (items[i].code_len != 1) {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
UChar *p, int slen, UChar *end,
regex_t* reg, Node **rnode)
{
- int r, i, j, len, varlen, varclen;
+ int r, i, j, len, varlen;
Node *anode, *var_anode, *snode, *xnode, *an;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
*rnode = var_anode = NULL_NODE;
varlen = 0;
- varclen = 0;
for (i = 0; i < item_num; i++) {
if (items[i].byte_len != slen) {
varlen = 1;
break;
}
- if (items[i].code_len != 1) {
- varclen |= 1;
- }
}
if (varlen != 0) {
@@ -3537,8 +3552,6 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
}
}
- if (varclen && !varlen)
- return 2;
return varlen;
mem_err2:
@@ -3582,7 +3595,8 @@ expand_case_fold_string(Node* node, regex_t* reg)
len = enclen(reg->enc, p, end);
- if (n == 0) {
+ varlen = is_case_fold_variable_len(n, items, len);
+ if (n == 0 || varlen == 0) {
if (IS_NULL(snode)) {
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
@@ -3607,11 +3621,14 @@ expand_case_fold_string(Node* node, regex_t* reg)
}
else {
alt_num *= (n + 1);
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) {
- varlen = 1; /* Assume that expanded strings are variable length. */
- break;
- }
+ if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
top_root = root = onig_node_list_add(NULL_NODE, prev_node);
if (IS_NULL(root)) {
@@ -3622,7 +3639,6 @@ expand_case_fold_string(Node* node, regex_t* reg)
r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
if (r < 0) goto mem_err;
- if (r > 0) varlen = 1;
if (r == 1) {
if (IS_NULL(root)) {
top_root = prev_node;
@@ -3636,7 +3652,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
root = NCAR(prev_node);
}
- else { /* r == 0 || r == 2 */
+ else { /* r == 0 */
if (IS_NOT_NULL(root)) {
if (IS_NULL(onig_node_list_add(root, prev_node))) {
onig_node_free(prev_node);
@@ -3650,6 +3666,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
p += len;
}
+ if (IS_NOT_NULL(snode)) {
+ r = update_string_node_case_fold(reg, snode);
+ if (r == 0) {
+ NSTRING_SET_AMBIG(snode);
+ }
+ }
if (p < end) {
Node *srem;
@@ -3679,20 +3701,9 @@ expand_case_fold_string(Node* node, regex_t* reg)
/* ending */
top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- if (!varlen) {
- /* When all expanded strings are same length, case-insensitive
- BM search will be used. */
- r = update_string_node_case_fold(reg, node);
- if (r == 0) {
- NSTRING_SET_AMBIG(node);
- }
- }
- else {
- swap_node(node, top_root);
- r = 0;
- }
+ swap_node(node, top_root);
onig_node_free(top_root);
- return r;
+ return 0;
mem_err:
r = ONIGERR_MEMORY;
@@ -4367,7 +4378,7 @@ map_position_value(OnigEncoding enc, int i)
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
+ if (i < numberof(ByteValTable)) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
@@ -4399,7 +4410,7 @@ distance_value(MinMaxLen* mm)
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
- if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
+ if (d < numberof(dist_vals))
/* return dist_vals[d] * 16 / (mm->min + 12); */
return (int )dist_vals[d];
else
@@ -4507,6 +4518,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
if (right_len == 0) {
to->right_anchor |= left->right_anchor;
}
+ else {
+ to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
+ }
}
static int
@@ -5080,7 +5094,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_END_BUF:
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
- case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
+ case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
+ case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
@@ -5103,7 +5118,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND_NOT:
break;
}
@@ -5369,7 +5383,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
ANCHOR_LOOK_BEHIND);
- reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
+ reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
+ ANCHOR_PREC_READ_NOT);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
reg->anchor_dmin = opt.len.min;
diff --git a/regenc.c b/regenc.c
index e628d62357..2683b826f2 100644
--- a/regenc.c
+++ b/regenc.c
@@ -414,9 +414,7 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigCodePoint code;
int i, r;
- for (i = 0;
- i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
- i++) {
+ for (i = 0; i < numberof(OnigAsciiLowerMap); i++) {
code = OnigAsciiLowerMap[i].to;
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
if (r != 0) return r;
@@ -431,8 +429,8 @@ onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
extern int
onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED,
- const OnigUChar* p, const OnigUChar* end ARG_UNUSED, OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end ARG_UNUSED,
+ OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED)
{
if (0x41 <= *p && *p <= 0x5a) {
items[0].byte_len = 1;
@@ -570,9 +568,10 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
extern int
-onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
- OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
- OnigEncoding enc)
+onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED,
+ OnigCodePoint* sb_out ARG_UNUSED,
+ const OnigCodePoint* ranges[] ARG_UNUSED,
+ OnigEncoding enc)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -589,7 +588,7 @@ onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc A
/* for single byte encodings */
extern int
onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
- const UChar*end, UChar* lower, OnigEncoding enc ARG_UNUSED)
+ const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED)
{
*lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p);
@@ -633,28 +632,31 @@ extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
{
if (code > 0xff)
- rb_raise(rb_eRangeError, "%u out of char range", code);
+ rb_raise(rb_eRangeError, "%u out of char range", code);
*buf = (UChar )(code & 0xff);
return 1;
}
extern UChar*
-onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, const UChar* s,
- const UChar* end,
+onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED,
+ const UChar* s,
+ const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return (UChar* )s;
}
extern int
-onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
+onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
+ const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return TRUE;
}
extern int
-onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, const UChar* end ARG_UNUSED,
+onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED,
+ const UChar* end ARG_UNUSED,
OnigEncoding enc ARG_UNUSED)
{
return FALSE;
@@ -716,7 +718,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
#if 0
extern int
onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp ARG_UNUSED, const UChar* end ARG_UNUSED)
+ const UChar** pp, const UChar* end ARG_UNUSED)
{
const UChar* p = *pp;
@@ -791,27 +793,27 @@ extern int
onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p, UChar* end)
{
static const PosixBracketEntryType PBS[] = {
- PosixBracketEntryInit("Alnum", ONIGENC_CTYPE_ALNUM),
- PosixBracketEntryInit("Alpha", ONIGENC_CTYPE_ALPHA),
- PosixBracketEntryInit("Blank", ONIGENC_CTYPE_BLANK),
- PosixBracketEntryInit("Cntrl", ONIGENC_CTYPE_CNTRL),
- PosixBracketEntryInit("Digit", ONIGENC_CTYPE_DIGIT),
- PosixBracketEntryInit("Graph", ONIGENC_CTYPE_GRAPH),
- PosixBracketEntryInit("Lower", ONIGENC_CTYPE_LOWER),
- PosixBracketEntryInit("Print", ONIGENC_CTYPE_PRINT),
- PosixBracketEntryInit("Punct", ONIGENC_CTYPE_PUNCT),
- PosixBracketEntryInit("Space", ONIGENC_CTYPE_SPACE),
- PosixBracketEntryInit("Upper", ONIGENC_CTYPE_UPPER),
- PosixBracketEntryInit("XDigit", ONIGENC_CTYPE_XDIGIT),
- PosixBracketEntryInit("ASCII", ONIGENC_CTYPE_ASCII),
- PosixBracketEntryInit("Word", ONIGENC_CTYPE_WORD),
+ POSIX_BRACKET_ENTRY_INIT("Alnum", ONIGENC_CTYPE_ALNUM),
+ POSIX_BRACKET_ENTRY_INIT("Alpha", ONIGENC_CTYPE_ALPHA),
+ POSIX_BRACKET_ENTRY_INIT("Blank", ONIGENC_CTYPE_BLANK),
+ POSIX_BRACKET_ENTRY_INIT("Cntrl", ONIGENC_CTYPE_CNTRL),
+ POSIX_BRACKET_ENTRY_INIT("Digit", ONIGENC_CTYPE_DIGIT),
+ POSIX_BRACKET_ENTRY_INIT("Graph", ONIGENC_CTYPE_GRAPH),
+ POSIX_BRACKET_ENTRY_INIT("Lower", ONIGENC_CTYPE_LOWER),
+ POSIX_BRACKET_ENTRY_INIT("Print", ONIGENC_CTYPE_PRINT),
+ POSIX_BRACKET_ENTRY_INIT("Punct", ONIGENC_CTYPE_PUNCT),
+ POSIX_BRACKET_ENTRY_INIT("Space", ONIGENC_CTYPE_SPACE),
+ POSIX_BRACKET_ENTRY_INIT("Upper", ONIGENC_CTYPE_UPPER),
+ POSIX_BRACKET_ENTRY_INIT("XDigit", ONIGENC_CTYPE_XDIGIT),
+ POSIX_BRACKET_ENTRY_INIT("ASCII", ONIGENC_CTYPE_ASCII),
+ POSIX_BRACKET_ENTRY_INIT("Word", ONIGENC_CTYPE_WORD),
};
- const PosixBracketEntryType *pb, *pbe;
+ const PosixBracketEntryType *pb;
int len;
len = onigenc_strlen(enc, p, end);
- for (pbe = (pb = PBS) + sizeof(PBS)/sizeof(PBS[0]); pb < pbe; ++pb) {
+ for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
if (len == pb->len &&
onigenc_with_ascii_strnicmp(enc, p, end, pb->name, pb->len) == 0)
return pb->ctype;
diff --git a/regenc.h b/regenc.h
index 6f2d4a69c1..e220aabc5b 100644
--- a/regenc.h
+++ b/regenc.h
@@ -29,15 +29,18 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+
#ifndef REGINT_H
#ifndef RUBY_EXTERN
#include "ruby/config.h"
#include "ruby/defines.h"
#endif
+#endif
+
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
-#endif
+
#include "ruby/oniguruma.h"
RUBY_SYMBOL_EXPORT_BEGIN
@@ -104,7 +107,13 @@ typedef struct {
short int len;
} PosixBracketEntryType;
-#define PosixBracketEntryInit(name, ctype) {(const UChar *)name, ctype, (short int)(sizeof(name) - 1)}
+#define POSIX_BRACKET_ENTRY_INIT(name, ctype) \
+ {(const UChar* )(name), (ctype), (short int )(sizeof(name) - 1)}
+
+#ifndef numberof
+#define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
+#endif
+
#define USE_CRNL_AS_LINE_TERMINATOR
#define USE_UNICODE_PROPERTIES
@@ -159,6 +168,7 @@ ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, O
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
#define UTF16_IS_SURROGATE_SECOND(c) (((c) & 0xfc) == 0xdc)
+#define UTF16_IS_SURROGATE(c) (((c) & 0xf8) == 0xd8)
#define ONIGENC_ISO_8859_1_TO_LOWER_CASE(c) \
OnigEncISO_8859_1_ToLowerCaseTable[c]
diff --git a/regexec.c b/regexec.c
index 87b7759e26..1336468a74 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1397,7 +1397,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
(int )(end - str), (int )(sstart - str));
#endif
- STACK_PUSH_ENSURED(STK_ALT, (UChar *)FinishCode); /* bottom stack */
+ STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
best_len = ONIG_MISMATCH;
s = (UChar* )sstart;
pkeep = (UChar* )sstart;
@@ -1406,7 +1406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (s) {
UChar *q, *bp, buf[50];
int len;
- fprintf(stderr, "%4d> \"", (*p == OP_FINISH) ? -1 : (int )(s - str));
+ fprintf(stderr, "%4"PRIdPTR"> \"", (*p == OP_FINISH) ? (ptrdiff_t )-1 : s - str);
bp = buf;
if (*p != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */
for (i = 0, q = s; i < 7 && q < end; i++) {
@@ -1419,6 +1419,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
*bp = 0;
fputs((char* )buf, stderr);
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
+ fprintf(stderr, "%4"PRIdPTR":", (p == FinishCode) ? (ptrdiff_t )-1 : p - reg->p);
onig_print_compiled_byte_code(stderr, p, p + strlen((char *)p), NULL, encode);
fprintf(stderr, "\n");
}
@@ -4183,7 +4184,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
prev = s;
s += enclen(reg->enc, s, end);
- if ((reg->anchor & ANCHOR_LOOK_BEHIND) == 0) {
+ if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
&& s < range) {
prev = s;
diff --git a/regint.h b/regint.h
index c389477211..3abc8809c9 100644
--- a/regint.h
+++ b/regint.h
@@ -193,6 +193,8 @@ extern pthread_mutex_t gOnigMutex;
#define USE_UPPER_CASE_TABLE
#else
+#define CHECK_INTERRUPT_IN_MATCH_AT
+
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
@@ -213,8 +215,6 @@ extern pthread_mutex_t gOnigMutex;
/* */
#define onig_st_is_member st_is_member
-#define CHECK_INTERRUPT_IN_MATCH_AT
-
#endif
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
@@ -913,9 +913,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
-#ifdef ONIG_DEBUG
extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
-#endif
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
diff --git a/regparse.c b/regparse.c
index 6e2e5c6ea1..5258972399 100644
--- a/regparse.c
+++ b/regparse.c
@@ -4153,17 +4153,15 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
}
static int
-add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
+add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* env)
{
- int maxcode, ascii_range;
+ int maxcode;
int c, r;
const OnigCodePoint *ranges;
OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
OnigOptionType option = env->option;
- ascii_range = IS_ASCII_RANGE(option) && (char_prop == 0);
-
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
if (ascii_range) {
@@ -4280,31 +4278,32 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int char_prop, ScanEnv* env)
}
static int
-parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
+ UChar** src, UChar* end, ScanEnv* env)
{
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
#define POSIX_BRACKET_NAME_MIN_LEN 4
static const PosixBracketEntryType PBS[] = {
- { (UChar* )"alnum", ONIGENC_CTYPE_ALNUM, 5 },
- { (UChar* )"alpha", ONIGENC_CTYPE_ALPHA, 5 },
- { (UChar* )"blank", ONIGENC_CTYPE_BLANK, 5 },
- { (UChar* )"cntrl", ONIGENC_CTYPE_CNTRL, 5 },
- { (UChar* )"digit", ONIGENC_CTYPE_DIGIT, 5 },
- { (UChar* )"graph", ONIGENC_CTYPE_GRAPH, 5 },
- { (UChar* )"lower", ONIGENC_CTYPE_LOWER, 5 },
- { (UChar* )"print", ONIGENC_CTYPE_PRINT, 5 },
- { (UChar* )"punct", ONIGENC_CTYPE_PUNCT, 5 },
- { (UChar* )"space", ONIGENC_CTYPE_SPACE, 5 },
- { (UChar* )"upper", ONIGENC_CTYPE_UPPER, 5 },
- { (UChar* )"xdigit", ONIGENC_CTYPE_XDIGIT, 6 },
- { (UChar* )"ascii", ONIGENC_CTYPE_ASCII, 5 },
- { (UChar* )"word", ONIGENC_CTYPE_WORD, 4 },
- { (UChar* )NULL, -1, 0 }
+ POSIX_BRACKET_ENTRY_INIT("alnum", ONIGENC_CTYPE_ALNUM),
+ POSIX_BRACKET_ENTRY_INIT("alpha", ONIGENC_CTYPE_ALPHA),
+ POSIX_BRACKET_ENTRY_INIT("blank", ONIGENC_CTYPE_BLANK),
+ POSIX_BRACKET_ENTRY_INIT("cntrl", ONIGENC_CTYPE_CNTRL),
+ POSIX_BRACKET_ENTRY_INIT("digit", ONIGENC_CTYPE_DIGIT),
+ POSIX_BRACKET_ENTRY_INIT("graph", ONIGENC_CTYPE_GRAPH),
+ POSIX_BRACKET_ENTRY_INIT("lower", ONIGENC_CTYPE_LOWER),
+ POSIX_BRACKET_ENTRY_INIT("print", ONIGENC_CTYPE_PRINT),
+ POSIX_BRACKET_ENTRY_INIT("punct", ONIGENC_CTYPE_PUNCT),
+ POSIX_BRACKET_ENTRY_INIT("space", ONIGENC_CTYPE_SPACE),
+ POSIX_BRACKET_ENTRY_INIT("upper", ONIGENC_CTYPE_UPPER),
+ POSIX_BRACKET_ENTRY_INIT("xdigit", ONIGENC_CTYPE_XDIGIT),
+ POSIX_BRACKET_ENTRY_INIT("ascii", ONIGENC_CTYPE_ASCII),
+ POSIX_BRACKET_ENTRY_INIT("word", ONIGENC_CTYPE_WORD),
};
const PosixBracketEntryType *pb;
int not, i, r;
+ int ascii_range;
OnigCodePoint c;
OnigEncoding enc = env->enc;
UChar *p = *src;
@@ -4319,17 +4318,25 @@ parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
if (onigenc_strlen(enc, p, end) < POSIX_BRACKET_NAME_MIN_LEN + 3)
goto not_posix_bracket;
- for (pb = PBS; IS_NOT_NULL(pb->name); pb++) {
+ ascii_range = IS_ASCII_RANGE(env->option) &&
+ ! IS_POSIX_BRACKET_ALL_RANGE(env->option);
+ for (pb = PBS; pb < PBS + numberof(PBS); pb++) {
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
- r = add_ctype_to_cc(cc, pb->ctype, not,
- IS_POSIX_BRACKET_ALL_RANGE(env->option),
- env);
+ r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
if (r != 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ if (pb->ctype != ONIGENC_CTYPE_WORD &&
+ pb->ctype != ONIGENC_CTYPE_ASCII &&
+ !ascii_range)
+ r = add_ctype_to_cc(asc_cc, pb->ctype, not, ascii_range, env);
+ if (r != 0) return r;
+ }
+
PINC_S; PINC_S;
*src = p;
return 0;
@@ -4386,6 +4393,8 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
return r;
}
+static int cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env);
+
static int
parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
ScanEnv* env)
@@ -4399,11 +4408,15 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = NCCLASS(*np);
- r = add_ctype_to_cc(cc, ctype, 0, 1, env);
+ r = add_ctype_to_cc(cc, ctype, 0, 0, env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
- return 0;
+ if (IS_IGNORECASE(env->option)) {
+ if (ctype != ONIGENC_CTYPE_ASCII)
+ r = cclass_case_fold(np, cc, cc, env);
+ }
+ return r;
}
@@ -4421,7 +4434,8 @@ enum CCVALTYPE {
};
static int
-next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
+next_state_class(CClassNode* cc, CClassNode* asc_cc,
+ OnigCodePoint* vs, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
int r;
@@ -4430,11 +4444,18 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
if (*state == CCS_VALUE && *type != CCV_CLASS) {
- if (*type == CCV_SB)
+ if (*type == CCV_SB) {
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+ if (IS_NOT_NULL(asc_cc))
+ BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
+ }
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
+ if (r < 0) return r;
+ }
}
}
@@ -4444,7 +4465,8 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
}
static int
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
+next_state_val(CClassNode* cc, CClassNode* asc_cc,
+ OnigCodePoint *vs, OnigCodePoint v,
int* vs_israw, int v_israw,
enum CCVALTYPE intype, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
@@ -4453,11 +4475,18 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
switch (*state) {
case CCS_VALUE:
- if (*type == CCV_SB)
+ if (*type == CCV_SB) {
BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs));
+ if (IS_NOT_NULL(asc_cc))
+ BITSET_SET_BIT(asc_cc->bs, (int )(*vs));
+ }
else if (*type == CCV_CODE_POINT) {
r = add_code_range(&(cc->mbuf), env, *vs, *vs);
if (r < 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0);
+ if (r < 0) return r;
+ }
}
break;
@@ -4474,10 +4503,16 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
bitset_set_range(env, cc->bs, (int )*vs, (int )v);
+ if (IS_NOT_NULL(asc_cc))
+ bitset_set_range(env, asc_cc->bs, (int )*vs, (int )v);
}
else {
r = add_code_range(&(cc->mbuf), env, *vs, v);
if (r < 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ r = add_code_range0(&(asc_cc->mbuf), env, *vs, v, 0);
+ if (r < 0) return r;
+ }
}
}
else {
@@ -4493,6 +4528,11 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
bitset_set_range(env, cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
if (r < 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ bitset_set_range(env, asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
+ r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v, 0);
+ if (r < 0) return r;
+ }
#if 0
}
else
@@ -4542,22 +4582,24 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
}
static int
-parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
+parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* end,
ScanEnv* env)
{
int r, neg, len, fetched, and_start;
OnigCodePoint v, vs;
UChar *p;
Node* node;
+ Node* asc_node;
CClassNode *cc, *prev_cc;
- CClassNode work_cc;
+ CClassNode *asc_cc, *asc_prev_cc;
+ CClassNode work_cc, asc_work_cc;
enum CCSTATE state;
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;
- prev_cc = (CClassNode* )NULL;
- *np = NULL_NODE;
+ prev_cc = asc_prev_cc = (CClassNode* )NULL;
+ *np = *asc_np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
neg = 1;
@@ -4581,6 +4623,16 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(node);
cc = NCCLASS(node);
+ if (IS_IGNORECASE(env->option)) {
+ *asc_np = asc_node = node_new_cclass();
+ CHECK_NULL_RETURN_MEMERR(asc_node);
+ asc_cc = NCCLASS(asc_node);
+ }
+ else {
+ asc_node = NULL_NODE;
+ asc_cc = NULL;
+ }
+
and_start = 0;
state = CCS_START;
p = *src;
@@ -4671,13 +4723,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
val_entry2:
- r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
+ r = next_state_val(cc, asc_cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
&state, env);
if (r != 0) goto err;
break;
case TK_POSIX_BRACKET_OPEN:
- r = parse_posix_bracket(cc, &p, end, env);
+ r = parse_posix_bracket(cc, asc_cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, (UChar* )"[");
@@ -4690,11 +4742,18 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CHAR_TYPE:
- r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, 0, env);
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not,
+ IS_ASCII_RANGE(env->option), env);
if (r != 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ if (tok->u.prop.ctype != ONIGENC_CTYPE_WORD)
+ r = add_ctype_to_cc(asc_cc, tok->u.prop.ctype, tok->u.prop.not,
+ IS_ASCII_RANGE(env->option), env);
+ if (r != 0) return r;
+ }
next_class:
- r = next_state_class(cc, &vs, &val_type, &state, env);
+ r = next_state_class(cc, asc_cc, &vs, &val_type, &state, env);
if (r != 0) goto err;
break;
@@ -4704,8 +4763,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
ctype = fetch_char_property_to_ctype(&p, end, env);
if (ctype < 0) return ctype;
- r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 1, env);
+ r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, 0, env);
if (r != 0) return r;
+ if (IS_NOT_NULL(asc_cc)) {
+ if (ctype != ONIGENC_CTYPE_ASCII)
+ r = add_ctype_to_cc(asc_cc, ctype, tok->u.prop.not, 0, env);
+ if (r != 0) return r;
+ }
goto next_class;
}
break;
@@ -4766,15 +4830,20 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_CC_OPEN: /* [ */
{
- Node *anode;
+ Node *anode, *aasc_node;
CClassNode* acc;
- r = parse_char_class(&anode, tok, &p, end, env);
+ r = parse_char_class(&anode, &aasc_node, tok, &p, end, env);
if (r == 0) {
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env);
}
+ if (r == 0 && IS_NOT_NULL(aasc_node)) {
+ acc = NCCLASS(aasc_node);
+ r = or_cclass(asc_cc, acc, env);
+ }
onig_node_free(anode);
+ onig_node_free(aasc_node);
if (r != 0) goto err;
}
break;
@@ -4782,7 +4851,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CC_AND: /* && */
{
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+ r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@@ -4794,12 +4863,23 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = and_cclass(prev_cc, cc, env);
if (r != 0) goto err;
bbuf_free(cc->mbuf);
+ if (IS_NOT_NULL(asc_cc)) {
+ r = and_cclass(asc_prev_cc, asc_cc, env);
+ if (r != 0) goto err;
+ bbuf_free(asc_cc->mbuf);
+ }
}
else {
prev_cc = cc;
cc = &work_cc;
+ if (IS_NOT_NULL(asc_cc)) {
+ asc_prev_cc = asc_cc;
+ asc_cc = &asc_work_cc;
+ }
}
initialize_cclass(cc);
+ if (IS_NOT_NULL(asc_cc))
+ initialize_cclass(asc_cc);
}
break;
@@ -4822,7 +4902,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
+ r = next_state_val(cc, asc_cc, &vs, 0, &val_israw, 0, val_type,
&val_type, &state, env);
if (r != 0) goto err;
}
@@ -4832,12 +4912,24 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (r != 0) goto err;
bbuf_free(cc->mbuf);
cc = prev_cc;
+ if (IS_NOT_NULL(asc_cc)) {
+ r = and_cclass(asc_prev_cc, asc_cc, env);
+ if (r != 0) goto err;
+ bbuf_free(asc_cc->mbuf);
+ asc_cc = asc_prev_cc;
+ }
}
- if (neg != 0)
+ if (neg != 0) {
NCCLASS_SET_NOT(cc);
- else
+ if (IS_NOT_NULL(asc_cc))
+ NCCLASS_SET_NOT(asc_cc);
+ }
+ else {
NCCLASS_CLEAR_NOT(cc);
+ if (IS_NOT_NULL(asc_cc))
+ NCCLASS_CLEAR_NOT(asc_cc);
+ }
if (IS_NCCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
@@ -4865,6 +4957,8 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
err:
if (cc != NCCLASS(*np))
bbuf_free(cc->mbuf);
+ if (IS_NOT_NULL(asc_cc) && (asc_cc != NCCLASS(*asc_np)))
+ bbuf_free(asc_cc->mbuf);
return r;
}
@@ -5489,6 +5583,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
typedef struct {
ScanEnv* env;
CClassNode* cc;
+ CClassNode* asc_cc;
Node* alt_root;
Node** ptail;
} IApplyCaseFoldArg;
@@ -5500,37 +5595,57 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
IApplyCaseFoldArg* iarg;
ScanEnv* env;
CClassNode* cc;
+ CClassNode* asc_cc;
BitSetRef bs;
+ int add_flag;
iarg = (IApplyCaseFoldArg* )arg;
env = iarg->env;
cc = iarg->cc;
+ asc_cc = iarg->asc_cc;
bs = cc->bs;
+ if (IS_NULL(asc_cc)) {
+ add_flag = 0;
+ }
+ else if (ONIGENC_IS_ASCII_CODE(from) == ONIGENC_IS_ASCII_CODE(*to)) {
+ add_flag = 1;
+ }
+ else {
+ add_flag = onig_is_code_in_cc(env->enc, from, asc_cc);
+ if (IS_NCCLASS_NOT(asc_cc))
+ add_flag = !add_flag;
+ }
+
if (to_len == 1) {
int is_in = onig_is_code_in_cc(env->enc, from, cc);
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
- }
- else {
- BITSET_SET_BIT(bs, *to);
+ if (add_flag) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ }
+ else {
+ BITSET_SET_BIT(bs, *to);
+ }
}
}
#else
if (is_in != 0) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
- }
- else {
- if (IS_NCCLASS_NOT(cc)) {
- BITSET_CLEAR_BIT(bs, *to);
+ if (add_flag) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
+ add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ }
+ else {
+ if (IS_NCCLASS_NOT(cc)) {
+ BITSET_CLEAR_BIT(bs, *to);
+ }
+ else {
+ BITSET_SET_BIT(bs, *to);
+ }
}
- else
- BITSET_SET_BIT(bs, *to);
}
}
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
@@ -5574,6 +5689,35 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
}
static int
+cclass_case_fold(Node** np, CClassNode* cc, CClassNode* asc_cc, ScanEnv* env)
+{
+ int r;
+ IApplyCaseFoldArg iarg;
+
+ iarg.env = env;
+ iarg.cc = cc;
+ iarg.asc_cc = asc_cc;
+ iarg.alt_root = NULL_NODE;
+ iarg.ptail = &(iarg.alt_root);
+
+ r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
+ i_apply_case_fold, &iarg);
+ if (r != 0) {
+ onig_node_free(iarg.alt_root);
+ return r;
+ }
+ if (IS_NOT_NULL(iarg.alt_root)) {
+ Node* work = onig_node_new_alt(*np, iarg.alt_root);
+ if (IS_NULL(work)) {
+ onig_node_free(iarg.alt_root);
+ return ONIGERR_MEMORY;
+ }
+ *np = work;
+ }
+ return r;
+}
+
+static int
node_linebreak(Node** np, ScanEnv* env)
{
/* same as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
@@ -5658,7 +5802,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
cc1 = NCCLASS(np1);
- r = add_ctype_to_cc(cc1, ctype, 0, 1, env);
+ r = add_ctype_to_cc(cc1, ctype, 0, 0, env);
if (r != 0) goto err;
NCCLASS_SET_NOT(cc1);
@@ -5666,7 +5810,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np2 = node_new_cclass();
if (IS_NULL(np2)) goto err;
cc2 = NCCLASS(np2);
- r = add_ctype_to_cc(cc2, ctype, 0, 1, env);
+ r = add_ctype_to_cc(cc2, ctype, 0, 0, env);
if (r != 0) goto err;
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
@@ -6013,7 +6157,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = NCCLASS(*np);
- r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 0, env);
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
+ IS_ASCII_RANGE(env->option), env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
#ifdef USE_SHARED_CCLASS_TABLE
@@ -6036,15 +6181,20 @@ parse_exp(Node** np, OnigToken* tok, int term,
case TK_CC_OPEN:
{
+ Node *asc_node;
CClassNode* cc;
OnigCodePoint code;
- r = parse_char_class(np, tok, src, end, env);
- if (r != 0) return r;
+ r = parse_char_class(np, &asc_node, tok, src, end, env);
+ if (r != 0) {
+ onig_node_free(asc_node);
+ return r;
+ }
cc = NCCLASS(*np);
if (is_onechar_cclass(cc, &code)) {
onig_node_free(*np);
+ onig_node_free(asc_node);
*np = node_new_empty();
CHECK_NULL_RETURN_MEMERR(*np);
r = node_str_cat_codepoint(*np, env->enc, code);
@@ -6052,28 +6202,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
goto string_loop;
}
if (IS_IGNORECASE(env->option)) {
- IApplyCaseFoldArg iarg;
-
- iarg.env = env;
- iarg.cc = cc;
- iarg.alt_root = NULL_NODE;
- iarg.ptail = &(iarg.alt_root);
-
- r = ONIGENC_APPLY_ALL_CASE_FOLD(env->enc, env->case_fold_flag,
- i_apply_case_fold, &iarg);
+ r = cclass_case_fold(np, cc, NCCLASS(asc_node), env);
if (r != 0) {
- onig_node_free(iarg.alt_root);
+ onig_node_free(asc_node);
return r;
}
- if (IS_NOT_NULL(iarg.alt_root)) {
- Node* work = onig_node_new_alt(*np, iarg.alt_root);
- if (IS_NULL(work)) {
- onig_node_free(iarg.alt_root);
- return ONIGERR_MEMORY;
- }
- *np = work;
- }
}
+ onig_node_free(asc_node);
}
break;
diff --git a/regparse.h b/regparse.h
index c92babfebe..35de54671e 100644
--- a/regparse.h
+++ b/regparse.h
@@ -193,8 +193,8 @@ typedef struct {
int type;
int regnum;
OnigOptionType option;
- struct _Node* target;
AbsAddrType call_addr;
+ struct _Node* target;
/* for multiple call reference */
OnigDistance min_len; /* min length (byte) */
OnigDistance max_len; /* max length (byte) */
@@ -296,10 +296,10 @@ typedef struct {
UChar* error;
UChar* error_end;
regex_t* reg; /* for reg->names only */
- int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
#endif
+ int num_call;
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;