about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-12-10 17:47:04 +0000
committer naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-12-10 17:47:04 +0000
commit 1b7d03b9d0d60d4dede7d38739503e59a8b25d06 (patch)
tree deb58ca3dc6d6cd71c1740e62aa7e47bea5ed37e
parent 5a5ea533fd8c5c95ca0dcd5e4cce2558ebba62c5 (diff)
download ruby-1b7d03b9d0d60d4dede7d38739503e59a8b25d06.tar.gz
Merge Onigmo 6.0.0
* https://github.com/k-takata/Onigmo/blob/Onigmo-6.0.0/HISTORY
* fix for ruby 2.4: https://github.com/k-takata/Onigmo/pull/78
* suppress warning: https://github.com/k-takata/Onigmo/pull/79
* include/ruby/oniguruma.h: include onigmo.h.
* template/encdb.h.tmpl: ignore duplicated definition of EUC-CN in
  enc/euc_kr.c. It is defined in enc/gb2312.c with CRuby macro.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57045 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- NEWS 2
-rw-r--r-- enc/ascii.c 9
-rw-r--r-- enc/big5.c 6
-rw-r--r-- enc/cp949.c 2
-rw-r--r-- enc/emacs_mule.c 4
-rw-r--r-- enc/euc_jp.c 4
-rw-r--r-- enc/euc_kr.c 27
-rw-r--r-- enc/euc_tw.c 2
-rw-r--r-- enc/gb18030.c 3
-rw-r--r-- enc/gbk.c 2
-rw-r--r-- enc/iso_8859_1.c 33
-rw-r--r-- enc/iso_8859_10.c 41
-rw-r--r-- enc/iso_8859_11.c 2
-rw-r--r-- enc/iso_8859_13.c 41
-rw-r--r-- enc/iso_8859_14.c 50
-rw-r--r-- enc/iso_8859_15.c 47
-rw-r--r-- enc/iso_8859_16.c 46
-rw-r--r-- enc/iso_8859_2.c 34
-rw-r--r-- enc/iso_8859_3.c 43
-rw-r--r-- enc/iso_8859_4.c 29
-rw-r--r-- enc/iso_8859_5.c 22
-rw-r--r-- enc/iso_8859_6.c 2
-rw-r--r-- enc/iso_8859_7.c 48
-rw-r--r-- enc/iso_8859_8.c 2
-rw-r--r-- enc/iso_8859_9.c 47
-rw-r--r-- enc/koi8_r.c 3
-rw-r--r-- enc/koi8_u.c 2
-rw-r--r-- enc/mktable.c 42
-rw-r--r-- enc/shift_jis.c 4
-rw-r--r-- enc/unicode.c 248
-rw-r--r-- enc/us_ascii.c 9
-rw-r--r-- enc/utf_16be.c 2
-rw-r--r-- enc/utf_16le.c 2
-rw-r--r-- enc/utf_32be.c 3
-rw-r--r-- enc/utf_32le.c 2
-rw-r--r-- enc/utf_8.c 14
-rw-r--r-- enc/windows_1250.c 35
-rw-r--r-- enc/windows_1251.c 35
-rw-r--r-- enc/windows_1252.c 29
-rw-r--r-- enc/windows_1253.c 43
-rw-r--r-- enc/windows_1254.c 47
-rw-r--r-- enc/windows_1257.c 50
-rw-r--r-- enc/windows_31j.c 4
-rw-r--r-- include/ruby/onigmo.h 934
-rw-r--r-- include/ruby/oniguruma.h 880
-rw-r--r-- re.c 3
-rw-r--r-- regcomp.c 583
-rw-r--r-- regenc.c 67
-rw-r--r-- regenc.h 136
-rw-r--r-- regerror.c 65
-rw-r--r-- regexec.c 895
-rw-r--r-- regint.h 416
-rw-r--r-- regparse.c 1099
-rw-r--r-- regparse.h 56
-rw-r--r-- regsyntax.c 15
-rw-r--r-- template/encdb.h.tmpl 3
-rwxr-xr-x tool/enc-unicode.rb 17
57 files changed, 3108 insertions, 3183 deletions
diff --git a/NEWS b/NEWS
index cf10cd9868..fc80975967 100644
--- a/NEWS
+++ b/NEWS
@@ -138,6 +138,8 @@ with all sufficient information, see the ChangeLog file or Redmine
* meta character \X matches Unicode 9.0 characters with some workarounds
for UTR #51 Unicode Emoji, Version 4.0 emoji zwj sequences.
+ * Update Onigmo 6.0.0.
+
* Regexp/String: Updated Unicode version from 8.0.0 to 9.0.0 [Feature #12513]
* RubyVM::Env
diff --git a/enc/ascii.c b/enc/ascii.c
index d34cc20582..8b32c414fe 100644
--- a/enc/ascii.c
+++ b/enc/ascii.c
@@ -29,9 +29,12 @@
*/
#include "regenc.h"
-#include "encindex.h"
+#ifdef RUBY
+# include "encindex.h"
+#endif
+
#ifndef ENCINDEX_ASCII
-#define ENCINDEX_ASCII 0
+# define ENCINDEX_ASCII 0
#endif
OnigEncodingDefine(ascii, ASCII) = {
@@ -51,9 +54,9 @@ OnigEncodingDefine(ascii, ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
ENCINDEX_ASCII,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("BINARY", "ASCII-8BIT")
ENC_REPLICATE("IBM437", "ASCII-8BIT")
diff --git a/enc/big5.c b/enc/big5.c
index fc2bcadcc1..ab4fb69819 100644
--- a/enc/big5.c
+++ b/enc/big5.c
@@ -300,9 +300,9 @@ OnigEncodingDefine(big5, BIG5) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
@@ -335,9 +335,9 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
ENC_ALIAS("Big5-HKSCS:2008", "Big5-HKSCS")
@@ -370,7 +370,7 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = {
onigenc_not_support_get_ctype_code_range,
big5_left_adjust_char_head,
big5_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
diff --git a/enc/cp949.c b/enc/cp949.c
index f832cd5758..bd2c8d21a4 100644
--- a/enc/cp949.c
+++ b/enc/cp949.c
@@ -211,9 +211,9 @@ OnigEncodingDefine(cp949, CP949) = {
onigenc_not_support_get_ctype_code_range,
cp949_left_adjust_char_head,
cp949_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
* Name: CP949
diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c
index a53f243dfe..f92eb183cf 100644
--- a/enc/emacs_mule.c
+++ b/enc/emacs_mule.c
@@ -27,7 +27,7 @@
* SUCH DAMAGE.
*/
-#include "regint.h"
+#include "regenc.h"
#define emacsmule_islead(c) ((UChar )(c) < 0x9e)
@@ -334,9 +334,9 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = {
onigenc_not_support_get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule")
diff --git a/enc/euc_jp.c b/enc/euc_jp.c
index f9604b8d6e..ded051af69 100644
--- a/enc/euc_jp.c
+++ b/enc/euc_jp.c
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*/
-#include "regint.h"
+#include "regenc.h"
#define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1)
@@ -576,9 +576,9 @@ OnigEncodingDefine(euc_jp, EUC_JP) = {
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
* Name: EUC-JP
diff --git a/enc/euc_kr.c b/enc/euc_kr.c
index eb17f476e9..21d6ab4e1c 100644
--- a/enc/euc_kr.c
+++ b/enc/euc_kr.c
@@ -188,8 +188,33 @@ OnigEncodingDefine(euc_kr, EUC_KR) = {
onigenc_not_support_get_ctype_code_range,
euckr_left_adjust_char_head,
euckr_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
ENC_ALIAS("eucKR", "EUC-KR")
+
+#ifndef RUBY
+/* Same with OnigEncodingEUC_KR except the name */
+OnigEncodingDefine(euc_cn, EUC_CN) = {
+ euckr_mbc_enc_len,
+ "EUC-CN", /* name */
+ 2, /* max enc length */
+ 1, /* min enc length */
+ onigenc_is_mbc_newline_0x0a,
+ euckr_mbc_to_code,
+ onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbc,
+ euckr_mbc_case_fold,
+ onigenc_ascii_apply_all_case_fold,
+ onigenc_ascii_get_case_fold_codes_by_str,
+ onigenc_minimum_property_name_to_ctype,
+ euckr_is_code_ctype,
+ onigenc_not_support_get_ctype_code_range,
+ euckr_left_adjust_char_head,
+ euckr_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
+ 0,
+ ONIGENC_FLAG_NONE,
+};
+#endif /* RUBY */
diff --git a/enc/euc_tw.c b/enc/euc_tw.c
index e7d5187c4a..1c5659cb1d 100644
--- a/enc/euc_tw.c
+++ b/enc/euc_tw.c
@@ -221,8 +221,8 @@ OnigEncodingDefine(euc_tw, EUC_TW) = {
onigenc_not_support_get_ctype_code_range,
euctw_left_adjust_char_head,
euctw_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
ENC_ALIAS("eucTW", "EUC-TW")
diff --git a/enc/gb18030.c b/enc/gb18030.c
index 8a00332991..63d2e633ec 100644
--- a/enc/gb18030.c
+++ b/enc/gb18030.c
@@ -597,8 +597,7 @@ OnigEncodingDefine(gb18030, GB18030) = {
onigenc_not_support_get_ctype_code_range,
gb18030_left_adjust_char_head,
gb18030_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
-
diff --git a/enc/gbk.c b/enc/gbk.c
index d3bb1a5864..31032553bf 100644
--- a/enc/gbk.c
+++ b/enc/gbk.c
@@ -211,9 +211,9 @@ OnigEncodingDefine(gbk, GBK) = {
onigenc_not_support_get_ctype_code_range,
gbk_left_adjust_char_head,
gbk_is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
* Name: GBK
diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c
index 2440c9f5a1..7af0888c3e 100644
--- a/enc/iso_8859_1.c
+++ b/enc/iso_8859_1.c
@@ -256,45 +256,46 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code += 0x20;
}
- else if (code==0xAA || code==0xBA || code==0xB5 || code==0xFF) ;
- else if ((EncISO_8859_1_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if (code == 0xAA || code == 0xBA || code == 0xB5 || code == 0xFF)
+ ;
+ else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
@@ -314,8 +315,8 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-1", "ISO-8859-1")
diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c
index e06f15f9d0..cae4be2db0 100644
--- a/enc/iso_8859_10.c
+++ b/enc/iso_8859_10.c
@@ -215,9 +215,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -225,48 +225,49 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- else if (code==0xBD || code==0xFF) ;
+ else if (code == 0xBD || code == 0xFF)
+ ;
else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_10_TO_LOWER_CASE(code);
}
- else if ((EncISO_8859_10_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code>=0xA0 && code<=0xBF)
+ if (code >= 0xA0 && code <= 0xBF)
code -= 0x10;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
@@ -286,8 +287,8 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-10", "ISO-8859-10")
diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c
index a5522da2e3..85e8f2cdb4 100644
--- a/enc/iso_8859_11.c
+++ b/enc/iso_8859_11.c
@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-11", "ISO-8859-11")
diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c
index 6e49e16dfb..fe1ddd7065 100644
--- a/enc/iso_8859_13.c
+++ b/enc/iso_8859_13.c
@@ -208,9 +208,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -218,38 +218,39 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_13_TO_LOWER_CASE(code);
}
- else if (code==0xB5) ;
- else if ((EncISO_8859_13_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if (code == 0xB5)
+ ;
+ else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xB8 || code==0xBA || code==0xBF) {
+ if (code == 0xB8 || code == 0xBA || code == 0xBF) {
code -= 0x10;
}
else {
@@ -257,11 +258,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
}
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
@@ -281,8 +282,8 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-13", "ISO-8859-13")
diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c
index 22df367dd9..647514a016 100644
--- a/enc/iso_8859_14.c
+++ b/enc/iso_8859_14.c
@@ -217,9 +217,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -227,58 +227,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- /* else if (code==0xAA || code==0xBA) ; */
+ /* else if (code == 0xAA || code == 0xBA) ; */
else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_14_TO_LOWER_CASE(code);
}
- else if ((EncISO_8859_14_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if(code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
+ if (code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE)
code -= 0x1;
- else if(code == 0xAB)
+ else if (code == 0xAB)
code -= 0x5;
- else if(code == 0xFF)
+ else if (code == 0xFF)
code -= 0x50;
- else if(code == 0xB9)
+ else if (code == 0xB9)
code -= 0x2;
- else if(code == 0xBF)
+ else if (code == 0xBF)
code -= 0x4;
- else if(code == 0xB8 || code == 0xBA || code == 0xBC)
+ else if (code == 0xB8 || code == 0xBA || code == 0xBC)
code -= 0x10;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
@@ -298,8 +298,8 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-14", "ISO-8859-14")
diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c
index 06b00b90b3..377a3afc7b 100644
--- a/enc/iso_8859_15.c
+++ b/enc/iso_8859_15.c
@@ -211,9 +211,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -221,54 +221,55 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- else if (code==0xAA || code==0xBA || code==0xB5) ;
+ else if (code == 0xAA || code == 0xBA || code == 0xB5)
+ ;
else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_15_TO_LOWER_CASE(code);
}
- else if ((EncISO_8859_15_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xA8)
+ if (code == 0xA8)
code -= 2;
- else if (code==0xB8)
+ else if (code == 0xB8)
code -= 4;
- else if (code==0xBD)
+ else if (code == 0xBD)
code -= 1;
- else if (code==0xFF)
+ else if (code == 0xFF)
code -= 0x41;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
@@ -288,8 +289,8 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-15", "ISO-8859-15")
diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c
index c8695e65f7..135630eb73 100644
--- a/enc/iso_8859_16.c
+++ b/enc/iso_8859_16.c
@@ -213,9 +213,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -223,57 +223,57 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_16_TO_LOWER_CASE(code);
}
- else if ((EncISO_8859_16_CtypeTable[code]&BIT_CTYPE_LOWER)
+ else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_LOWER)
&& (flags&ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xA2 || code==0xBD)
+ if (code == 0xA2 || code == 0xBD)
code--;
- else if (code==0xB3 || code==0xBA || code==0xBF)
+ else if (code == 0xB3 || code == 0xBA || code == 0xBF)
code -= 0x10;
- else if (code==0xA8 || code==0xAE)
+ else if (code == 0xA8 || code == 0xAE)
code -= 0x02;
- else if (code==0xB9)
+ else if (code == 0xB9)
code -= 0x07;
- else if (code==0xB8)
+ else if (code == 0xB8)
code -= 0x04;
- else if (code==0xFF)
+ else if (code == 0xFF)
code -= 0x41;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
@@ -293,8 +293,8 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-16", "ISO-8859-16")
diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c
index 00de8ec757..3a05c6320d 100644
--- a/enc/iso_8859_2.c
+++ b/enc/iso_8859_2.c
@@ -221,50 +221,50 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_2_TO_LOWER_CASE(code);
}
- else if ((EncISO_8859_2_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
- if (code>=0xB1 && code<=0xBF){
+ else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
+ if (code >= 0xB1 && code <= 0xBF) {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x10;
}
- else{
+ else {
flags |= ONIGENC_CASE_MODIFIED;
code -= 0x20;
}
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
@@ -284,8 +284,8 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-2", "ISO-8859-2")
diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c
index 365d9a77de..2a343eac63 100644
--- a/enc/iso_8859_3.c
+++ b/enc/iso_8859_3.c
@@ -223,45 +223,46 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
#define DOTLESS_i (0xB9)
#define I_WITH_DOT_ABOVE (0xA9)
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- else if (code==0xB5) ;
+ else if (code == 0xB5)
+ ;
else if ((EncISO_8859_3_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='I')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
+ if (code == 'I')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_ISO_8859_3_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_3_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='i')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
- else if (code==DOTLESS_i)
+ if (code == 'i')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
+ else if (code == DOTLESS_i)
code = 'I';
- else if (code>=0xB0 && code<=0xBF ) {
+ else if (code >= 0xB0 && code <= 0xBF) {
code -= 0x10;
}
else {
@@ -269,11 +270,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
}
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
@@ -293,8 +294,8 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-3", "ISO-8859-3")
diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c
index 6d27300e22..e2134e8c0b 100644
--- a/enc/iso_8859_4.c
+++ b/enc/iso_8859_4.c
@@ -232,31 +232,32 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncISO_8859_4_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_4_TO_LOWER_CASE(code);
}
- else if (code==0xA2) ;
+ else if (code == 0xA2)
+ ;
else if ((EncISO_8859_4_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code>=0xA0&&code<=0xBF) {
- if (code==0xBF)
+ if (code >= 0xA0 && code <= 0xBF) {
+ if (code == 0xBF)
code -= 0x02;
else
code -= 0x10;
@@ -265,11 +266,11 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
@@ -289,8 +290,8 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-4", "ISO-8859-4")
diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c
index 5d67639f5e..6fafc35823 100644
--- a/enc/iso_8859_5.c
+++ b/enc/iso_8859_5.c
@@ -210,35 +210,35 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
if ((EncISO_8859_5_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_5_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_5_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (0xF1<=code && code<=0xFF)
+ if (0xF1 <= code && code <= 0xFF)
code -= 0x50;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
@@ -258,8 +258,8 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-5", "ISO-8859-5")
diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c
index 64dc5aceac..6d852ac8c0 100644
--- a/enc/iso_8859_6.c
+++ b/enc/iso_8859_6.c
@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-6", "ISO-8859-6")
diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c
index 475fecc19c..ac973f74ba 100644
--- a/enc/iso_8859_7.c
+++ b/enc/iso_8859_7.c
@@ -206,58 +206,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==0xF2) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == 0xF2) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xD3;
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xF3;
}
}
else if ((EncISO_8859_7_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_ISO_8859_7_TO_LOWER_CASE(code);
}
- else if (code==0xC0 || code==0xE0)
- ;
+ else if (code == 0xC0 || code == 0xE0)
+ ;
else if ((EncISO_8859_7_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xDC) {
- code-=0x26;
+ if (code == 0xDC) {
+ code -= 0x26;
}
- else if (code>=0xDD && code<=0xDF) {
- code-=0x25;
+ else if (code >= 0xDD && code <= 0xDF) {
+ code -= 0x25;
}
- else if (code==0xFC) {
- code-=0x40;
+ else if (code == 0xFC) {
+ code -= 0x40;
}
- else if (code==0xFD || code==0xFE) {
- code-=0x3F;
+ else if (code == 0xFD || code == 0xFE) {
+ code -= 0x3F;
}
else {
- code-=0x20;
+ code -= 0x20;
}
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
@@ -277,8 +277,8 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-7", "ISO-8859-7")
diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c
index 4777762849..0a7a29e82e 100644
--- a/enc/iso_8859_8.c
+++ b/enc/iso_8859_8.c
@@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ISO8859-8", "ISO-8859-8")
diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c
index 064a04d480..004eec310f 100644
--- a/enc/iso_8859_9.c
+++ b/enc/iso_8859_9.c
@@ -204,9 +204,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -216,53 +216,54 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
#define DOTLESS_i (0xFD)
#define I_WITH_DOT_ABOVE (0xDD)
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- else if (code==0xAA || code==0xB5 || code==0xBA || code==0xFF) ;
+ else if (code == 0xAA || code == 0xB5 || code == 0xBA || code == 0xFF)
+ ;
else if ((EncISO_8859_9_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='I')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
+ if (code == 'I')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_ISO_8859_9_TO_LOWER_CASE(code);
}
else if ((EncISO_8859_9_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='i')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
- else if (code==DOTLESS_i)
+ if (code == 'i')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
+ else if (code == DOTLESS_i)
code = 'I';
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
@@ -282,8 +283,8 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("ISO8859-9", "ISO-8859-9")
diff --git a/enc/koi8_r.c b/enc/koi8_r.c
index a3c05cd27b..a520975774 100644
--- a/enc/koi8_r.c
+++ b/enc/koi8_r.c
@@ -214,9 +214,8 @@ OnigEncodingDefine(koi8_r, KOI8_R) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("CP878", "KOI8-R")
-
diff --git a/enc/koi8_u.c b/enc/koi8_u.c
index f97d74d3f0..50bb78bd04 100644
--- a/enc/koi8_u.c
+++ b/enc/koi8_u.c
@@ -218,7 +218,7 @@ OnigEncodingDefine(koi8_u, KOI8_U) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
diff --git a/enc/mktable.c b/enc/mktable.c
index 49acf628d0..4edd5a0ff7 100644
--- a/enc/mktable.c
+++ b/enc/mktable.c
@@ -2,7 +2,7 @@
mktable.c
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,7 +31,10 @@
#include <stdio.h>
#include <locale.h>
+#ifndef __USE_ISOC99
#define __USE_ISOC99
+#endif
+
#include <ctype.h>
#include "regenc.h"
@@ -1108,11 +1111,13 @@ static int exec(FILE* fp, ENC_INFO* einfo)
#define NCOL 8
int c, val, enc;
+ int r;
enc = einfo->num;
- fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
- einfo->name);
+ r = fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
+ einfo->name);
+ if (r < 0) return -1;
for (c = 0; c < 256; c++) {
val = 0;
@@ -1131,20 +1136,33 @@ static int exec(FILE* fp, ENC_INFO* einfo)
if (IsWord (enc, c)) val |= BIT_CTYPE_WORD;
if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII;
- if (c % NCOL == 0) fputs(" ", fp);
- fprintf(fp, "0x%04x", val);
- if (c != 255) fputs(",", fp);
+ if (c % NCOL == 0) {
+ r = fputs(" ", fp);
+ if (r < 0) return -1;
+ }
+ r = fprintf(fp, "0x%04x", val);
+ if (r < 0) return -1;
+
+ if (c != 255) {
+ r = fputs(",", fp);
+ if (r < 0) return -1;
+ }
if (c != 0 && c % NCOL == (NCOL-1))
- fputs("\n", fp);
+ r = fputs("\n", fp);
else
- fputs(" ", fp);
+ r = fputs(" ", fp);
+
+ if (r < 0) return -1;
}
- fprintf(fp, "};\n");
+ r = fprintf(fp, "};\n");
+ if (r < 0) return -1;
+
return 0;
}
extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
{
+ int r;
int i;
FILE* fp = stdout;
@@ -1155,7 +1173,11 @@ extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
/* setlocale(LC_ALL, "fr_FR.iso88591"); */
for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
- exec(fp, &Info[i]);
+ r = exec(fp, &Info[i]);
+ if (r < 0) {
+ fprintf(stderr, "FAIL exec(): %d\n", r);
+ return -1;
+ }
}
return 0;
diff --git a/enc/shift_jis.c b/enc/shift_jis.c
index c1552bfd13..eacca9a5db 100644
--- a/enc/shift_jis.c
+++ b/enc/shift_jis.c
@@ -28,7 +28,7 @@
* SUCH DAMAGE.
*/
-#include "regint.h"
+#include "regenc.h"
static const int EncLen_SJIS[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -563,9 +563,9 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = {
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
* Name: Shift_JIS
diff --git a/enc/unicode.c b/enc/unicode.c
index 39fb24408f..72ff5a96e7 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -139,17 +139,17 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
/* macros related to ONIGENC_CASE flags */
/* defined here because not used in other files */
-#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_IS_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL)
+#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL)
/* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */
#define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */
-#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset)
-#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1))
-#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset)
+#define SpecialsLengthExtract(n) ((n) >> SpecialsLengthOffset)
+#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1))
+#define SpecialsLengthEncode(n) ((n) << SpecialsLengthOffset)
-#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift)
-#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift)
-#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift)
+#define OnigSpecialIndexMask (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift)
+#define OnigSpecialIndexEncode(n) ((n) << OnigSpecialIndexShift)
+#define OnigSpecialIndexDecode(n) (((n) & OnigSpecialIndexMask) >> OnigSpecialIndexShift)
/* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */
#define U ONIGENC_CASE_UPCASE
@@ -660,128 +660,130 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
OnigUChar* to, OnigUChar* to_end,
const struct OnigEncodingTypeST* enc)
{
- OnigCodePoint code;
- OnigUChar *to_start = to;
- OnigCaseFoldType flags = *flagP;
- int codepoint_length;
-
- to_end -= CASE_MAPPING_SLACK;
- /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
- * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
- flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET;
-
- while (*pp<end && to<=to_end) {
- codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
- if (codepoint_length < 0)
- return codepoint_length; /* encoding invalid */
- code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
- *pp += codepoint_length;
-
- if (code<='z') { /* ASCII comes first */
- if (code>='a' && code<='z') {
- if (flags&ONIGENC_CASE_UPCASE) {
- MODIFIED;
- if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='i')
- code = I_WITH_DOT_ABOVE;
- else
- code += 'A'-'a';
- }
- }
- else if (code>='A' && code<='Z') {
- if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
- MODIFIED;
- if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='I')
- code = DOTLESS_i;
- else
- code += 'a'-'A';
- }
- }
+ OnigCodePoint code;
+ OnigUChar *to_start = to;
+ OnigCaseFoldType flags = *flagP;
+ int codepoint_length;
+
+ to_end -= CASE_MAPPING_SLACK;
+ /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to
+ * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */
+ flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET;
+
+ while (*pp < end && to <= to_end) {
+ codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
+ if (codepoint_length < 0)
+ return codepoint_length; /* encoding invalid */
+ code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
+ *pp += codepoint_length;
+
+ if (code <= 'z') { /* ASCII comes first */
+ if (code >= 'a' && code <= 'z') {
+ if (flags & ONIGENC_CASE_UPCASE) {
+ MODIFIED;
+ if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i')
+ code = I_WITH_DOT_ABOVE;
+ else
+ code += 'A' - 'a';
}
- else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
- const CodePointList3 *folded;
-
- if (code==I_WITH_DOT_ABOVE) {
- if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) {
- MODIFIED;
- code = 'i';
- if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
- to += ONIGENC_CODE_TO_MBC(enc, code, to);
- code = DOT_ABOVE;
- }
- }
- }
- else if (code==DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
- if (flags&ONIGENC_CASE_UPCASE)
- MODIFIED, code = 'I';
+ }
+ else if (code >= 'A' && code <= 'Z') {
+ if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+ MODIFIED;
+ if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I')
+ code = DOTLESS_i;
+ else
+ code += 'a' - 'A';
+ }
+ }
+ }
+ else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */
+ const CodePointList3 *folded;
+
+ if (code == I_WITH_DOT_ABOVE) {
+ if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) {
+ MODIFIED;
+ code = 'i';
+ if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */
+ to += ONIGENC_CODE_TO_MBC(enc, code, to);
+ code = DOT_ABOVE;
+ }
+ }
+ }
+ else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */
+ if (flags & ONIGENC_CASE_UPCASE) {
+ MODIFIED;
+ code = 'I';
+ }
+ }
+ else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
+ if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
+ && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
+ /* already Titlecase, no changes needed */
+ }
+ else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+ const OnigCodePoint *next;
+ int count;
+
+ MODIFIED;
+ if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */
+ const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
+
+ if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
+ if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE))
+ == (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
+ goto SpecialsCopy;
+ else /* swapCASE not needed */
+ SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
- else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */
- if ((flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, */
- && (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */
- /* already Titlecase, no changes needed */
- }
- else if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
- const OnigCodePoint *next;
- int count;
-
- MODIFIED;
- if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { /* special */
- const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n);
-
- if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */
- if ((flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))
- == (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */
- goto SpecialsCopy;
- else /* swapCASE not needed */
- SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
- }
- if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { /* Titlecase available */
- if (flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
- goto SpecialsCopy;
- else /* Titlecase not needed */
- SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
- }
- if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) {
- if (!(flags&ONIGENC_CASE_DOWN_SPECIAL))
- SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
- }
- /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
- SpecialsCopy:
- count = SpecialsLengthExtract(*SpecialsStart);
- next = SpecialsStart;
- code = SpecialsCodepointExtract(*next++);
- }
- else { /* no specials */
- count = OnigCodePointCount(folded->n);
- next = folded->code;
- code = *next++;
- }
- if (count==1)
- ;
- else if (count==2) {
- to += ONIGENC_CODE_TO_MBC(enc, code, to);
- code = *next;
- }
- else { /* count == 3 */
- to += ONIGENC_CODE_TO_MBC(enc, code, to);
- to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
- code = *next;
- }
- }
+ if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */
+ if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */
+ goto SpecialsCopy;
+ else /* Titlecase not needed */
+ SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
- else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
- && flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
- MODIFIED;
- code = folded->code[(flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) ? 1 : 0];
+ if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) {
+ if (!(flags & ONIGENC_CASE_DOWN_SPECIAL))
+ SpecialsStart += SpecialsLengthExtract(*SpecialsStart);
}
+ /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */
+SpecialsCopy:
+ count = SpecialsLengthExtract(*SpecialsStart);
+ next = SpecialsStart;
+ code = SpecialsCodepointExtract(*next++);
+ }
+ else { /* no specials */
+ count = OnigCodePointCount(folded->n);
+ next = folded->code;
+ code = *next++;
+ }
+ if (count == 1)
+ ;
+ else if (count == 2) {
+ to += ONIGENC_CODE_TO_MBC(enc, code, to);
+ code = *next;
+ }
+ else { /* count == 3 */
+ to += ONIGENC_CODE_TO_MBC(enc, code, to);
+ to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+ code = *next;
+ }
}
- to += ONIGENC_CODE_TO_MBC(enc, code, to);
- /* switch from titlecase to lowercase for capitalize */
- if (flags & ONIGENC_CASE_TITLECASE)
- flags ^= (ONIGENC_CASE_UPCASE |ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE|
- ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL);
+ }
+ else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */
+ && flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */
+ MODIFIED;
+ code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 1 : 0];
+ }
}
- *flagP = flags;
- return (int)(to-to_start);
+ to += ONIGENC_CODE_TO_MBC(enc, code, to);
+ /* switch from titlecase to lowercase for capitalize */
+ if (flags & ONIGENC_CASE_TITLECASE)
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE |
+ ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL);
+ }
+ *flagP = flags;
+ return (int )(to - to_start);
}
#if 0
diff --git a/enc/us_ascii.c b/enc/us_ascii.c
index cf835e6538..08f9072c43 100644
--- a/enc/us_ascii.c
+++ b/enc/us_ascii.c
@@ -1,7 +1,10 @@
#include "regenc.h"
-#include "encindex.h"
+#ifdef RUBY
+# include "encindex.h"
+#endif
+
#ifndef ENCINDEX_US_ASCII
-#define ENCINDEX_US_ASCII 0
+# define ENCINDEX_US_ASCII 0
#endif
static int
@@ -29,9 +32,9 @@ OnigEncodingDefine(us_ascii, US_ASCII) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_single_byte_ascii_only_case_map,
ENCINDEX_US_ASCII,
ONIGENC_FLAG_NONE,
- onigenc_single_byte_ascii_only_case_map,
};
ENC_ALIAS("ASCII", "US-ASCII")
ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII")
diff --git a/enc/utf_16be.c b/enc/utf_16be.c
index e8b97983bf..f9dd7119d6 100644
--- a/enc/utf_16be.c
+++ b/enc/utf_16be.c
@@ -249,8 +249,8 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = {
onigenc_utf16_32_get_ctype_code_range,
utf16be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
+ onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
- onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-2BE", "UTF-16BE")
diff --git a/enc/utf_16le.c b/enc/utf_16le.c
index 67ec2ad178..2c8438d0be 100644
--- a/enc/utf_16le.c
+++ b/enc/utf_16le.c
@@ -242,7 +242,7 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = {
onigenc_utf16_32_get_ctype_code_range,
utf16le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
+ onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
- onigenc_unicode_case_map,
};
diff --git a/enc/utf_32be.c b/enc/utf_32be.c
index a57b854674..995c9d8ed5 100644
--- a/enc/utf_32be.c
+++ b/enc/utf_32be.c
@@ -187,9 +187,8 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = {
onigenc_utf16_32_get_ctype_code_range,
utf32be_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
+ onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
- onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-4BE", "UTF-32BE")
-
diff --git a/enc/utf_32le.c b/enc/utf_32le.c
index c48089d6ed..e255f0e246 100644
--- a/enc/utf_32le.c
+++ b/enc/utf_32le.c
@@ -187,8 +187,8 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = {
onigenc_utf16_32_get_ctype_code_range,
utf32le_left_adjust_char_head,
onigenc_always_false_is_allowed_reverse_match,
+ onigenc_unicode_case_map,
0,
ONIGENC_FLAG_UNICODE,
- onigenc_unicode_case_map,
};
ENC_ALIAS("UCS-4LE", "UTF-32LE")
diff --git a/enc/utf_8.c b/enc/utf_8.c
index 862b13fd9b..3dad2f729b 100644
--- a/enc/utf_8.c
+++ b/enc/utf_8.c
@@ -28,17 +28,20 @@
*/
#include "regenc.h"
-#include "encindex.h"
+#ifdef RUBY
+# include "encindex.h"
+#endif
+
#ifndef ENCINDEX_UTF_8
-#define ENCINDEX_UTF_8 0
+# define ENCINDEX_UTF_8 0
#endif
#define USE_INVALID_CODE_SCHEME
#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
-#define INVALID_CODE_FE 0xfffffffe
-#define INVALID_CODE_FF 0xffffffff
+# define INVALID_CODE_FE 0xfffffffe
+# define INVALID_CODE_FF 0xffffffff
#endif
#define VALID_CODE_LIMIT 0x0010ffff
@@ -428,9 +431,9 @@ OnigEncodingDefine(utf_8, UTF_8) = {
get_ctype_code_range,
left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ onigenc_unicode_case_map,
ENCINDEX_UTF_8,
ONIGENC_FLAG_UNICODE,
- onigenc_unicode_case_map,
};
ENC_ALIAS("CP65001", "UTF-8")
@@ -444,4 +447,3 @@ ENC_ALIAS("CP65001", "UTF-8")
ENC_REPLICATE("UTF8-MAC", "UTF-8")
ENC_ALIAS("UTF-8-MAC", "UTF8-MAC")
ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */
-
diff --git a/enc/windows_1250.c b/enc/windows_1250.c
index 47317ddaf6..d2cf7b16bc 100644
--- a/enc/windows_1250.c
+++ b/enc/windows_1250.c
@@ -191,40 +191,41 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncCP1250_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_CP1250_TO_LOWER_CASE(code);
}
- else if (code==0xB5) ;
+ else if (code == 0xB5)
+ ;
else if ((EncCP1250_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xB9)
+ if (code == 0xB9)
code = 0xA5;
- else if (code==0xBE)
+ else if (code == 0xBE)
code = 0xBC;
else if (code >= 0x8A && code <= 0xBF && code!=0xB9)
code -= 0x10;
@@ -232,11 +233,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1250, Windows_1250) = {
@@ -256,9 +257,9 @@ OnigEncodingDefine(windows_1250, Windows_1250) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
/*
* Name: windows-1250
diff --git a/enc/windows_1251.c b/enc/windows_1251.c
index 0f9b7fa69a..fcd0f1015d 100644
--- a/enc/windows_1251.c
+++ b/enc/windows_1251.c
@@ -181,49 +181,50 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
if ((EncCP1251_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_CP1251_TO_LOWER_CASE(code);
}
- else if (code==0xB5) ;
+ else if (code == 0xB5)
+ ;
else if ((EncCP1251_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if ((0x61<=code && code<=0x7A) || (0xE0<=code && code<=0xFF))
+ if ((0x61 <= code && code <= 0x7A) || (0xE0 <= code && code <= 0xFF))
code -= 0x20;
- else if (code==0xA2 || code==0xB3 || code==0xBE)
+ else if (code == 0xA2 || code == 0xB3 || code == 0xBE)
code -= 0x01;
- else if (code==0x83)
+ else if (code == 0x83)
code = 0x81;
- else if (code==0xBC)
+ else if (code == 0xBC)
code = 0xA3;
- else if (code==0xB4)
+ else if (code == 0xB4)
code = 0xA5;
else
code -= 0x10;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1251, Windows_1251) = {
onigenc_single_byte_mbc_enc_len,
- "Windows-1251", /* name */
+ "Windows-1251",/* name */
1, /* max enc length */
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
@@ -238,9 +239,9 @@ OnigEncodingDefine(windows_1251, Windows_1251) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
/*
* Name: windows-1251
diff --git a/enc/windows_1252.c b/enc/windows_1252.c
index 4427f8e31e..5f90c15601 100644
--- a/enc/windows_1252.c
+++ b/enc/windows_1252.c
@@ -190,42 +190,43 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_CP1252_TO_LOWER_CASE(code);
}
- else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ;
+ else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5)
+ ;
else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0x9A || code==0x9C || code==0x9E)
+ if (code == 0x9A || code == 0x9C || code == 0x9E)
code -= 0x10;
- else if (code==0xFF)
+ else if (code == 0xFF)
code -= 0x60;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1252, Windows_1252) = {
@@ -245,9 +246,9 @@ OnigEncodingDefine(windows_1252, Windows_1252) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
/*
* Name: windows-1252
diff --git a/enc/windows_1253.c b/enc/windows_1253.c
index 2157b55c99..9e9c63a581 100644
--- a/enc/windows_1253.c
+++ b/enc/windows_1253.c
@@ -214,62 +214,63 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
}
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==0xF2) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == 0xF2) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xD3;
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xF3;
}
}
- else if (code==0xB5) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ else if (code == 0xB5) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xCC;
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
code = 0xEC;
}
}
- else if (code==0xC0 || code==0xE0 || code==0xB6) ;
+ else if (code == 0xC0 || code == 0xE0 || code == 0xB6)
+ ;
else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
code = ENC_CP1253_TO_LOWER_CASE(code);
}
else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code==0xDC)
+ if (code == 0xDC)
code = 0xA2;
- else if (code>=0xDD && code<=0xDF)
+ else if (code >= 0xDD && code <= 0xDF)
code -= 0x25;
- else if (code==0xFC)
+ else if (code == 0xFC)
code = 0xBC;
- else if (code==0xFD || code==0xFE)
+ else if (code == 0xFD || code == 0xFE)
code -= 0x3F;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1253, Windows_1253) = {
@@ -289,8 +290,8 @@ OnigEncodingDefine(windows_1253, Windows_1253) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("CP1253", "Windows-1253")
diff --git a/enc/windows_1254.c b/enc/windows_1254.c
index 2ccf966b8e..9ae66978a2 100644
--- a/enc/windows_1254.c
+++ b/enc/windows_1254.c
@@ -212,9 +212,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -232,49 +232,50 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='I')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
+ if (code == 'I')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_CP1254_TO_LOWER_CASE(code);
}
- else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ;
- else if ((EncCP1254_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5)
+ ;
+ else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_LOWER)
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='i')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
- else if (code==DOTLESS_i)
+ if (code == 'i')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
+ else if (code == DOTLESS_i)
code = 'I';
- else if (code==0x9A || code==0x9C || code==0x9E)
+ else if (code == 0x9A || code == 0x9C || code == 0x9E)
code -= 0x10;
- else if (code==0xFF)
+ else if (code == 0xFF)
code -= 0x60;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1254, Windows_1254) = {
@@ -294,8 +295,8 @@ OnigEncodingDefine(windows_1254, Windows_1254) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
ENC_ALIAS("CP1254", "Windows-1254")
diff --git a/enc/windows_1257.c b/enc/windows_1257.c
index 40cdb969aa..936a94ac76 100644
--- a/enc/windows_1257.c
+++ b/enc/windows_1257.c
@@ -216,9 +216,9 @@ apply_all_case_fold(OnigCaseFoldType flag,
static int
get_case_fold_codes_by_str(OnigCaseFoldType flag,
- const OnigUChar* p, const OnigUChar* end,
- OnigCaseFoldCodeItem items[],
- OnigEncoding enc ARG_UNUSED)
+ const OnigUChar* p, const OnigUChar* end,
+ OnigCaseFoldCodeItem items[],
+ OnigEncoding enc ARG_UNUSED)
{
return onigenc_get_case_fold_codes_by_str_with_map(
numberof(CaseFoldMap), CaseFoldMap, 1,
@@ -228,55 +228,56 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag,
#define DOTLESS_i (0xB9)
#define I_WITH_DOT_ABOVE (0xA9)
static int
-case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code==SHARP_s) {
- if (flags&ONIGENC_CASE_UPCASE) {
+ if (code == SHARP_s) {
+ if (flags & ONIGENC_CASE_UPCASE) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 'S';
- code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S';
+ code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S';
}
- else if (flags&ONIGENC_CASE_FOLD) {
+ else if (flags & ONIGENC_CASE_FOLD) {
flags |= ONIGENC_CASE_MODIFIED;
*to++ = 's';
code = 's';
}
}
- else if (code==0xB5) ;
+ else if (code == 0xB5)
+ ;
else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER)
- && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) {
+ && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='I')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
+ if (code == 'I')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i';
else
code = ENC_CP1252_TO_LOWER_CASE(code);
}
else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER)
- && (flags&ONIGENC_CASE_UPCASE)) {
+ && (flags & ONIGENC_CASE_UPCASE)) {
flags |= ONIGENC_CASE_MODIFIED;
- if (code=='i')
- code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
- else if (code==DOTLESS_i)
+ if (code == 'i')
+ code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I';
+ else if (code == DOTLESS_i)
code = 'I';
- else if (code>=0xB0 && code<=0xBF )
+ else if (code >= 0xB0 && code <= 0xBF)
code -= 0x10;
else
code -= 0x20;
}
*to++ = code;
- if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
OnigEncodingDefine(windows_1257, Windows_1257) = {
@@ -296,9 +297,8 @@ OnigEncodingDefine(windows_1257, Windows_1257) = {
onigenc_not_support_get_ctype_code_range,
onigenc_single_byte_left_adjust_char_head,
onigenc_always_true_is_allowed_reverse_match,
+ case_map,
0,
ONIGENC_FLAG_NONE,
- case_map,
};
-
ENC_ALIAS("CP1257", "Windows-1257")
diff --git a/enc/windows_31j.c b/enc/windows_31j.c
index 71836c1f13..174f8983c4 100644
--- a/enc/windows_31j.c
+++ b/enc/windows_31j.c
@@ -33,7 +33,7 @@
OnigEncodingDefine(windows_31j, Windows_31J) = {
mbc_enc_len,
- "Windows-31J", /* name */
+ "Windows-31J", /* name */
2, /* max byte length */
1, /* min byte length */
onigenc_is_mbc_newline_0x0a,
@@ -48,9 +48,9 @@ OnigEncodingDefine(windows_31j, Windows_31J) = {
get_ctype_code_range,
left_adjust_char_head,
is_allowed_reverse_match,
+ onigenc_ascii_only_case_map,
0,
ONIGENC_FLAG_NONE,
- onigenc_ascii_only_case_map,
};
/*
* Name: Windows-31J
diff --git a/include/ruby/onigmo.h b/include/ruby/onigmo.h
new file mode 100644
index 0000000000..228aa77ea5
--- /dev/null
+++ b/include/ruby/onigmo.h
@@ -0,0 +1,934 @@
+#ifndef ONIGMO_H
+#define ONIGMO_H
+/**********************************************************************
+ onigmo.h - Onigmo (Oniguruma-mod) (regular expression library)
+**********************************************************************/
+/*-
+ * Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+# if 0
+} /* satisfy cc-mode */
+# endif
+#endif
+
+#define ONIGMO_VERSION_MAJOR 6
+#define ONIGMO_VERSION_MINOR 0
+#define ONIGMO_VERSION_TEENY 0
+
+#ifndef ONIG_EXTERN
+# ifdef RUBY_EXTERN
+# define ONIG_EXTERN RUBY_EXTERN
+# else
+# if defined(_WIN32) && !defined(__GNUC__)
+# if defined(EXPORT) || defined(RUBY_EXPORT)
+# define ONIG_EXTERN extern __declspec(dllexport)
+# else
+# define ONIG_EXTERN extern __declspec(dllimport)
+# endif
+# endif
+# endif
+#endif
+
+#ifndef ONIG_EXTERN
+# define ONIG_EXTERN extern
+#endif
+
+#ifndef RUBY
+# ifndef RUBY_SYMBOL_EXPORT_BEGIN
+# define RUBY_SYMBOL_EXPORT_BEGIN
+# define RUBY_SYMBOL_EXPORT_END
+# endif
+#endif
+
+RUBY_SYMBOL_EXPORT_BEGIN
+
+#include <stddef.h> /* for size_t */
+
+/* PART: character encoding */
+
+#ifndef ONIG_ESCAPE_UCHAR_COLLISION
+# define UChar OnigUChar
+#endif
+
+typedef unsigned char OnigUChar;
+typedef unsigned int OnigCodePoint;
+typedef unsigned int OnigCtype;
+typedef size_t OnigDistance;
+typedef ptrdiff_t OnigPosition;
+
+#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+
+/*
+ * Onig casefold/case mapping flags and related definitions
+ *
+ * Subfields (starting with 0 at LSB):
+ * 0-2: Code point count in casefold.h
+ * 3-12: Index into SpecialCaseMapping array in casefold.h
+ * 13-22: Case folding/mapping flags
+ */
+typedef unsigned int OnigCaseFoldType; /* case fold flag */
+
+ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
+
+/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
+#define OnigCodePointMaskWidth 3
+#define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
+#define OnigCodePointCount(n) ((n)&OnigCodePointMask)
+#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
+
+/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
+/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
+
+/* bits for index into table with separate titlecase mappings */
+/* 10 bits provide 1024 values */
+#define OnigSpecialIndexShift 3
+#define OnigSpecialIndexWidth 10
+
+#define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
+#define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
+#define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
+#define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bits from ONIGENC_CASE to ONIGENC_CASE_SPECIAL */
+#define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
+#define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
+#define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
+#define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
+
+#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
+
+#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
+#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
+#define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
+
+#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
+
+#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
+#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
+
+
+#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
+#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
+/* 13 => Unicode:0x1ffc */
+
+/* code range */
+#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
+#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
+#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
+
+typedef struct {
+ int byte_len; /* argument(original) character(s) byte length */
+ int code_len; /* number of code */
+ OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
+} OnigCaseFoldCodeItem;
+
+typedef struct {
+ OnigCodePoint esc;
+ OnigCodePoint anychar;
+ OnigCodePoint anytime;
+ OnigCodePoint zero_or_one_time;
+ OnigCodePoint one_or_more_time;
+ OnigCodePoint anychar_anytime;
+} OnigMetaCharTableType;
+
+typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
+
+typedef struct OnigEncodingTypeST {
+ int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
+ const char* name;
+ int max_enc_len;
+ int min_enc_len;
+ int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
+ OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
+ int (*code_to_mbclen)(OnigCodePoint code, const struct OnigEncodingTypeST* enc);
+ int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, const struct OnigEncodingTypeST* enc);
+ int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, const struct OnigEncodingTypeST* enc);
+ int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, const struct OnigEncodingTypeST* enc);
+ int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], const struct OnigEncodingTypeST* enc);
+ int (*property_name_to_ctype)(const struct OnigEncodingTypeST* enc, const OnigUChar* p, const OnigUChar* end);
+ int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, const struct OnigEncodingTypeST* enc);
+ int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], const struct OnigEncodingTypeST* enc);
+ OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
+ int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
+ int (*case_map)(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
+ int ruby_encoding_index;
+ unsigned int flags;
+} OnigEncodingType;
+
+typedef const OnigEncodingType* OnigEncoding;
+
+ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
+#ifndef RUBY
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_1;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_2;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_3;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_4;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_5;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_6;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_7;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_8;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_9;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_10;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_11;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_13;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_14;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_15;
+ONIG_EXTERN const OnigEncodingType OnigEncodingISO_8859_16;
+ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_8;
+ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16BE;
+ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_16LE;
+ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32BE;
+ONIG_EXTERN const OnigEncodingType OnigEncodingUTF_32LE;
+ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_JP;
+ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_TW;
+ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_KR;
+ONIG_EXTERN const OnigEncodingType OnigEncodingEUC_CN;
+ONIG_EXTERN const OnigEncodingType OnigEncodingShift_JIS;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_31J;
+/* ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8; */
+ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_R;
+ONIG_EXTERN const OnigEncodingType OnigEncodingKOI8_U;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1250;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1251;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1252;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1253;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1254;
+ONIG_EXTERN const OnigEncodingType OnigEncodingWindows_1257;
+ONIG_EXTERN const OnigEncodingType OnigEncodingBIG5;
+ONIG_EXTERN const OnigEncodingType OnigEncodingGB18030;
+#endif /* RUBY */
+
+#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
+#ifndef RUBY
+# define ONIG_ENCODING_ISO_8859_1 (&OnigEncodingISO_8859_1)
+# define ONIG_ENCODING_ISO_8859_2 (&OnigEncodingISO_8859_2)
+# define ONIG_ENCODING_ISO_8859_3 (&OnigEncodingISO_8859_3)
+# define ONIG_ENCODING_ISO_8859_4 (&OnigEncodingISO_8859_4)
+# define ONIG_ENCODING_ISO_8859_5 (&OnigEncodingISO_8859_5)
+# define ONIG_ENCODING_ISO_8859_6 (&OnigEncodingISO_8859_6)
+# define ONIG_ENCODING_ISO_8859_7 (&OnigEncodingISO_8859_7)
+# define ONIG_ENCODING_ISO_8859_8 (&OnigEncodingISO_8859_8)
+# define ONIG_ENCODING_ISO_8859_9 (&OnigEncodingISO_8859_9)
+# define ONIG_ENCODING_ISO_8859_10 (&OnigEncodingISO_8859_10)
+# define ONIG_ENCODING_ISO_8859_11 (&OnigEncodingISO_8859_11)
+# define ONIG_ENCODING_ISO_8859_13 (&OnigEncodingISO_8859_13)
+# define ONIG_ENCODING_ISO_8859_14 (&OnigEncodingISO_8859_14)
+# define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15)
+# define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16)
+# define ONIG_ENCODING_UTF_8 (&OnigEncodingUTF_8)
+# define ONIG_ENCODING_UTF_16BE (&OnigEncodingUTF_16BE)
+# define ONIG_ENCODING_UTF_16LE (&OnigEncodingUTF_16LE)
+# define ONIG_ENCODING_UTF_32BE (&OnigEncodingUTF_32BE)
+# define ONIG_ENCODING_UTF_32LE (&OnigEncodingUTF_32LE)
+# define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP)
+# define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW)
+# define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR)
+# define ONIG_ENCODING_EUC_CN (&OnigEncodingEUC_CN)
+# define ONIG_ENCODING_SHIFT_JIS (&OnigEncodingShift_JIS)
+# define ONIG_ENCODING_WINDOWS_31J (&OnigEncodingWindows_31J)
+/* # define ONIG_ENCODING_KOI8 (&OnigEncodingKOI8) */
+# define ONIG_ENCODING_KOI8_R (&OnigEncodingKOI8_R)
+# define ONIG_ENCODING_KOI8_U (&OnigEncodingKOI8_U)
+# define ONIG_ENCODING_WINDOWS_1250 (&OnigEncodingWindows_1250)
+# define ONIG_ENCODING_WINDOWS_1251 (&OnigEncodingWindows_1251)
+# define ONIG_ENCODING_WINDOWS_1252 (&OnigEncodingWindows_1252)
+# define ONIG_ENCODING_WINDOWS_1253 (&OnigEncodingWindows_1253)
+# define ONIG_ENCODING_WINDOWS_1254 (&OnigEncodingWindows_1254)
+# define ONIG_ENCODING_WINDOWS_1257 (&OnigEncodingWindows_1257)
+# define ONIG_ENCODING_BIG5 (&OnigEncodingBIG5)
+# define ONIG_ENCODING_GB18030 (&OnigEncodingGB18030)
+
+/* old names */
+# define ONIG_ENCODING_SJIS ONIG_ENCODING_SHIFT_JIS
+# define ONIG_ENCODING_CP932 ONIG_ENCODING_WINDOWS_31J
+# define ONIG_ENCODING_CP1250 ONIG_ENCODING_WINDOWS_1250
+# define ONIG_ENCODING_CP1251 ONIG_ENCODING_WINDOWS_1251
+# define ONIG_ENCODING_CP1252 ONIG_ENCODING_WINDOWS_1252
+# define ONIG_ENCODING_CP1253 ONIG_ENCODING_WINDOWS_1253
+# define ONIG_ENCODING_CP1254 ONIG_ENCODING_WINDOWS_1254
+# define ONIG_ENCODING_CP1257 ONIG_ENCODING_WINDOWS_1257
+# define ONIG_ENCODING_UTF8 ONIG_ENCODING_UTF_8
+# define ONIG_ENCODING_UTF16_BE ONIG_ENCODING_UTF_16BE
+# define ONIG_ENCODING_UTF16_LE ONIG_ENCODING_UTF_16LE
+# define ONIG_ENCODING_UTF32_BE ONIG_ENCODING_UTF_32BE
+# define ONIG_ENCODING_UTF32_LE ONIG_ENCODING_UTF_32LE
+#endif /* RUBY */
+
+#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
+
+/* this declaration needs to be here because it is used in string.c in Ruby */
+ONIG_EXTERN
+int onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
+
+
+/* work size */
+#define ONIGENC_CODE_TO_MBC_MAXLEN 7
+#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
+/* 18: 6(max-byte) * 3(case-fold chars) */
+
+/* character types */
+#define ONIGENC_CTYPE_NEWLINE 0
+#define ONIGENC_CTYPE_ALPHA 1
+#define ONIGENC_CTYPE_BLANK 2
+#define ONIGENC_CTYPE_CNTRL 3
+#define ONIGENC_CTYPE_DIGIT 4
+#define ONIGENC_CTYPE_GRAPH 5
+#define ONIGENC_CTYPE_LOWER 6
+#define ONIGENC_CTYPE_PRINT 7
+#define ONIGENC_CTYPE_PUNCT 8
+#define ONIGENC_CTYPE_SPACE 9
+#define ONIGENC_CTYPE_UPPER 10
+#define ONIGENC_CTYPE_XDIGIT 11
+#define ONIGENC_CTYPE_WORD 12
+#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
+#define ONIGENC_CTYPE_ASCII 14
+#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
+
+/* flags */
+#define ONIGENC_FLAG_NONE 0U
+#define ONIGENC_FLAG_UNICODE 1U
+
+#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
+
+#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
+#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
+#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
+#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
+#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
+#define ONIGENC_IS_MBC_WORD(enc,s,end) \
+ ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
+#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
+ onigenc_ascii_is_code_ctype( \
+ ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)
+#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
+
+
+#define ONIGENC_NAME(enc) ((enc)->name)
+
+#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
+ (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
+#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
+ (enc)->is_allowed_reverse_match(s,end,enc)
+#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
+ (enc)->left_adjust_char_head(start, s, end, enc)
+#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
+ (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
+#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
+ (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
+#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
+ onigenc_step_back((enc),(start),(s),(end),(n))
+
+#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
+#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
+#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
+#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
+
+#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
+#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
+#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
+
+#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc) /* calls the encoding's precise_mbc_enc_len callback directly */
+
+ONIG_EXTERN
+int onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc); /* function behind ONIGENC_MBC_ENC_LEN; NOTE(review): how it relates to precise_mbc_enc_len is not visible in this header */
+
+#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc) /* note the argument order change: enc moves from first to last */
+#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) /* reads the encoding's max_enc_len field */
+#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) /* alias of ONIGENC_MBC_MAXLEN */
+#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) /* reads the encoding's min_enc_len field */
+#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc) /* forwards to the encoding's is_mbc_newline callback */
+#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc) /* forwards to the encoding's mbc_to_code callback (bytes at p -> code point) */
+#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc) /* forwards to the encoding's code_to_mbclen callback */
+#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc) /* forwards to the encoding's code_to_mbc callback; buf is passed through as given */
+#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
+ (enc)->property_name_to_ctype(enc,p,end) /* unlike the other callback macros in this section, enc is passed as the FIRST argument here, not the last */
+
+#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
+
+#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
+#define ONIGENC_IS_CODE_GRAPH(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
+#define ONIGENC_IS_CODE_PRINT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
+#define ONIGENC_IS_CODE_ALNUM(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
+#define ONIGENC_IS_CODE_ALPHA(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
+#define ONIGENC_IS_CODE_LOWER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
+#define ONIGENC_IS_CODE_UPPER(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
+#define ONIGENC_IS_CODE_CNTRL(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
+#define ONIGENC_IS_CODE_PUNCT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
+#define ONIGENC_IS_CODE_SPACE(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
+#define ONIGENC_IS_CODE_BLANK(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
+#define ONIGENC_IS_CODE_DIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
+#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
+#define ONIGENC_IS_CODE_WORD(enc,code) \
+ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
+
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
+ (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
+
+ONIG_EXTERN
+OnigUChar* onigenc_step_back(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n);
+
+
+/* encoding API */
+ONIG_EXTERN
+int onigenc_init(void);
+ONIG_EXTERN
+int onigenc_set_default_encoding(OnigEncoding enc);
+ONIG_EXTERN
+OnigEncoding onigenc_get_default_encoding(void);
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev);
+ONIG_EXTERN
+OnigUChar* onigenc_get_prev_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+OnigUChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+OnigUChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end);
+ONIG_EXTERN
+int onigenc_strlen(OnigEncoding enc, const OnigUChar* p, const OnigUChar* end);
+ONIG_EXTERN
+int onigenc_strlen_null(OnigEncoding enc, const OnigUChar* p);
+ONIG_EXTERN
+int onigenc_str_bytelen_null(OnigEncoding enc, const OnigUChar* p);
+
+
+
+/* PART: regular expression */
+
+/* config parameters */
+#define ONIG_NREGION 10
+#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
+#define ONIG_MAX_BACKREF_NUM 1000
+#define ONIG_MAX_REPEAT_NUM 100000
+#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
+/* constants */
+#define ONIG_MAX_ERROR_MESSAGE_LEN 90
+
+typedef unsigned int OnigOptionType; /* bit set combined from the ONIG_OPTION_* flags defined below */
+
+#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE /* the default is "no flags set" */
+
+/* options */
+#define ONIG_OPTION_NONE 0U
+#define ONIG_OPTION_IGNORECASE 1U /* bit 0 (0x1); each following flag is the previous one shifted left by one */
+#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1) /* bit 1 (0x2) */
+#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1) /* bit 2 (0x4) */
+#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE /* alias: same bit as ONIG_OPTION_MULTILINE */
+#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1) /* bit 3 (0x8) */
+#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1) /* bit 4 (0x10) */
+#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1) /* bit 5 (0x20) */
+#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1) /* bit 6 (0x40) */
+#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1) /* bit 7 (0x80) */
+#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* bit 8 (0x100) */
+/* options (search time) */
+#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1) /* bit 9 (0x200) */
+#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1) /* bit 10 (0x400) */
+#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NOTEOL << 1) /* bit 11 (0x800) */
+#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1) /* bit 12 (0x1000) */
+/* options (ctype range) */
+#define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_NOTEOS << 1) /* bit 13 (0x2000) */
+#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1) /* bit 14 (0x4000) */
+#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1) /* bit 15 (0x8000) */
+/* options (newline) */
+#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1) /* bit 16 (0x10000) */
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_NEWLINE_CRLF /* limit: the highest option bit defined above */
+
+#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) /* sets the bits in place; options must be a modifiable lvalue */
+#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) /* clears the bits in place; options must be a modifiable lvalue */
+#define ONIG_IS_OPTION_ON(options,option) ((options) & (option)) /* yields the masked bits (non-zero when any is set), not a normalized 0/1 */
+
+/* syntax */
+typedef struct { /* description of one regex syntax flavor; instances such as OnigSyntaxRuby are declared below this struct */
+ unsigned int op; /* presumably a bit set of ONIG_SYN_OP_* flags; read/written by onig_get_syntax_op()/onig_set_syntax_op() */
+ unsigned int op2; /* presumably a bit set of ONIG_SYN_OP2_* flags (a second 32-bit group) */
+ unsigned int behavior; /* presumably a bit set of the ONIG_SYN_* behavior flags */
+ OnigOptionType options; /* default option */
+ OnigMetaCharTableType meta_char_table; /* type declared outside this section; presumably the table edited by onig_set_meta_char() -- confirm */
+} OnigSyntaxType;
+
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58_NG;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
+ONIG_EXTERN const OnigSyntaxType OnigSyntaxPython;
+
+/* predefined syntaxes (see regsyntax.c) */
+#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
+#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
+#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
+#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
+#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
+#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
+#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
+#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
+#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
+#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
+#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
+#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
+
+/* default syntax */
+ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
+#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
+
+/* syntax (operators) */
+#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
+#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
+#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
+#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
+#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
+#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
+#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
+#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
+#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
+#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
+#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
+#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
+#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
+#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
+#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
+#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
+#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
+#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
+#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
+#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
+#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
+#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
+#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
+#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
+#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
+#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
+#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
+#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
+#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
+#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
+#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
+#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */
+
+#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
+#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
+#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
+#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
+#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
+#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
+#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
+#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
+#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
+#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
+#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
+#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
+#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
+#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
+#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
+/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
+#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
+#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
+#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
+#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X */
+#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
+#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
+#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
+#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
+#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
+#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
+#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
+#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
+#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
+
+/* syntax (behavior) */
+#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
+#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
+#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
+#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
+#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
+#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
+#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
+#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
+#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
+#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
+#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
+#define ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP (1U<<11) /* (?<x>)(?<x>)\k<x> */
+
+/* syntax (behavior) in char class [...] */
+#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
+#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
+#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
+#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+/* syntax (behavior) warning */
+#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
+#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
+#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
+
+/* meta character specifiers (onig_set_meta_char()) */
+#define ONIG_META_CHAR_ESCAPE 0
+#define ONIG_META_CHAR_ANYCHAR 1
+#define ONIG_META_CHAR_ANYTIME 2
+#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
+#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
+#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
+
+#define ONIG_INEFFECTIVE_META_CHAR 0
+
+/* error codes */
+#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000) /* true for codes in the range -999 .. -100; ecode is evaluated twice, so avoid arguments with side effects */
+/* normal return */
+#define ONIG_NORMAL 0
+#define ONIG_MISMATCH -1 /* negative, but above the -100 threshold of ONIG_IS_PATTERN_ERROR */
+#define ONIG_NO_SUPPORT_CONFIG -2
+
+/* internal error */
+#define ONIGERR_MEMORY -5
+#define ONIGERR_TYPE_BUG -6
+#define ONIGERR_PARSER_BUG -11
+#define ONIGERR_STACK_BUG -12
+#define ONIGERR_UNDEFINED_BYTECODE -13
+#define ONIGERR_UNEXPECTED_BYTECODE -14
+#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
+#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
+#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
+#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
+/* general error */
+#define ONIGERR_INVALID_ARGUMENT -30
+/* syntax error */
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
+#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
+#define ONIGERR_EMPTY_CHAR_CLASS -102
+#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
+#define ONIGERR_END_PATTERN_AT_ESCAPE -104
+#define ONIGERR_END_PATTERN_AT_META -105
+#define ONIGERR_END_PATTERN_AT_CONTROL -106
+#define ONIGERR_META_CODE_SYNTAX -108
+#define ONIGERR_CONTROL_CODE_SYNTAX -109
+#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
+#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
+#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
+#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
+#define ONIGERR_NESTED_REPEAT_OPERATOR -115
+#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
+#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
+#define ONIGERR_END_PATTERN_IN_GROUP -118
+#define ONIGERR_UNDEFINED_GROUP_OPTION -119
+#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
+#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
+#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
+#define ONIGERR_INVALID_CONDITION_PATTERN -124
+/* values error (syntax error) */
+#define ONIGERR_TOO_BIG_NUMBER -200
+#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
+#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
+#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
+#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
+#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
+#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
+#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
+#define ONIGERR_INVALID_BACKREF -208
+#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
+#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -210
+#define ONIGERR_TOO_SHORT_DIGITS -211
+#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
+#define ONIGERR_EMPTY_GROUP_NAME -214
+#define ONIGERR_INVALID_GROUP_NAME -215
+#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
+#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
+#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
+#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
+#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
+#define ONIGERR_NEVER_ENDING_RECURSION -221
+#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
+#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_CODE_POINT_VALUE -400 /* the -4xx codes still fall inside the -999 .. -100 range tested by ONIG_IS_PATTERN_ERROR */
+#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 /* same numeric value as ONIGERR_INVALID_CODE_POINT_VALUE: the two names cannot be told apart at run time */
+#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
+#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
+
+/* errors related to thread */
+/* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */
+
+
+/* must be smaller than BIT_STATUS_BITS_NUM (the number of bits in an unsigned int, i.e. sizeof(unsigned int) * 8) */
+#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 /* highest group index accepted by ONIG_IS_CAPTURE_HISTORY_GROUP */
+#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
+ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) /* short-circuits in order: upper bound on i, non-null r->list, non-zero r->list[i]; there is no lower-bound check on i, and r and i are each evaluated more than once */
+
+typedef struct OnigCaptureTreeNodeStruct { /* one node of the capture history tree; re_registers.history_root points at a node of this type */
+ int group; /* group number */
+ OnigPosition beg; /* presumably the start position of this capture -- confirm against the matcher */
+ OnigPosition end; /* presumably the end position of this capture -- confirm against the matcher */
+ int allocated; /* presumably the capacity of childs, as opposed to the count in use -- TODO confirm */
+ int num_childs; /* presumably the number of entries in childs that are in use -- TODO confirm */
+ struct OnigCaptureTreeNodeStruct** childs; /* array of pointers to child nodes of this same type */
+} OnigCaptureTreeNode;
+
+/* match result region type */
+struct re_registers { /* also exposed under the name OnigRegion via a typedef later in this header */
+ int allocated; /* presumably the capacity of the beg/end arrays -- TODO confirm */
+ int num_regs; /* presumably the number of registers currently in use -- TODO confirm */
+ OnigPosition* beg; /* array of begin positions, one per register; presumably ONIG_REGION_NOTPOS marks an unset entry -- confirm */
+ OnigPosition* end; /* array of end positions, parallel to beg */
+ /* extended */
+ OnigCaptureTreeNode* history_root; /* capture history tree root */
+};
+
+/* capture tree traverse */
+#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 /* bit 0; presumably passed as the `at` argument of onig_capture_tree_traverse() -- confirm */
+#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 /* bit 1 */
+#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
+ ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) /* both bits set (value 3) */
+
+
+#define ONIG_REGION_NOTPOS -1 /* negative sentinel; presumably stored in region beg/end entries that have no position -- confirm */
+
+typedef struct re_registers OnigRegion;
+
+typedef struct { /* error detail record; passed as the einfo parameter of onig_new() and its sibling constructors */
+ OnigEncoding enc; /* presumably the encoding of the bytes between par and par_end -- confirm */
+ OnigUChar* par; /* presumably the start of the pattern fragment the error refers to -- confirm against onig_error_code_to_str() */
+ OnigUChar* par_end; /* presumably the end of that fragment -- confirm */
+} OnigErrorInfo;
+
+typedef struct { /* a lower/upper pair; re_pattern_buffer.repeat_range points at elements of this type */
+ int lower; /* presumably the minimum repeat count -- confirm */
+ int upper; /* presumably the maximum repeat count; how "unbounded" is represented is not visible here */
+} OnigRepeatRange;
+
+typedef void (*OnigWarnFunc)(const char* s); /* callback type accepted by onig_set_warn_func() and onig_set_verb_warn_func() */
+extern void onig_null_warn(const char* s); /* declared with plain extern, not ONIG_EXTERN like the rest of the API in this header */
+#define ONIG_NULL_WARN onig_null_warn /* the function itself, usable wherever an OnigWarnFunc is expected */
+
+#define ONIG_CHAR_TABLE_SIZE 256
+
+typedef struct re_pattern_buffer { /* compiled regex object; OnigRegex is a pointer to this type */
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_comb_exp_check; /* combination explosion check */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level; /* NOTE(review): meaning of the levels is not visible in this header */
+ int repeat_range_alloc; /* presumably the allocated element count of repeat_range -- TODO confirm */
+
+ OnigOptionType options; /* ONIG_OPTION_* bit set; returned by onig_get_options() presumably -- confirm */
+
+ OnigRepeatRange* repeat_range; /* array of lower/upper pairs; presumably indexed by repeat id -- confirm */
+
+ OnigEncoding enc; /* encoding associated with this regex */
+ const OnigSyntaxType* syntax; /* syntax definition; not modified through this pointer */
+ void* name_table; /* opaque here; presumably backs the onig_*_name* lookup functions -- confirm */
+ OnigCaseFoldType case_fold_flag; /* case fold flags associated with this regex */
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact; /* presumably the start of the literal used for exact-string search -- confirm */
+ unsigned char *exact_end; /* presumably the end of that literal -- confirm */
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigDistance dmin; /* min-distance of exact or map */
+ OnigDistance dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict */
+} OnigRegexType;
+
+typedef OnigRegexType* OnigRegex;
+
+#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
+typedef OnigRegexType regex_t;
+#endif
+
+
+typedef struct { /* compile parameters bundle; passed as the ci parameter of onig_new_deluxe() */
+ int num_of_elements; /* NOTE(review): expected value is not visible here; presumably the count of fields that follow -- confirm in doc/API */
+ OnigEncoding pattern_enc; /* encoding of the pattern text */
+ OnigEncoding target_enc; /* encoding of the strings to be matched; kept separate from pattern_enc */
+ const OnigSyntaxType* syntax; /* syntax definition; not modified through this pointer */
+ OnigOptionType option; /* ONIG_OPTION_* bit set */
+ OnigCaseFoldType case_fold_flag; /* case fold flags for compilation */
+} OnigCompileInfo;
+
+/* Oniguruma Native API */
+ONIG_EXTERN
+int onig_initialize(OnigEncoding encodings[], int n);
+ONIG_EXTERN
+int onig_init(void);
+ONIG_EXTERN
+int onig_error_code_to_str(OnigUChar* s, OnigPosition err_code, ...);
+ONIG_EXTERN
+void onig_set_warn_func(OnigWarnFunc f);
+ONIG_EXTERN
+void onig_set_verb_warn_func(OnigWarnFunc f);
+ONIG_EXTERN /* takes OnigRegex* (pointer to the handle); presumably allocates the regex and stores it there -- confirm in doc/API */
+int onig_new(OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+ONIG_EXTERN /* takes an existing OnigRegex and no pattern; presumably only initializes its fields -- confirm */
+int onig_reg_init(OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax);
+ONIG_EXTERN /* takes an existing OnigRegex by value; NOTE(review): syntax is non-const here, unlike onig_new() and onig_reg_init() */
+int onig_new_without_alloc(OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo);
+ONIG_EXTERN /* variant taking encodings, syntax, options and case-fold flags bundled in an OnigCompileInfo */
+int onig_new_deluxe(OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo);
+ONIG_EXTERN
+void onig_free(OnigRegex);
+ONIG_EXTERN
+void onig_free_body(OnigRegex);
+ONIG_EXTERN
+OnigPosition onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*), void* callback_arg);
+ONIG_EXTERN
+OnigPosition onig_search(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
+ONIG_EXTERN
+OnigPosition onig_search_gpos(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option);
+ONIG_EXTERN
+OnigPosition onig_match(OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option);
+ONIG_EXTERN
+OnigRegion* onig_region_new(void);
+ONIG_EXTERN
+void onig_region_init(OnigRegion* region);
+ONIG_EXTERN
+void onig_region_free(OnigRegion* region, int free_self);
+ONIG_EXTERN
+void onig_region_copy(OnigRegion* to, const OnigRegion* from);
+ONIG_EXTERN
+void onig_region_clear(OnigRegion* region);
+ONIG_EXTERN
+int onig_region_resize(OnigRegion* region, int n);
+ONIG_EXTERN
+int onig_region_set(OnigRegion* region, int at, int beg, int end);
+ONIG_EXTERN
+int onig_name_to_group_numbers(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums);
+ONIG_EXTERN
+int onig_name_to_backref_number(OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, const OnigRegion *region);
+ONIG_EXTERN
+int onig_foreach_name(OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg);
+ONIG_EXTERN
+int onig_number_of_names(const OnigRegexType *reg);
+ONIG_EXTERN
+int onig_number_of_captures(const OnigRegexType *reg);
+ONIG_EXTERN
+int onig_number_of_capture_histories(const OnigRegexType *reg);
+ONIG_EXTERN
+OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region);
+ONIG_EXTERN
+int onig_capture_tree_traverse(OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg);
+ONIG_EXTERN
+int onig_noname_group_capture_is_active(const OnigRegexType *reg);
+ONIG_EXTERN
+OnigEncoding onig_get_encoding(const OnigRegexType *reg);
+ONIG_EXTERN
+OnigOptionType onig_get_options(const OnigRegexType *reg);
+ONIG_EXTERN
+OnigCaseFoldType onig_get_case_fold_flag(const OnigRegexType *reg);
+ONIG_EXTERN
+const OnigSyntaxType* onig_get_syntax(const OnigRegexType *reg);
+ONIG_EXTERN
+int onig_set_default_syntax(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+void onig_copy_syntax(OnigSyntaxType* to, const OnigSyntaxType* from);
+ONIG_EXTERN
+unsigned int onig_get_syntax_op(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+unsigned int onig_get_syntax_op2(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+unsigned int onig_get_syntax_behavior(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+OnigOptionType onig_get_syntax_options(const OnigSyntaxType* syntax);
+ONIG_EXTERN
+void onig_set_syntax_op(OnigSyntaxType* syntax, unsigned int op);
+ONIG_EXTERN
+void onig_set_syntax_op2(OnigSyntaxType* syntax, unsigned int op2);
+ONIG_EXTERN
+void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior);
+ONIG_EXTERN
+void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options);
+ONIG_EXTERN
+int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code);
+ONIG_EXTERN
+void onig_copy_encoding(OnigEncodingType *to, OnigEncoding from);
+ONIG_EXTERN
+OnigCaseFoldType onig_get_default_case_fold_flag(void);
+ONIG_EXTERN
+int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag);
+ONIG_EXTERN
+unsigned int onig_get_match_stack_limit_size(void);
+ONIG_EXTERN
+int onig_set_match_stack_limit_size(unsigned int size);
+ONIG_EXTERN
+unsigned int onig_get_parse_depth_limit(void);
+ONIG_EXTERN
+int onig_set_parse_depth_limit(unsigned int depth);
+ONIG_EXTERN
+int onig_end(void);
+ONIG_EXTERN
+const char* onig_version(void);
+ONIG_EXTERN
+const char* onig_copyright(void);
+
+RUBY_SYMBOL_EXPORT_END
+
+#ifdef __cplusplus
+# if 0
+{ /* satisfy cc-mode */
+# endif
+}
+#endif
+
+#endif /* ONIGMO_H */
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 1d8a0198d8..dc83754aca 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -1,880 +1,8 @@
#ifndef ONIGURUMA_H
#define ONIGURUMA_H
-/**********************************************************************
- oniguruma.h - Onigmo (Oniguruma-mod) (regular expression library)
-**********************************************************************/
-/*-
- * Copyright (c) 2002-2009 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifdef __cplusplus
-extern "C" {
-#if 0
-} /* satisfy cc-mode */
-#endif
-#endif
-
+#include "onigmo.h"
#define ONIGURUMA
-#define ONIGURUMA_VERSION_MAJOR 5
-#define ONIGURUMA_VERSION_MINOR 15
-#define ONIGURUMA_VERSION_TEENY 0
-
-#ifdef __cplusplus
-# ifndef HAVE_PROTOTYPES
-# define HAVE_PROTOTYPES 1
-# endif
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
-#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifdef HAVE_STDARG_H
-# ifndef HAVE_STDARG_PROTOTYPES
-# define HAVE_STDARG_PROTOTYPES 1
-# endif
-#endif
-
-#ifndef P_
-#if defined(__STDC__) || defined(_WIN32)
-# define P_(args) args
-#else
-# define P_(args) ()
-#endif
-#endif
-
-#ifndef PV_
-#ifdef HAVE_STDARG_PROTOTYPES
-# define PV_(args) args
-#else
-# define PV_(args) ()
-#endif
-#endif
-
-#ifndef ONIG_EXTERN
-#ifdef RUBY_EXTERN
-#define ONIG_EXTERN RUBY_EXTERN
-#else
-#if defined(_WIN32) && !defined(__GNUC__)
-#if defined(EXPORT) || defined(RUBY_EXPORT)
-#define ONIG_EXTERN extern __declspec(dllexport)
-#else
-#define ONIG_EXTERN extern __declspec(dllimport)
-#endif
-#endif
-#endif
-#endif
-
-#ifndef ONIG_EXTERN
-#define ONIG_EXTERN extern
-#endif
-
-RUBY_SYMBOL_EXPORT_BEGIN
-
-#include <stddef.h> /* for size_t */
-
-/* PART: character encoding */
-
-#ifndef ONIG_ESCAPE_UCHAR_COLLISION
-#define UChar OnigUChar
-#endif
-
-typedef unsigned char OnigUChar;
-typedef unsigned int OnigCodePoint;
-typedef unsigned int OnigCtype;
-typedef size_t OnigDistance;
-typedef ptrdiff_t OnigPosition;
-
-#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
-
-/*
- * Onig casefold/case mapping flags and related definitions
- *
- * Subfields (starting with 0 at LSB):
- * 0-2: Code point count in casefold.h
- * 3-12: Index into SpecialCaseMapping array in casefold.h
- * 13-22: Case folding/mapping flags
- */
-typedef unsigned int OnigCaseFoldType; /* case fold flag */
-
-ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
-
-/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
-#define OnigCodePointMaskWidth 3
-#define OnigCodePointMask ((1<<OnigCodePointMaskWidth)-1)
-#define OnigCodePointCount(n) ((n)&OnigCodePointMask)
-#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
-
-/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
-/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
-
-/* bits for index into table with separate titlecase mappings */
-/* 10 bits provide 1024 values */
-#define OnigSpecialIndexShift 3
-#define OnigSpecialIndexWidth 10
-
-#define ONIGENC_CASE_UPCASE (1<<13) /* has/needs uppercase mapping */
-#define ONIGENC_CASE_DOWNCASE (1<<14) /* has/needs lowercase mapping */
-#define ONIGENC_CASE_TITLECASE (1<<15) /* has/needs (special) titlecase mapping */
-#define ONIGENC_CASE_SPECIAL_OFFSET 3 /* offset in bytes from ONIGENC_CASE to ONIGENC_CASE_SPECIAL */
-#define ONIGENC_CASE_UP_SPECIAL (1<<16) /* has special upcase mapping */
-#define ONIGENC_CASE_DOWN_SPECIAL (1<<17) /* has special downcase mapping */
-#define ONIGENC_CASE_MODIFIED (1<<18) /* data has been modified */
-#define ONIGENC_CASE_FOLD (1<<19) /* has/needs case folding */
-
-#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
-
-#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) /* needs Lithuanian-specific mapping */
-#define ONIGENC_CASE_ASCII_ONLY (1<<22) /* only modify ASCII range */
-#define ONIGENC_CASE_IS_TITLECASE (1<<23) /* character itself is already titlecase */
-
-#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
-
-#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
-#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag
-
-
-#define ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN 3
-#define ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM 13
-/* 13 => Unicode:0x1ffc */
-
-/* code range */
-#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
-#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
-#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
-
-typedef struct {
- int byte_len; /* argument(original) character(s) byte length */
- int code_len; /* number of code */
- OnigCodePoint code[ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN];
-} OnigCaseFoldCodeItem;
-
-typedef struct {
- OnigCodePoint esc;
- OnigCodePoint anychar;
- OnigCodePoint anytime;
- OnigCodePoint zero_or_one_time;
- OnigCodePoint one_or_more_time;
- OnigCodePoint anychar_anytime;
-} OnigMetaCharTableType;
-
-typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
-
-typedef struct OnigEncodingTypeST {
- int (*precise_mbc_enc_len)(const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc);
- const char* name;
- int max_enc_len;
- int min_enc_len;
- int (*is_mbc_newline)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
- OnigCodePoint (*mbc_to_code)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
- int (*code_to_mbclen)(OnigCodePoint code, const struct OnigEncodingTypeST* enc);
- int (*code_to_mbc)(OnigCodePoint code, OnigUChar *buf, const struct OnigEncodingTypeST* enc);
- int (*mbc_case_fold)(OnigCaseFoldType flag, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, const struct OnigEncodingTypeST* enc);
- int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, const struct OnigEncodingTypeST* enc);
- int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], const struct OnigEncodingTypeST* enc);
- int (*property_name_to_ctype)(const struct OnigEncodingTypeST* enc, const OnigUChar* p, const OnigUChar* end);
- int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, const struct OnigEncodingTypeST* enc);
- int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], const struct OnigEncodingTypeST* enc);
- OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
- int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, const struct OnigEncodingTypeST* enc);
- int ruby_encoding_index;
- unsigned int flags;
- int (*case_map)(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
-} OnigEncodingType;
-
-typedef const OnigEncodingType* OnigEncoding;
-
-ONIG_EXTERN const OnigEncodingType OnigEncodingASCII;
-
-#define ONIG_ENCODING_ASCII (&OnigEncodingASCII)
-
-#define ONIG_ENCODING_UNDEF ((OnigEncoding )0)
-
-/* this declaration needs to be here because it is used in string.c */
-ONIG_EXTERN int onigenc_ascii_only_case_map P_((OnigCaseFoldType* flagP,
- const OnigUChar** pp, const OnigUChar* end,
- OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc));
-
-
-/* work size */
-#define ONIGENC_CODE_TO_MBC_MAXLEN 7
-#define ONIGENC_MBC_CASE_FOLD_MAXLEN 18
-/* 18: 6(max-byte) * 3(case-fold chars) */
-
-/* character types */
-#define ONIGENC_CTYPE_NEWLINE 0
-#define ONIGENC_CTYPE_ALPHA 1
-#define ONIGENC_CTYPE_BLANK 2
-#define ONIGENC_CTYPE_CNTRL 3
-#define ONIGENC_CTYPE_DIGIT 4
-#define ONIGENC_CTYPE_GRAPH 5
-#define ONIGENC_CTYPE_LOWER 6
-#define ONIGENC_CTYPE_PRINT 7
-#define ONIGENC_CTYPE_PUNCT 8
-#define ONIGENC_CTYPE_SPACE 9
-#define ONIGENC_CTYPE_UPPER 10
-#define ONIGENC_CTYPE_XDIGIT 11
-#define ONIGENC_CTYPE_WORD 12
-#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */
-#define ONIGENC_CTYPE_ASCII 14
-#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
-
-/* flags */
-#define ONIGENC_FLAG_NONE 0U
-#define ONIGENC_FLAG_UNICODE 1U
-
-#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
-
-#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
-#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
-#define ONIGENC_IS_MBC_HEAD(enc,p,e) (ONIGENC_MBC_ENC_LEN(enc,p,e) != 1)
-#define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128)
-#define ONIGENC_IS_CODE_ASCII(code) ((code) < 128)
-#define ONIGENC_IS_MBC_WORD(enc,s,end) \
- ONIGENC_IS_CODE_WORD(enc,ONIGENC_MBC_TO_CODE(enc,s,end))
-#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
- onigenc_ascii_is_code_ctype( \
- ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc)
-#define ONIGENC_IS_UNICODE(enc) ((enc)->flags & ONIGENC_FLAG_UNICODE)
-
-
-#define ONIGENC_NAME(enc) ((enc)->name)
-
-#define ONIGENC_MBC_CASE_FOLD(enc,flag,pp,end,buf) \
- (enc)->mbc_case_fold(flag,(const OnigUChar** )pp,end,buf,enc)
-#define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \
- (enc)->is_allowed_reverse_match(s,end,enc)
-#define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s,end) \
- (enc)->left_adjust_char_head(start, s, end, enc)
-#define ONIGENC_APPLY_ALL_CASE_FOLD(enc,case_fold_flag,f,arg) \
- (enc)->apply_all_case_fold(case_fold_flag,f,arg,enc)
-#define ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc,case_fold_flag,p,end,acs) \
- (enc)->get_case_fold_codes_by_str(case_fold_flag,p,end,acs,enc)
-#define ONIGENC_STEP_BACK(enc,start,s,end,n) \
- onigenc_step_back((enc),(start),(s),(end),(n))
-
-#define ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(n) (n)
-#define ONIGENC_MBCLEN_CHARFOUND_P(r) (0 < (r))
-#define ONIGENC_MBCLEN_CHARFOUND_LEN(r) (r)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_INVALID() (-1)
-#define ONIGENC_MBCLEN_INVALID_P(r) ((r) == -1)
-
-#define ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(n) (-1-(n))
-#define ONIGENC_MBCLEN_NEEDMORE_P(r) ((r) < -1)
-#define ONIGENC_MBCLEN_NEEDMORE_LEN(r) (-1-(r))
-
-#define ONIGENC_PRECISE_MBC_ENC_LEN(enc,p,e) (enc)->precise_mbc_enc_len(p,e,enc)
-
-ONIG_EXTERN
-int onigenc_mbclen_approximate P_((const OnigUChar* p,const OnigUChar* e, const struct OnigEncodingTypeST* enc));
-
-#define ONIGENC_MBC_ENC_LEN(enc,p,e) onigenc_mbclen_approximate(p,e,enc)
-#define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len)
-#define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc)
-#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len)
-#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end),enc)
-#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end),enc)
-#define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code,enc)
-#define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf,enc)
-#define ONIGENC_PROPERTY_NAME_TO_CTYPE(enc,p,end) \
- (enc)->property_name_to_ctype(enc,p,end)
-
-#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype,enc)
-
-#define ONIGENC_IS_CODE_NEWLINE(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE)
-#define ONIGENC_IS_CODE_GRAPH(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH)
-#define ONIGENC_IS_CODE_PRINT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PRINT)
-#define ONIGENC_IS_CODE_ALNUM(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALNUM)
-#define ONIGENC_IS_CODE_ALPHA(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_ALPHA)
-#define ONIGENC_IS_CODE_LOWER(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_LOWER)
-#define ONIGENC_IS_CODE_UPPER(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_UPPER)
-#define ONIGENC_IS_CODE_CNTRL(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_CNTRL)
-#define ONIGENC_IS_CODE_PUNCT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_PUNCT)
-#define ONIGENC_IS_CODE_SPACE(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_SPACE)
-#define ONIGENC_IS_CODE_BLANK(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_BLANK)
-#define ONIGENC_IS_CODE_DIGIT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_DIGIT)
-#define ONIGENC_IS_CODE_XDIGIT(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_XDIGIT)
-#define ONIGENC_IS_CODE_WORD(enc,code) \
- ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbout,ranges) \
- (enc)->get_ctype_code_range(ctype,sbout,ranges,enc)
-
-ONIG_EXTERN
-OnigUChar* onigenc_step_back P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, int n));
-
-
-/* encoding API */
-ONIG_EXTERN
-int onigenc_init P_((void));
-ONIG_EXTERN
-int onigenc_set_default_encoding P_((OnigEncoding enc));
-PUREFUNC(ONIG_EXTERN OnigEncoding onigenc_get_default_encoding P_((void)));
-PUREFUNC(ONIG_EXTERN void onigenc_set_default_caseconv_table P_((const OnigUChar* table)));
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head_with_prev P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end, const OnigUChar** prev));
-ONIG_EXTERN
-OnigUChar* onigenc_get_prev_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
-ONIG_EXTERN
-OnigUChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
-ONIG_EXTERN
-OnigUChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, const OnigUChar* start, const OnigUChar* s, const OnigUChar* end));
-ONIG_EXTERN
-int onigenc_strlen P_((OnigEncoding enc, const OnigUChar* p, const OnigUChar* end));
-ONIG_EXTERN
-int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
-ONIG_EXTERN
-int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
-
-
-
-/* PART: regular expression */
-
-/* config parameters */
-#define ONIG_NREGION 10
-#define ONIG_MAX_BACKREF_NUM 1000
-#define ONIG_MAX_CAPTURE_GROUP_NUM 32767
-#define ONIG_MAX_REPEAT_NUM 100000
-#define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000
-/* constants */
-#define ONIG_MAX_ERROR_MESSAGE_LEN 90
-
-typedef unsigned int OnigOptionType;
-
-#define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE
-
-/* options */
-#define ONIG_OPTION_NONE 0U
-#define ONIG_OPTION_IGNORECASE 1U
-#define ONIG_OPTION_EXTEND (ONIG_OPTION_IGNORECASE << 1)
-#define ONIG_OPTION_MULTILINE (ONIG_OPTION_EXTEND << 1)
-#define ONIG_OPTION_DOTALL ONIG_OPTION_MULTILINE
-#define ONIG_OPTION_SINGLELINE (ONIG_OPTION_MULTILINE << 1)
-#define ONIG_OPTION_FIND_LONGEST (ONIG_OPTION_SINGLELINE << 1)
-#define ONIG_OPTION_FIND_NOT_EMPTY (ONIG_OPTION_FIND_LONGEST << 1)
-#define ONIG_OPTION_NEGATE_SINGLELINE (ONIG_OPTION_FIND_NOT_EMPTY << 1)
-#define ONIG_OPTION_DONT_CAPTURE_GROUP (ONIG_OPTION_NEGATE_SINGLELINE << 1)
-#define ONIG_OPTION_CAPTURE_GROUP (ONIG_OPTION_DONT_CAPTURE_GROUP << 1)
-/* options (search time) */
-#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
-#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
-#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
-/* options (ctype range) */
-#define ONIG_OPTION_ASCII_RANGE (ONIG_OPTION_POSIX_REGION << 1)
-#define ONIG_OPTION_POSIX_BRACKET_ALL_RANGE (ONIG_OPTION_ASCII_RANGE << 1)
-#define ONIG_OPTION_WORD_BOUND_ALL_RANGE (ONIG_OPTION_POSIX_BRACKET_ALL_RANGE << 1)
-/* options (newline) */
-#define ONIG_OPTION_NEWLINE_CRLF (ONIG_OPTION_WORD_BOUND_ALL_RANGE << 1)
-#define ONIG_OPTION_NOTBOS (ONIG_OPTION_NEWLINE_CRLF << 1)
-#define ONIG_OPTION_NOTEOS (ONIG_OPTION_NOTBOS << 1)
-#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOTEOS /* limit */
-
-#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
-#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
-#define ONIG_IS_OPTION_ON(options,option) ((options) & (option))
-
-/* syntax */
-typedef struct {
- unsigned int op;
- unsigned int op2;
- unsigned int behavior;
- OnigOptionType options; /* default option */
- OnigMetaCharTableType meta_char_table;
-} OnigSyntaxType;
-
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxASIS;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixBasic;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPosixExtended;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxEmacs;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxGrep;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxGnuRegex;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxJava;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl58_NG;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPerl;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxRuby;
-ONIG_EXTERN const OnigSyntaxType OnigSyntaxPython;
-
-/* predefined syntaxes (see regsyntax.c) */
-#define ONIG_SYNTAX_ASIS (&OnigSyntaxASIS)
-#define ONIG_SYNTAX_POSIX_BASIC (&OnigSyntaxPosixBasic)
-#define ONIG_SYNTAX_POSIX_EXTENDED (&OnigSyntaxPosixExtended)
-#define ONIG_SYNTAX_EMACS (&OnigSyntaxEmacs)
-#define ONIG_SYNTAX_GREP (&OnigSyntaxGrep)
-#define ONIG_SYNTAX_GNU_REGEX (&OnigSyntaxGnuRegex)
-#define ONIG_SYNTAX_JAVA (&OnigSyntaxJava)
-#define ONIG_SYNTAX_PERL58 (&OnigSyntaxPerl58)
-#define ONIG_SYNTAX_PERL58_NG (&OnigSyntaxPerl58_NG)
-#define ONIG_SYNTAX_PERL (&OnigSyntaxPerl)
-#define ONIG_SYNTAX_RUBY (&OnigSyntaxRuby)
-#define ONIG_SYNTAX_PYTHON (&OnigSyntaxPython)
-
-/* default syntax */
-ONIG_EXTERN const OnigSyntaxType* OnigDefaultSyntax;
-#define ONIG_SYNTAX_DEFAULT OnigDefaultSyntax
-
-/* syntax (operators) */
-#define ONIG_SYN_OP_VARIABLE_META_CHARACTERS (1U<<0)
-#define ONIG_SYN_OP_DOT_ANYCHAR (1U<<1) /* . */
-#define ONIG_SYN_OP_ASTERISK_ZERO_INF (1U<<2) /* * */
-#define ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF (1U<<3)
-#define ONIG_SYN_OP_PLUS_ONE_INF (1U<<4) /* + */
-#define ONIG_SYN_OP_ESC_PLUS_ONE_INF (1U<<5)
-#define ONIG_SYN_OP_QMARK_ZERO_ONE (1U<<6) /* ? */
-#define ONIG_SYN_OP_ESC_QMARK_ZERO_ONE (1U<<7)
-#define ONIG_SYN_OP_BRACE_INTERVAL (1U<<8) /* {lower,upper} */
-#define ONIG_SYN_OP_ESC_BRACE_INTERVAL (1U<<9) /* \{lower,upper\} */
-#define ONIG_SYN_OP_VBAR_ALT (1U<<10) /* | */
-#define ONIG_SYN_OP_ESC_VBAR_ALT (1U<<11) /* \| */
-#define ONIG_SYN_OP_LPAREN_SUBEXP (1U<<12) /* (...) */
-#define ONIG_SYN_OP_ESC_LPAREN_SUBEXP (1U<<13) /* \(...\) */
-#define ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (1U<<14) /* \A, \Z, \z */
-#define ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (1U<<15) /* \G */
-#define ONIG_SYN_OP_DECIMAL_BACKREF (1U<<16) /* \num */
-#define ONIG_SYN_OP_BRACKET_CC (1U<<17) /* [...] */
-#define ONIG_SYN_OP_ESC_W_WORD (1U<<18) /* \w, \W */
-#define ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END (1U<<19) /* \<. \> */
-#define ONIG_SYN_OP_ESC_B_WORD_BOUND (1U<<20) /* \b, \B */
-#define ONIG_SYN_OP_ESC_S_WHITE_SPACE (1U<<21) /* \s, \S */
-#define ONIG_SYN_OP_ESC_D_DIGIT (1U<<22) /* \d, \D */
-#define ONIG_SYN_OP_LINE_ANCHOR (1U<<23) /* ^, $ */
-#define ONIG_SYN_OP_POSIX_BRACKET (1U<<24) /* [:xxxx:] */
-#define ONIG_SYN_OP_QMARK_NON_GREEDY (1U<<25) /* ??,*?,+?,{n,m}? */
-#define ONIG_SYN_OP_ESC_CONTROL_CHARS (1U<<26) /* \n,\r,\t,\a ... */
-#define ONIG_SYN_OP_ESC_C_CONTROL (1U<<27) /* \cx */
-#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
-#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
-#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
-#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{OOO} */ /* NOTIMPL */
-
-#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
-#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
-#define ONIG_SYN_OP2_OPTION_PERL (1U<<2) /* (?imsxadlu), (?-imsx), (?^imsxalu) */
-#define ONIG_SYN_OP2_OPTION_RUBY (1U<<3) /* (?imxadu), (?-imx) */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (1U<<4) /* ?+,*+,++ */
-#define ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL (1U<<5) /* {n,m}+ */
-#define ONIG_SYN_OP2_CCLASS_SET_OP (1U<<6) /* [...&&..[..]..] */
-#define ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (1U<<7) /* (?<name>...) */
-#define ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (1U<<8) /* \k<name> */
-#define ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (1U<<9) /* \g<name>, \g<n> */
-#define ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY (1U<<10) /* (?@..),(?@<x>..) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (1U<<11) /* \C-x */
-#define ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (1U<<12) /* \M-x */
-#define ONIG_SYN_OP2_ESC_V_VTAB (1U<<13) /* \v as VTAB */
-#define ONIG_SYN_OP2_ESC_U_HEX4 (1U<<14) /* \uHHHH */
-#define ONIG_SYN_OP2_ESC_GNU_BUF_ANCHOR (1U<<15) /* \`, \' */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1U<<16) /* \p{...}, \P{...} */
-#define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1U<<17) /* \p{^..}, \P{^..} */
-/* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */
-#define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */
-#define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */
-#define ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK (1U<<21) /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */
-#define ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER (1U<<22) /* \X as (?>\P{M}\p{M}*) */
-#define ONIG_SYN_OP2_ESC_V_VERTICAL_WHITESPACE (1U<<23) /* \v, \V -- Perl */ /* NOTIMPL */
-#define ONIG_SYN_OP2_ESC_H_HORIZONTAL_WHITESPACE (1U<<24) /* \h, \H -- Perl */ /* NOTIMPL */
-#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<25) /* \K */
-#define ONIG_SYN_OP2_ESC_G_BRACE_BACKREF (1U<<26) /* \g{name}, \g{n} */
-#define ONIG_SYN_OP2_QMARK_SUBEXP_CALL (1U<<27) /* (?&name), (?n), (?R), (?0) */
-#define ONIG_SYN_OP2_QMARK_VBAR_BRANCH_RESET (1U<<28) /* (?|...) */ /* NOTIMPL */
-#define ONIG_SYN_OP2_QMARK_LPAREN_CONDITION (1U<<29) /* (?(cond)yes...|no...) */
-#define ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP (1U<<30) /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */
-#define ONIG_SYN_OP2_OPTION_JAVA (1U<<31) /* (?idmsux), (?-idmsux) */ /* NOTIMPL */
-
-/* syntax (behavior) */
-#define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */
-#define ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (1U<<0) /* ?, *, +, {n,m} */
-#define ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (1U<<1) /* error or ignore */
-#define ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP (1U<<2) /* ...)... */
-#define ONIG_SYN_ALLOW_INVALID_INTERVAL (1U<<3) /* {??? */
-#define ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (1U<<4) /* {,n} => {0,n} */
-#define ONIG_SYN_STRICT_CHECK_BACKREF (1U<<5) /* /(\1)/,/\1()/ ..*/
-#define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1U<<6) /* (?<=a|bc) */
-#define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1U<<7) /* see doc/RE */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1U<<8) /* (?<x>)(?<x>) */
-#define ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1U<<9) /* a{n}?=(?:a{n})? */
-#define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL (1U<<10) /* (?<x>)(?<x>)(?&x) */
-
-/* syntax (behavior) in char class [...] */
-#define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1U<<20) /* [^...] */
-#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
-#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
-#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
-/* syntax (behavior) warning */
-#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
-#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
-#define ONIG_SYN_WARN_CC_DUP (1U<<26) /* [aa] */
-
-/* meta character specifiers (onig_set_meta_char()) */
-#define ONIG_META_CHAR_ESCAPE 0
-#define ONIG_META_CHAR_ANYCHAR 1
-#define ONIG_META_CHAR_ANYTIME 2
-#define ONIG_META_CHAR_ZERO_OR_ONE_TIME 3
-#define ONIG_META_CHAR_ONE_OR_MORE_TIME 4
-#define ONIG_META_CHAR_ANYCHAR_ANYTIME 5
-
-#define ONIG_INEFFECTIVE_META_CHAR 0
-
-/* error codes */
-#define ONIG_IS_PATTERN_ERROR(ecode) ((ecode) <= -100 && (ecode) > -1000)
-/* normal return */
-#define ONIG_NORMAL 0
-#define ONIG_MISMATCH -1
-#define ONIG_NO_SUPPORT_CONFIG -2
-
-/* internal error */
-#define ONIGERR_MEMORY -5
-#define ONIGERR_TYPE_BUG -6
-#define ONIGERR_PARSER_BUG -11
-#define ONIGERR_STACK_BUG -12
-#define ONIGERR_UNDEFINED_BYTECODE -13
-#define ONIGERR_UNEXPECTED_BYTECODE -14
-#define ONIGERR_MATCH_STACK_LIMIT_OVER -15
-#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SET -21
-#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
-/* general error */
-#define ONIGERR_INVALID_ARGUMENT -30
-/* syntax error */
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
-#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
-#define ONIGERR_EMPTY_CHAR_CLASS -102
-#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
-#define ONIGERR_END_PATTERN_AT_ESCAPE -104
-#define ONIGERR_END_PATTERN_AT_META -105
-#define ONIGERR_END_PATTERN_AT_CONTROL -106
-#define ONIGERR_META_CODE_SYNTAX -108
-#define ONIGERR_CONTROL_CODE_SYNTAX -109
-#define ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE -110
-#define ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE -111
-#define ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS -112
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED -113
-#define ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID -114
-#define ONIGERR_NESTED_REPEAT_OPERATOR -115
-#define ONIGERR_UNMATCHED_CLOSE_PARENTHESIS -116
-#define ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS -117
-#define ONIGERR_END_PATTERN_IN_GROUP -118
-#define ONIGERR_UNDEFINED_GROUP_OPTION -119
-#define ONIGERR_INVALID_POSIX_BRACKET_TYPE -121
-#define ONIGERR_INVALID_LOOK_BEHIND_PATTERN -122
-#define ONIGERR_INVALID_REPEAT_RANGE_PATTERN -123
-#define ONIGERR_INVALID_CONDITION_PATTERN -124
-/* values error (syntax error) */
-#define ONIGERR_TOO_BIG_NUMBER -200
-#define ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE -201
-#define ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE -202
-#define ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS -203
-#define ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE -204
-#define ONIGERR_TOO_MANY_MULTI_BYTE_RANGES -205
-#define ONIGERR_TOO_SHORT_MULTI_BYTE_STRING -206
-#define ONIGERR_TOO_BIG_BACKREF_NUMBER -207
-#define ONIGERR_INVALID_BACKREF -208
-#define ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED -209
-#define ONIGERR_TOO_SHORT_DIGITS -210
-#define ONIGERR_TOO_LONG_WIDE_CHAR_VALUE -212
-#define ONIGERR_EMPTY_GROUP_NAME -214
-#define ONIGERR_INVALID_GROUP_NAME -215
-#define ONIGERR_INVALID_CHAR_IN_GROUP_NAME -216
-#define ONIGERR_UNDEFINED_NAME_REFERENCE -217
-#define ONIGERR_UNDEFINED_GROUP_REFERENCE -218
-#define ONIGERR_MULTIPLEX_DEFINED_NAME -219
-#define ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL -220
-#define ONIGERR_NEVER_ENDING_RECURSION -221
-#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
-#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
-#define ONIGERR_TOO_MANY_CAPTURE_GROUPS -224
-#define ONIGERR_INVALID_CODE_POINT_VALUE -400
-#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
-#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
-#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
-#define ONIGERR_INVALID_COMBINATION_OF_OPTIONS -403
-
-/* errors related to thread */
-#define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001
-
-
-/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
-#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
-#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
- ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
-
-typedef struct OnigCaptureTreeNodeStruct {
- int group; /* group number */
- OnigPosition beg;
- OnigPosition end;
- int allocated;
- int num_childs;
- struct OnigCaptureTreeNodeStruct** childs;
-} OnigCaptureTreeNode;
-
-/* match result region type */
-struct re_registers {
- int allocated;
- int num_regs;
- OnigPosition* beg;
- OnigPosition* end;
- /* extended */
- OnigCaptureTreeNode* history_root; /* capture history tree root */
-};
-
-/* capture tree traverse */
-#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1
-#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2
-#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \
- ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST )
-
-
-#define ONIG_REGION_NOTPOS -1
-
-typedef struct re_registers OnigRegion;
-
-typedef struct {
- OnigEncoding enc;
- OnigUChar* par;
- OnigUChar* par_end;
-} OnigErrorInfo;
-
-typedef struct {
- int lower;
- int upper;
-} OnigRepeatRange;
-
-typedef void (*OnigWarnFunc) P_((const char* s));
-extern void onig_null_warn P_((const char* s));
-#define ONIG_NULL_WARN onig_null_warn
-
-#define ONIG_CHAR_TABLE_SIZE 256
-
-/* regex_t state */
-#define ONIG_STATE_NORMAL 0
-#define ONIG_STATE_SEARCHING 1
-#define ONIG_STATE_COMPILING -1
-#define ONIG_STATE_MODIFY -2
-
-#define ONIG_STATE(reg) \
- ((reg)->state > 0 ? ONIG_STATE_SEARCHING : (reg)->state)
-
-typedef struct re_pattern_buffer {
- /* common members of BBuf(bytes-buffer) */
- unsigned char* p; /* compiled pattern */
- unsigned int used; /* used space for p */
- unsigned int alloc; /* allocated space for p */
-
- int state; /* normal, searching, compiling */
- int num_mem; /* used memory(...) num counted from 1 */
- int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_NULL_CHECK_START/END id counter */
- int num_comb_exp_check; /* combination explosion check */
- int num_call; /* number of subexp call */
- unsigned int capture_history; /* (?@...) flag (1-31) */
- unsigned int bt_mem_start; /* need backtrack flag */
- unsigned int bt_mem_end; /* need backtrack flag */
- int stack_pop_level;
- int repeat_range_alloc;
-
- OnigOptionType options;
-
- OnigRepeatRange* repeat_range;
-
- OnigEncoding enc;
- const OnigSyntaxType* syntax;
- void* name_table;
- OnigCaseFoldType case_fold_flag;
-
- /* optimization info (string search, char-map and anchors) */
- int optimize; /* optimize flag */
- int threshold_len; /* search str-length for apply optimize */
- int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- OnigDistance anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- OnigDistance anchor_dmax; /* (SEMI_)END_BUF anchor distance */
- int sub_anchor; /* start-anchor for exact or map */
- unsigned char *exact;
- unsigned char *exact_end;
- unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
- int *int_map; /* BM skip for exact_len > 255 */
- int *int_map_backward; /* BM skip for backward search */
- OnigDistance dmin; /* min-distance of exact or map */
- OnigDistance dmax; /* max-distance of exact or map */
-
- /* regex_t link chain */
- struct re_pattern_buffer* chain; /* escape compile-conflict */
-} OnigRegexType;
-
-typedef OnigRegexType* OnigRegex;
-
-#ifndef ONIG_ESCAPE_REGEX_T_COLLISION
- typedef OnigRegexType regex_t;
-#endif
-
-
-typedef struct {
- int num_of_elements;
- OnigEncoding pattern_enc;
- OnigEncoding target_enc;
- const OnigSyntaxType* syntax;
- OnigOptionType option;
- OnigCaseFoldType case_fold_flag;
-} OnigCompileInfo;
-
-/* Oniguruma Native API */
-ONIG_EXTERN
-int onig_init P_((void));
-ONIG_EXTERN
-int onig_error_code_to_str PV_((OnigUChar* s, OnigPosition err_code, ...));
-ONIG_EXTERN
-void onig_set_warn_func P_((OnigWarnFunc f));
-ONIG_EXTERN
-void onig_set_verb_warn_func P_((OnigWarnFunc f));
-ONIG_EXTERN
-int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, const OnigSyntaxType* syntax));
-ONIG_EXTERN
-int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
-ONIG_EXTERN
-void onig_free P_((OnigRegex));
-ONIG_EXTERN
-void onig_free_body P_((OnigRegex));
-ONIG_EXTERN
-int onig_recompile P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo));
-ONIG_EXTERN
-int onig_recompile_deluxe P_((OnigRegex reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo));
-ONIG_EXTERN
-OnigPosition onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
-ONIG_EXTERN
-OnigPosition onig_search_gpos P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* global_pos, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
-ONIG_EXTERN
-OnigPosition onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
-ONIG_EXTERN
-OnigRegion* onig_region_new P_((void));
-ONIG_EXTERN
-void onig_region_init P_((OnigRegion* region));
-ONIG_EXTERN
-void onig_region_free P_((OnigRegion* region, int free_self));
-ONIG_EXTERN
-void onig_region_copy P_((OnigRegion* to, OnigRegion* from));
-ONIG_EXTERN
-void onig_region_clear P_((OnigRegion* region));
-ONIG_EXTERN
-int onig_region_resize P_((OnigRegion* region, int n));
-ONIG_EXTERN
-int onig_region_set P_((OnigRegion* region, int at, int beg, int end));
-ONIG_EXTERN
-int onig_name_to_group_numbers P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, int** nums));
-ONIG_EXTERN
-int onig_name_to_backref_number P_((OnigRegex reg, const OnigUChar* name, const OnigUChar* name_end, OnigRegion *region));
-ONIG_EXTERN
-int onig_foreach_name P_((OnigRegex reg, int (*func)(const OnigUChar*, const OnigUChar*,int,int*,OnigRegex,void*), void* arg));
-ONIG_EXTERN
-int onig_number_of_names P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_number_of_captures P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_number_of_capture_histories P_((OnigRegex reg));
-ONIG_EXTERN
-OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region));
-ONIG_EXTERN
-int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,OnigPosition,OnigPosition,int,int,void*), void* arg));
-ONIG_EXTERN
-int onig_noname_group_capture_is_active P_((OnigRegex reg));
-ONIG_EXTERN
-OnigEncoding onig_get_encoding P_((OnigRegex reg));
-ONIG_EXTERN
-OnigOptionType onig_get_options P_((OnigRegex reg));
-ONIG_EXTERN
-OnigCaseFoldType onig_get_case_fold_flag P_((OnigRegex reg));
-ONIG_EXTERN
-const OnigSyntaxType* onig_get_syntax P_((OnigRegex reg));
-ONIG_EXTERN
-int onig_set_default_syntax P_((const OnigSyntaxType* syntax));
-ONIG_EXTERN
-void onig_copy_syntax P_((OnigSyntaxType* to, const OnigSyntaxType* from));
-ONIG_EXTERN
-unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax));
-ONIG_EXTERN
-void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op));
-ONIG_EXTERN
-void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2));
-ONIG_EXTERN
-void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior));
-ONIG_EXTERN
-void onig_set_syntax_options P_((OnigSyntaxType* syntax, OnigOptionType options));
-ONIG_EXTERN
-int onig_set_meta_char P_((OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code));
-ONIG_EXTERN
-void onig_copy_encoding P_((OnigEncodingType *to, OnigEncoding from));
-ONIG_EXTERN
-OnigCaseFoldType onig_get_default_case_fold_flag P_((void));
-ONIG_EXTERN
-int onig_set_default_case_fold_flag P_((OnigCaseFoldType case_fold_flag));
-ONIG_EXTERN
-unsigned int onig_get_match_stack_limit_size P_((void));
-ONIG_EXTERN
-int onig_set_match_stack_limit_size P_((unsigned int size));
-ONIG_EXTERN
-int onig_end P_((void));
-ONIG_EXTERN
-const char* onig_version P_((void));
-ONIG_EXTERN
-const char* onig_copyright P_((void));
-
-RUBY_SYMBOL_EXPORT_END
-
-#ifdef __cplusplus
-#if 0
-{ /* satisfy cc-mode */
-#endif
-}
-#endif
-
+#define ONIGURUMA_VERSION_MAJOR ONIGMO_VERSION_MAJOR
+#define ONIGURUMA_VERSION_MINOR ONIGMO_VERSION_MINOR
+#define ONIGURUMA_VERSION_TEENY ONIGMO_VERSION_TEENY
#endif /* ONIGURUMA_H */
diff --git a/re.c b/re.c
index 069a9bc15d..2d786f5cef 100644
--- a/re.c
+++ b/re.c
@@ -847,7 +847,7 @@ onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_e
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
if (r) goto err;
- r = onig_compile(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
+ r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
if (r) {
err:
onig_free(*reg);
@@ -3908,7 +3908,6 @@ Init_Regexp(void)
{
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
- onigenc_set_default_caseconv_table((UChar*)casetable);
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
onig_set_warn_func(re_warn);
onig_set_verb_warn_func(re_warn);
diff --git a/regcomp.c b/regcomp.c
index 222d1d6c9e..49103afea1 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,15 +30,6 @@
#include "regparse.h"
-#if defined(USE_MULTI_THREAD_SYSTEM) \
- && defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
-#ifdef _WIN32
-CRITICAL_SECTION gOnigMutex;
-#else
-pthread_mutex_t gOnigMutex;
-#endif
-#endif
-
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
extern OnigCaseFoldType
@@ -263,6 +254,7 @@ add_mem_num(regex_t* reg, int num)
return 0;
}
+#if 0
static int
add_pointer(regex_t* reg, void* addr)
{
@@ -271,6 +263,7 @@ add_pointer(regex_t* reg, void* addr)
BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
+#endif
static int
add_option(regex_t* reg, OnigOptionType option)
@@ -591,11 +584,6 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
- if (IS_NCCLASS_SHARE(cc)) {
- len = SIZE_OPCODE + SIZE_POINTER;
- return len;
- }
-
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
@@ -621,12 +609,6 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
- if (IS_NCCLASS_SHARE(cc)) {
- add_opcode(reg, OP_CCLASS_NODE);
- r = add_pointer(reg, cc);
- return r;
- }
-
if (IS_NULL(cc->mbuf)) {
if (IS_NCCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_NOT);
@@ -638,17 +620,17 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
else {
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MB_NOT);
+ add_opcode(reg, OP_CCLASS_MB_NOT);
else
- add_opcode(reg, OP_CCLASS_MB);
+ add_opcode(reg, OP_CCLASS_MB);
r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MIX_NOT);
+ add_opcode(reg, OP_CCLASS_MIX_NOT);
else
- add_opcode(reg, OP_CCLASS_MIX);
+ add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
if (r) return r;
@@ -760,9 +742,9 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
if (NTYPE(qn->target) == NT_CANY) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
}
}
@@ -989,9 +971,9 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
if (NTYPE(qn->target) == NT_CANY) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
}
}
@@ -1010,9 +992,12 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
else
len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
@@ -1078,9 +1063,12 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
else
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
@@ -1096,6 +1084,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact)) {
r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
mod_tlen + SIZE_OP_JUMP);
@@ -1106,7 +1095,9 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
}
- else if (IS_NOT_NULL(qn->next_head_exact)) {
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact)) {
r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
mod_tlen + SIZE_OP_JUMP);
if (r) return r;
@@ -1243,6 +1234,11 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
len += (IS_ENCLOSE_RECURSION(node)
? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ len = SIZE_OP_MEMORY_START_PUSH;
+ len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
+ }
else
#endif
{
@@ -1354,6 +1350,14 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
if (r) return r;
r = add_opcode(reg, OP_RETURN);
}
+ else if (IS_ENCLOSE_RECURSION(node)) {
+ if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
+ r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
+ else
+ r = add_opcode(reg, OP_MEMORY_END_REC);
+ if (r) return r;
+ r = add_mem_num(reg, node->regnum);
+ }
else
#endif
{
@@ -1589,10 +1593,10 @@ compile_length_tree(Node* node, regex_t* reg)
int n = 0;
len = 0;
do {
- r = compile_length_tree(NCAR(node), reg);
- if (r < 0) return r;
- len += r;
- n++;
+ r = compile_length_tree(NCAR(node), reg);
+ if (r < 0) return r;
+ len += r;
+ n++;
} while (IS_NOT_NULL(node = NCDR(node)));
r = len;
r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
@@ -1621,7 +1625,7 @@ compile_length_tree(Node* node, regex_t* reg)
#ifdef USE_BACKREF_WITH_LEVEL
if (IS_BACKREF_NEST_LEVEL(br)) {
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
+ r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
}
else
@@ -1785,12 +1789,12 @@ compile_tree(Node* node, regex_t* reg)
int i;
int* p;
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);
- }
- else {
- r = add_opcode(reg, OP_BACKREF_MULTI);
- }
+ if (IS_IGNORECASE(reg->options)) {
+ r = add_opcode(reg, OP_BACKREF_MULTI_IC);
+ }
+ else {
+ r = add_opcode(reg, OP_BACKREF_MULTI);
+ }
if (r) return r;
#ifdef USE_BACKREF_WITH_LEVEL
@@ -1884,17 +1888,8 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
break;
case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = noname_disable_map(&(an->target), map, counter);
- break;
- }
- }
+ if (NANCHOR(node)->target)
+ r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
break;
default:
@@ -1951,7 +1946,7 @@ renumber_by_map(Node* node, GroupNumRemap* map)
{
EncloseNode* en = NENCLOSE(node);
if (en->type == ENCLOSE_CONDITION)
- en->regnum = map[en->regnum].new_val;
+ en->regnum = map[en->regnum].new_val;
r = renumber_by_map(en->target, map);
}
break;
@@ -1961,17 +1956,8 @@ renumber_by_map(Node* node, GroupNumRemap* map)
break;
case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = renumber_by_map(an->target, map);
- break;
- }
- }
+ if (NANCHOR(node)->target)
+ r = renumber_by_map(NANCHOR(node)->target, map);
break;
default:
@@ -2005,6 +1991,11 @@ numbered_ref_check(Node* node)
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = numbered_ref_check(NANCHOR(node)->target);
+ break;
+
default:
break;
}
@@ -2091,7 +2082,7 @@ quantifiers_memory_node_info(Node* node)
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
if (IS_CALL_RECURSION(NCALL(node))) {
return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
@@ -2099,7 +2090,7 @@ quantifiers_memory_node_info(Node* node)
else
r = quantifiers_memory_node_info(NCALL(node)->target);
break;
-#endif
+# endif
case NT_QTFR:
{
@@ -2238,18 +2229,23 @@ get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
- else {
- r = get_min_match_length(en->target, min, env);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ if (IS_ENCLOSE_MIN_FIXED(en))
+ *min = en->min_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *min = 0; /* recursive: node is already being measured (MARK1 set), so use 0 */
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1); /* mark node while its target is measured */
+ r = get_min_match_length(en->target, min, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1); /* measurement done; drop the mark */
+ if (r == 0) {
+ en->min_len = *min;
+ SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ }
}
- }
- break;
-#endif
+ }
+ break;
+
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
@@ -2356,18 +2352,23 @@ get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
-#ifdef USE_SUBEXP_CALL
if (IS_ENCLOSE_MAX_FIXED(en))
*max = en->max_len;
else {
- r = get_max_match_length(en->target, max, env);
- if (r == 0) {
- en->max_len = *max;
- SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *max = ONIG_INFINITE_DISTANCE; /* recursive: node is already being measured (MARK1 set) */
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1); /* mark node while its target is measured */
+ r = get_max_match_length(en->target, max, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1); /* measurement done; drop the mark */
+ if (r == 0) {
+ en->max_len = *max;
+ SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ }
}
}
break;
-#endif
+
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
case ENCLOSE_CONDITION:
@@ -2622,10 +2623,10 @@ is_not_included(Node* x, Node* y, regex_t* reg)
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
v = BITSET_AT(xc->bs, i);
if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
- (v == 0 && IS_NCCLASS_NOT(xc))) {
+ (v == 0 && IS_NCCLASS_NOT(xc))) {
v = BITSET_AT(yc->bs, i);
if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
- (v == 0 && IS_NCCLASS_NOT(yc)))
+ (v == 0 && IS_NCCLASS_NOT(yc)))
return 0;
}
}
@@ -2675,24 +2676,24 @@ is_not_included(Node* x, Node* y, regex_t* reg)
break;
case NT_CCLASS:
- {
- CClassNode* cc = NCCLASS(y);
+ {
+ CClassNode* cc = NCCLASS(y);
- code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
- xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
- return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
- }
- break;
+ code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
+ xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
+ return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
+ }
+ break;
case NT_STR:
- {
- UChar *q;
- StrNode* ys = NSTR(y);
- len = NSTRING_LEN(x);
- if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
- if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
- /* tiny version */
- return 0;
+ {
+ UChar *q;
+ StrNode* ys = NSTR(y);
+ len = NSTRING_LEN(x);
+ if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
+ if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
+ /* tiny version */
+ return 0;
}
else {
for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
@@ -2703,7 +2704,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
break;
default:
- break;
+ break;
}
}
break;
@@ -2760,9 +2761,11 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
{
QtfrNode* qn = NQTFR(node);
if (qn->lower > 0) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
n = qn->head_exact;
else
+#endif
n = get_head_value_node(qn->target, exact, reg);
}
}
@@ -2854,8 +2857,8 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
#ifdef USE_SUBEXP_CALL
-#define RECURSION_EXIST 1
-#define RECURSION_INFINITE 2
+# define RECURSION_EXIST 1
+# define RECURSION_INFINITE 2
static int
subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
@@ -3055,7 +3058,7 @@ subexp_recursive_check(Node* node)
static int
subexp_recursive_check_trav(Node* node, ScanEnv* env)
{
-#define FOUND_CALLED_NODE 1
+# define FOUND_CALLED_NODE 1
int type;
int r = 0;
@@ -3156,22 +3159,22 @@ setup_subexp_call(Node* node, ScanEnv* env)
if (cn->group_num != 0) {
int gnum = cn->group_num;
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
if (env->num_named > 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
!ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
-#endif
+# endif
if (gnum > env->num_mem) {
onig_scan_env_set_error_string(env,
ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
}
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
set_call_attr:
-#endif
+# endif
cn->target = nodes[cn->group_num];
if (IS_NULL(cn->target)) {
onig_scan_env_set_error_string(env,
@@ -3182,12 +3185,12 @@ setup_subexp_call(Node* node, ScanEnv* env)
BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
cn->unset_addr_list = env->unset_addr_list;
}
-#ifdef USE_NAMED_GROUP
-#ifdef USE_PERL_SUBEXP_CALL
+# ifdef USE_NAMED_GROUP
+# ifdef USE_PERL_SUBEXP_CALL
else if (cn->name == cn->name_end) {
goto set_call_attr;
}
-#endif
+# endif
else {
int *refs;
@@ -3209,7 +3212,7 @@ setup_subexp_call(Node* node, ScanEnv* env)
goto set_call_attr;
}
}
-#endif
+# endif
}
break;
@@ -3398,13 +3401,9 @@ update_string_node_case_fold(regex_t* reg, Node *node)
}
r = onig_node_str_set(node, sbuf, sp);
- if (r != 0) {
- xfree(sbuf);
- return r;
- }
xfree(sbuf);
- return 0;
+ return r;
}
static int
@@ -3512,29 +3511,29 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
UChar *q = p + items[i].byte_len;
if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
+ r = expand_case_fold_make_rem_string(&rem, q, end, reg);
+ if (r != 0) {
+ onig_node_free(an);
+ goto mem_err2;
+ }
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode);
- onig_node_free(rem);
- goto mem_err;
- }
+ xnode = onig_node_list_add(NULL_NODE, snode);
+ if (IS_NULL(xnode)) {
+ onig_node_free(an);
+ onig_node_free(rem);
+ goto mem_err2;
+ }
+ if (IS_NULL(onig_node_list_add(xnode, rem))) {
+ onig_node_free(an);
+ onig_node_free(xnode);
+ onig_node_free(rem);
+ goto mem_err;
+ }
- NCAR(an) = xnode;
+ NCAR(an) = xnode;
}
else {
- NCAR(an) = snode;
+ NCAR(an) = snode;
}
NCDR(var_anode) = an;
@@ -3711,12 +3710,12 @@ expand_case_fold_string(Node* node, regex_t* reg)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define CEC_THRES_NUM_BIG_REPEAT 512
-#define CEC_INFINITE_NUM 0x7fffffff
+# define CEC_THRES_NUM_BIG_REPEAT 512
+# define CEC_INFINITE_NUM 0x7fffffff
-#define CEC_IN_INFINITE_REPEAT (1<<0)
-#define CEC_IN_FINITE_REPEAT (1<<1)
-#define CEC_CONT_BIG_REPEAT (1<<2)
+# define CEC_IN_INFINITE_REPEAT (1<<0)
+# define CEC_IN_FINITE_REPEAT (1<<1)
+# define CEC_CONT_BIG_REPEAT (1<<2)
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
@@ -3832,14 +3831,14 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
if (IS_CALL_RECURSION(NCALL(node)))
env->has_recursion = 1;
else
r = setup_comb_exp_check(NCALL(node)->target, state, env);
break;
-#endif
+# endif
default:
break;
@@ -3854,6 +3853,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
#define IN_REPEAT (1<<2)
#define IN_VAR_REPEAT (1<<3)
#define IN_ROOT (1<<4)
+#define IN_CALL (1<<5)
+#define IN_RECCALL (1<<6)
/* setup_tree does the following work.
1. check empty loop. (set qn->target_empty_info)
@@ -3943,7 +3944,7 @@ restart:
Node* target = qn->target;
if ((state & IN_REPEAT) != 0) {
- qn->state |= NST_IN_REPEAT;
+ qn->state |= NST_IN_REPEAT;
}
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
@@ -4058,12 +4059,18 @@ restart:
break;
case ENCLOSE_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
/* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
}
- r = setup_tree(en->target, reg, state, env);
- break;
+ if (IS_ENCLOSE_CALLED(en))
+ state |= IN_CALL;
+ if (IS_ENCLOSE_RECURSION(en))
+ state |= IN_RECCALL;
+ else if ((state & IN_RECCALL) != 0)
+ SET_CALL_RECURSION(node);
+ r = setup_tree(en->target, reg, state, env);
+ break;
case ENCLOSE_STOP_BACKTRACK:
{
@@ -4090,6 +4097,8 @@ restart:
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
#endif
+ if (NENCLOSE(node)->regnum > env->num_mem)
+ return ONIGERR_INVALID_BACKREF;
r = setup_tree(NENCLOSE(node)->target, reg, state, env);
break;
}
@@ -4133,10 +4142,10 @@ restart:
ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, state, env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
}
break;
@@ -4146,10 +4155,10 @@ restart:
ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_look_behind(node, reg, env);
- if (r != 0) return r;
if (NTYPE(node) != NT_ANCHOR) goto restart;
r = setup_tree(an->target, reg, (state | IN_NOT), env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
}
break;
}
@@ -4203,6 +4212,10 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
}
}
else {
+# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
+ /* This should not happen. */
+ return ONIGERR_TYPE_BUG;
+# else
if (IS_NULL(*int_skip)) {
*int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
@@ -4231,6 +4244,7 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
}
}
}
+# endif
}
return 0;
}
@@ -4276,6 +4290,10 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
}
}
else {
+# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
+ /* This should not happen. */
+ return ONIGERR_TYPE_BUG;
+# else
if (IS_NULL(*int_skip)) {
*int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
@@ -4304,13 +4322,12 @@ set_bm_skip(UChar* s, UChar* end, regex_t* reg,
}
}
}
+# endif
}
return 0;
}
#endif /* USE_SUNDAY_QUICK_SEARCH */
-#define OPT_EXACT_MAXLEN 24
-
typedef struct {
OnigDistance min; /* min byte length */
OnigDistance max; /* max byte length */
@@ -4980,14 +4997,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (slen > 0) {
add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
}
- set_mml(&opt->len, slen, slen);
+ set_mml(&opt->len, slen, slen);
}
else {
- OnigDistance max;
+ OnigDistance max;
if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
- int n = onigenc_strlen(env->enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
+ int n = onigenc_strlen(env->enc, sn->s, sn->end);
+ max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
}
else {
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
@@ -5003,7 +5020,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
max = slen;
}
- set_mml(&opt->len, slen, max);
+ set_mml(&opt->len, slen, max);
}
if ((OnigDistance )opt->exb.len == slen)
@@ -5019,18 +5036,18 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
/* no need to check ignore case. (set in setup_tree()) */
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
- OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
+ OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
set_mml(&opt->len, min, max);
}
else {
- for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
- z = BITSET_AT(cc->bs, i);
- if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
- add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
- }
- }
+ for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
+ z = BITSET_AT(cc->bs, i);
+ if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
+ }
+ }
set_mml(&opt->len, 1, 1);
}
}
@@ -5044,7 +5061,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
if (max == 1) {
- min = 1;
+ min = 1;
maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
switch (NCTYPE(node)->ctype) {
@@ -5067,7 +5084,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
}
else {
- min = ONIGENC_MBC_MINLEN(env->enc);
+ min = ONIGENC_MBC_MINLEN(env->enc);
}
set_mml(&opt->len, min, max);
}
@@ -5186,7 +5203,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (nopt.exb.len > 0) {
if (nopt.exb.reach_end) {
for (i = 2; i <= qn->lower &&
- ! is_full_opt_exact_info(&opt->exb); i++) {
+ ! is_full_opt_exact_info(&opt->exb); i++) {
concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
}
if (i < qn->lower) {
@@ -5308,11 +5325,14 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
else {
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
r = set_bm_skip(reg->exact, reg->exact_end, reg,
- reg->map, &(reg->int_map), 0);
- if (r) return r;
-
- reg->optimize = (allow_reverse != 0
+ reg->map, &(reg->int_map), 0);
+ if (r == 0) {
+ reg->optimize = (allow_reverse != 0
? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
+ }
+ else {
+ reg->optimize = ONIG_OPTIMIZE_EXACT;
+ }
}
else {
reg->optimize = ONIG_OPTIMIZE_EXACT;
@@ -5378,6 +5398,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
ANCHOR_LOOK_BEHIND);
+ if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
+
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
ANCHOR_PREC_READ_NOT);
@@ -5570,14 +5593,14 @@ print_optimize_info(FILE* f, regex_t* reg)
fputc('[', f);
for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
if (reg->map[i] != 0) {
- if (c > 0) fputs(", ", f);
- c++;
- if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
- ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
- fputc(i, f);
- else
- fprintf(f, "%d", i);
- }
+ if (c > 0) fputs(", ", f);
+ c++;
+ if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
+ ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
+ fputc(i, f);
+ else
+ fprintf(f, "%d", i);
+ }
}
fprintf(f, "]\n");
}
@@ -5612,6 +5635,7 @@ onig_free(regex_t* reg)
}
}
+#ifdef RUBY
size_t
onig_memsize(const regex_t *reg)
{
@@ -5635,65 +5659,47 @@ onig_region_memsize(const OnigRegion *regs)
size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
return size;
}
+#endif
#define REGEX_TRANSFER(to,from) do {\
- (to)->state = ONIG_STATE_MODIFY;\
onig_free_body(to);\
xmemcpy(to, from, sizeof(regex_t));\
xfree(from);\
} while (0)
+#if 0
extern void
onig_transfer(regex_t* to, regex_t* from)
{
- THREAD_ATOMIC_START;
REGEX_TRANSFER(to, from);
- THREAD_ATOMIC_END;
-}
-
-#define REGEX_CHAIN_HEAD(reg) do {\
- while (IS_NOT_NULL((reg)->chain)) {\
- (reg) = (reg)->chain;\
- }\
-} while (0)
-
-extern void
-onig_chain_link_add(regex_t* to, regex_t* add)
-{
- THREAD_ATOMIC_START;
- REGEX_CHAIN_HEAD(to);
- to->chain = add;
- THREAD_ATOMIC_END;
-}
-
-extern void
-onig_chain_reduce(regex_t* reg)
-{
- regex_t *head, *prev;
-
- prev = reg;
- head = prev->chain;
- if (IS_NOT_NULL(head)) {
- reg->state = ONIG_STATE_MODIFY;
- while (IS_NOT_NULL(head->chain)) {
- prev = head;
- head = head->chain;
- }
- prev->chain = (regex_t* )NULL;
- REGEX_TRANSFER(reg, head);
- }
}
+#endif
#ifdef ONIG_DEBUG_COMPILE
-static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
+static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
#endif
#ifdef ONIG_DEBUG_PARSE_TREE
-static void print_tree P_((FILE* f, Node* node));
+static void print_tree(FILE* f, Node* node);
#endif
+#ifdef RUBY
extern int
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo)
+{
+ return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0); /* no sourcefile/sourceline passed */
+}
+#endif
+
+#ifdef RUBY
+extern int
+onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
+#else
+extern int
+onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
+ OnigErrorInfo* einfo)
+#endif
{
#define COMPILE_INIT_SIZE 20
@@ -5707,9 +5713,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+#ifdef RUBY
scan_env.sourcefile = sourcefile;
scan_env.sourceline = sourceline;
- reg->state = ONIG_STATE_COMPILING;
+#endif
#ifdef ONIG_DEBUG
print_enc_string(stderr, reg->enc, pattern, pattern_end);
@@ -5794,17 +5801,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (scan_env.backrefed_mem == 0
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
|| scan_env.num_call == 0
-#endif
+# endif
) {
setup_comb_exp_check(root, 0, &scan_env);
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
if (scan_env.has_recursion != 0) {
scan_env.num_comb_exp_check = 0;
}
else
-#endif
+# endif
if (scan_env.comb_exp_max_regnum > 0) {
int i;
for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
@@ -5858,14 +5865,13 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
onig_node_free(root);
#ifdef ONIG_DEBUG_COMPILE
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
onig_print_names(stderr, reg);
-#endif
+# endif
print_compiled_byte_code_list(stderr, reg);
#endif
end:
- reg->state = ONIG_STATE_NORMAL;
return r;
err_unset:
@@ -5889,27 +5895,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
return r;
}
-#ifdef USE_RECOMPILE_API
-extern int
-onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
- OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
- OnigErrorInfo* einfo)
-{
- int r;
- regex_t *new_reg;
-
- r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
- if (r) return r;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_transfer(reg, new_reg);
- }
- else {
- onig_chain_link_add(reg, new_reg);
- }
- return 0;
-}
-#endif
-
static int onig_inited = 0;
extern int
@@ -5931,8 +5916,6 @@ onig_reg_init(regex_t* reg, OnigOptionType option,
return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
}
- (reg)->state = ONIG_STATE_MODIFY;
-
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
option &= ~ONIG_OPTION_SINGLELINE;
@@ -5968,7 +5951,7 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern,
r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
if (r) return r;
- r = onig_compile(reg, pattern, pattern_end, einfo, NULL, 0);
+ r = onig_compile(reg, pattern, pattern_end, einfo);
return r;
}
@@ -5985,7 +5968,7 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
if (r) goto err;
- r = onig_compile(*reg, pattern, pattern_end, einfo, NULL, 0);
+ r = onig_compile(*reg, pattern, pattern_end, einfo);
if (r) {
err:
onig_free(*reg);
@@ -5994,6 +5977,11 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
return r;
}
+extern int
+onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
+{
+ return onig_init(); /* both arguments are unused; simply delegates to onig_init() */
+}
extern int
onig_init(void)
@@ -6001,11 +5989,12 @@ onig_init(void)
if (onig_inited != 0)
return 0;
- THREAD_SYSTEM_INIT;
- THREAD_ATOMIC_START;
-
onig_inited = 1;
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
+#endif
+
onigenc_init();
/* onigenc_set_default_caseconv_table((UChar* )0); */
@@ -6013,7 +6002,6 @@ onig_init(void)
onig_statistics_init();
#endif
- THREAD_ATOMIC_END;
return 0;
}
@@ -6052,26 +6040,18 @@ exec_end_call_list(void)
extern int
onig_end(void)
{
- THREAD_ATOMIC_START;
-
exec_end_call_list();
#ifdef ONIG_DEBUG_STATISTICS
onig_print_statistics(stderr);
#endif
-#ifdef USE_SHARED_CCLASS_TABLE
- onig_free_shared_cclass_table();
-#endif
-
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- onig_free_node_list();
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+ _CrtDumpMemoryLeaks();
#endif
onig_inited = 0;
- THREAD_ATOMIC_END;
- THREAD_SYSTEM_END;
return 0;
}
@@ -6137,14 +6117,14 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
#ifdef ONIG_DEBUG
/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-#define ARG_STATE_CHECK 6
+# define ARG_SPECIAL -1
+# define ARG_NON 0
+# define ARG_RELADDR 1
+# define ARG_ABSADDR 2
+# define ARG_LENGTH 3
+# define ARG_MEMNUM 4
+# define ARG_OPTION 5
+# define ARG_STATE_CHECK 6
OnigOpInfoType OnigOpInfo[] = {
{ OP_FINISH, "finish", ARG_NON },
@@ -6169,7 +6149,6 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
- { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@@ -6272,14 +6251,14 @@ op2arg_type(int opcode)
return ARG_SPECIAL;
}
-#ifdef ONIG_DEBUG_PARSE_TREE
+# ifdef ONIG_DEBUG_PARSE_TREE
static void
Indent(FILE* f, int indent)
{
int i;
for (i = 0; i < indent; i++) putc(' ', f);
}
-#endif /* ONIG_DEBUG_PARSE_TREE */
+# endif /* ONIG_DEBUG_PARSE_TREE */
static void
p_string(FILE* f, ptrdiff_t len, UChar* s)
@@ -6318,7 +6297,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
break;
case ARG_RELADDR:
GET_RELADDR_INC(addr, bp);
- fprintf(f, ":(+%d)", addr);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
break;
case ARG_ABSADDR:
GET_ABSADDR_INC(addr, bp);
@@ -6423,9 +6402,9 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_CCLASS_MB_NOT:
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d", (int )code, len);
@@ -6437,24 +6416,14 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
bp += SIZE_BITSET;
GET_LENGTH_INC(len, bp);
q = bp;
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
+# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
ALIGNMENT_RIGHT(q);
-#endif
+# endif
GET_CODE_POINT(code, q);
bp += len;
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
- case OP_CCLASS_NODE:
- {
- CClassNode *cc;
-
- GET_POINTER_INC(cc, bp);
- n = bitset_on_num(cc->bs);
- fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
- }
- break;
-
case OP_BACKREFN_IC:
mem = *((MemNumType* )bp);
bp += SIZE_MEMNUM;
@@ -6507,7 +6476,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_PUSH_IF_PEEK_NEXT:
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
+ fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
p_string(f, 1, bp);
bp += 1;
break;
@@ -6520,7 +6489,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
case OP_PUSH_LOOK_BEHIND_NOT:
GET_RELADDR_INC(addr, bp);
GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:(%d)", len, addr);
+ fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
break;
case OP_STATE_CHECK_PUSH:
@@ -6529,13 +6498,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
bp += SIZE_STATE_CHECK_NUM;
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":%d:(%d)", scn, addr);
+ fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
break;
case OP_CONDITION:
GET_MEMNUM_INC(mem, bp);
GET_RELADDR_INC(addr, bp);
- fprintf(f, ":%d:(%d)", mem, addr);
+ fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
break;
default:
@@ -6547,7 +6516,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
if (nextp) *nextp = bp;
}
-#ifdef ONIG_DEBUG_COMPILE
+# ifdef ONIG_DEBUG_COMPILE
static void
print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
@@ -6569,9 +6538,9 @@ print_compiled_byte_code_list(FILE* f, regex_t* reg)
fprintf(f, "\n");
}
-#endif /* ONIG_DEBUG_COMPILE */
+# endif /* ONIG_DEBUG_COMPILE */
-#ifdef ONIG_DEBUG_PARSE_TREE
+# ifdef ONIG_DEBUG_PARSE_TREE
void
print_indent_tree(FILE* f, Node* node, int indent)
{
@@ -6621,8 +6590,8 @@ print_indent_tree(FILE* f, Node* node, int indent)
if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
if (NCCLASS(node)->mbuf) {
BBuf* bbuf = NCCLASS(node)->mbuf;
- OnigCodePoint* data = (OnigCodePoint*)bbuf->p;
- OnigCodePoint* end = (OnigCodePoint*)(bbuf->p + bbuf->used);
+ OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
+ OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
fprintf(f, "%d", *data++);
for (; data < end; data+=2) {
fprintf(f, ",");
@@ -6664,10 +6633,10 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
-#ifdef USE_WORD_BEGIN_END
+# ifdef USE_WORD_BEGIN_END
case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
case ANCHOR_WORD_END: fputs("word end", f); break;
-#endif
+# endif
case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
@@ -6693,7 +6662,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
case NT_CALL:
{
CallNode* cn = NCALL(node);
@@ -6701,7 +6670,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
-#endif
+# endif
case NT_QTFR:
fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
@@ -6752,5 +6721,5 @@ print_tree(FILE* f, Node* node)
{
print_indent_tree(f, node, 0);
}
-#endif /* ONIG_DEBUG_PARSE_TREE */
+# endif /* ONIG_DEBUG_PARSE_TREE */
#endif /* ONIG_DEBUG */
diff --git a/regenc.c b/regenc.c
index 5cacbdfaa4..ca09a7fcb3 100644
--- a/regenc.c
+++ b/regenc.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -364,12 +364,14 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
};
#endif
+#if 0
extern void
onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
/* nothing */
/* obsoleted. */
}
+#endif
extern UChar*
onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end)
@@ -631,8 +633,10 @@ onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding e
extern int
onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED)
{
+#ifdef RUBY
if (code > 0xff)
rb_raise(rb_eRangeError, "%u out of char range", code);
+#endif
*buf = (UChar )(code & 0xff);
return 1;
}
@@ -892,6 +896,7 @@ onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end,
return 0;
}
+#if 0
/* Property management */
static int
resize_property_list(int new_size, const OnigCodePoint*** plist, int* psize)
@@ -944,68 +949,64 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
(hash_data_type )(*pnum + ONIGENC_MAX_STD_CTYPE));
return 0;
}
+#endif
extern int
-onigenc_property_list_init(int (*f)(void))
-{
- int r;
-
- THREAD_ATOMIC_START;
-
- r = f();
-
- THREAD_ATOMIC_END;
- return r;
-}
-
-extern int
-onigenc_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
- OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
+onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end,
+ OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
int codepoint_length;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end);
if (codepoint_length < 0)
return codepoint_length; /* encoding invalid */
code = ONIGENC_MBC_TO_CODE(enc, *pp, end);
*pp += codepoint_length;
- if (code>='a' && code<='z' && (flags&ONIGENC_CASE_UPCASE))
- flags |= ONIGENC_CASE_MODIFIED, code += 'A'-'a';
- else if (code>='A' && code<='Z' && (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)))
- flags |= ONIGENC_CASE_MODIFIED, code += 'a'-'A';
+ if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'A' - 'a';
+ } else if (code >= 'A' && code <= 'Z' &&
+ (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'a' - 'A';
+ }
to += ONIGENC_CODE_TO_MBC(enc, code, to);
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
extern int
-onigenc_single_byte_ascii_only_case_map (OnigCaseFoldType* flagP, const OnigUChar** pp,
- const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
- const struct OnigEncodingTypeST* enc)
+onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp,
+ const OnigUChar* end, OnigUChar* to, OnigUChar* to_end,
+ const struct OnigEncodingTypeST* enc)
{
OnigCodePoint code;
OnigUChar *to_start = to;
OnigCaseFoldType flags = *flagP;
- while (*pp<end && to<to_end) {
+ while (*pp < end && to < to_end) {
code = *(*pp)++;
- if (code>='a' && code<='z' && (flags&ONIGENC_CASE_UPCASE))
- flags |= ONIGENC_CASE_MODIFIED, code += 'A'-'a';
- else if (code>='A' && code<='Z' && (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)))
- flags |= ONIGENC_CASE_MODIFIED, code += 'a'-'A';
+ if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'A' - 'a';
+ } else if (code >= 'A' && code <= 'Z' &&
+ (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) {
+ flags |= ONIGENC_CASE_MODIFIED;
+ code += 'a' - 'A';
+ }
*to++ = code;
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */
- flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE);
+ flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE);
}
*flagP = flags;
- return (int)(to-to_start);
+ return (int )(to - to_start);
}
diff --git a/regenc.h b/regenc.h
index 2c4c9343c5..10ca18c2a4 100644
--- a/regenc.h
+++ b/regenc.h
@@ -1,11 +1,11 @@
-#ifndef ONIGURUMA_REGENC_H
-#define ONIGURUMA_REGENC_H
+#ifndef ONIGMO_REGENC_H
+#define ONIGMO_REGENC_H
/**********************************************************************
regenc.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,18 +30,32 @@
* SUCH DAMAGE.
*/
-#ifndef REGINT_H
-#ifndef RUBY_EXTERN
-#include "ruby/config.h"
-#include "ruby/defines.h"
-#endif
+#if !defined(RUBY) && (defined(RUBY_EXPORT) || defined(ONIG_ENC_REGISTER))
+# define RUBY
#endif
+#ifdef RUBY
+# ifndef ONIGMO_REGINT_H
+# ifndef RUBY_EXTERN
+# include "ruby/config.h"
+# include "ruby/defines.h"
+# endif
+# endif
+#else /* RUBY */
+# ifndef PACKAGE
+/* PACKAGE is defined in config.h */
+# include "config.h"
+# endif
+#endif /* RUBY */
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
+# undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
-#include "ruby/oniguruma.h"
+#ifdef RUBY
+# include "ruby/onigmo.h"
+#else
+# include "onigmo.h"
+#endif
RUBY_SYMBOL_EXPORT_BEGIN
@@ -52,23 +66,23 @@ typedef struct {
#ifndef NULL
-#define NULL ((void* )0)
+# define NULL ((void* )0)
#endif
#ifndef TRUE
-#define TRUE 1
+# define TRUE 1
#endif
#ifndef FALSE
-#define FALSE 0
+# define FALSE 0
#endif
#ifndef ARG_UNUSED
-#if defined(__GNUC__)
+# if defined(__GNUC__)
# define ARG_UNUSED __attribute__ ((unused))
-#else
+# else
# define ARG_UNUSED
-#endif
+# endif
#endif
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
@@ -111,7 +125,7 @@ typedef struct {
{(short int )(sizeof(name) - 1), (name), (ctype)}
#ifndef numberof
-#define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
+# define numberof(array) (int )(sizeof(array) / sizeof((array)[0]))
#endif
@@ -125,48 +139,48 @@ typedef struct {
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
/* for encoding system implementation (internal) */
-ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
-ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc));
-ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
-ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-CONSTFUNC(ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)));
-PUREFUNC(ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)));
-ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
+ONIG_EXTERN int onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
+ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_apply_all_case_fold_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg);
+ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
/* methods for single byte encoding */
-ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc));
-CONSTFUNC(ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p, const UChar* e, OnigEncoding enc)));
-PUREFUNC(ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end, OnigEncoding enc)));
-CONSTFUNC(ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
-ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf, OnigEncoding enc));
-CONSTFUNC(ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc)));
-CONSTFUNC(ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)));
-CONSTFUNC(ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end, OnigEncoding enc)));
-CONSTFUNC(ONIG_EXTERN int onigenc_ascii_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc)));
+ONIG_EXTERN int onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc);
+ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc);
+ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head(const UChar* start, const UChar* s, const OnigUChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match(const UChar* s, const UChar* end, OnigEncoding enc);
+ONIG_EXTERN int onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
/* methods for multi byte encoding */
-ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
-CONSTFUNC(ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
-ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, const UChar* p, const UChar* end));
-ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
-CONSTFUNC(ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code, OnigEncoding enc)));
-ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
-ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
+ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end);
+ONIG_EXTERN int onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower);
+ONIG_EXTERN int onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
+ONIG_EXTERN int onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
+ONIG_EXTERN int onigenc_unicode_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end);
+ONIG_EXTERN int onigenc_mb2_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
+ONIG_EXTERN int onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc);
+ONIG_EXTERN int onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf);
+ONIG_EXTERN int onigenc_mb4_is_code_ctype(OnigEncoding enc, OnigCodePoint code, unsigned int ctype);
-ONIG_EXTERN int onigenc_unicode_case_map P_((OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc));
+ONIG_EXTERN int onigenc_unicode_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc);
/* in enc/unicode.c */
-ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
-ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
-ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
-ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
-ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc));
+ONIG_EXTERN int onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc);
+ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc);
+ONIG_EXTERN int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]);
+ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]);
+ONIG_EXTERN int onigenc_unicode_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold);
+ONIG_EXTERN int onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc);
#define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8)
@@ -182,14 +196,14 @@ ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[];
ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[];
ONIG_EXTERN int
-onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
+onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
ONIG_EXTERN int
-onigenc_with_ascii_strnicmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n));
+onigenc_with_ascii_strnicmp(OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n);
ONIG_EXTERN UChar*
-onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n));
+onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n);
/* defined in regexec.c, but used in enc/xxx.c */
-extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code));
+extern int onig_is_in_code_range(const UChar* p, OnigCodePoint code);
ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding;
ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[];
@@ -212,9 +226,9 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[];
#ifdef ONIG_ENC_REGISTER
extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
-#define OnigEncodingName(n) encoding_##n
-#define OnigEncodingDeclare(n) static const OnigEncodingType OnigEncodingName(n)
-#define OnigEncodingDefine(f,n) \
+# define OnigEncodingName(n) encoding_##n
+# define OnigEncodingDeclare(n) static const OnigEncodingType OnigEncodingName(n)
+# define OnigEncodingDefine(f,n) \
OnigEncodingDeclare(n); \
void Init_##f(void) { \
ONIG_ENC_REGISTER(OnigEncodingName(n).name, \
@@ -222,9 +236,9 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
} \
OnigEncodingDeclare(n)
#else
-#define OnigEncodingName(n) OnigEncoding##n
-#define OnigEncodingDeclare(n) const OnigEncodingType OnigEncodingName(n)
-#define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
+# define OnigEncodingName(n) OnigEncoding##n
+# define OnigEncodingDeclare(n) const OnigEncodingType OnigEncodingName(n)
+# define OnigEncodingDefine(f,n) OnigEncodingDeclare(n)
#endif
/* macros for define replica encoding and encoding alias */
@@ -234,4 +248,4 @@ extern int ONIG_ENC_REGISTER(const char *, OnigEncoding);
RUBY_SYMBOL_EXPORT_END
-#endif /* ONIGURUMA_REGENC_H */
+#endif /* ONIGMO_REGENC_H */
diff --git a/regerror.c b/regerror.c
index 9ec3f65f4c..dbe3ee4094 100644
--- a/regerror.c
+++ b/regerror.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,13 +31,7 @@
#include "regint.h"
#include <stdio.h> /* for vsnprintf() */
-#ifdef HAVE_STDARG_PROTOTYPES
#include <stdarg.h>
-#define va_init_list(a,b) va_start(a,b)
-#else
-#include <varargs.h>
-#define va_init_list(a,b) va_start(a)
-#endif
extern UChar*
onig_error_code_to_format(OnigPosition code)
@@ -65,6 +59,8 @@ onig_error_code_to_format(OnigPosition code)
p = "unexpected bytecode (bug)"; break;
case ONIGERR_MATCH_STACK_LIMIT_OVER:
p = "match-stack limit over"; break;
+ case ONIGERR_PARSE_DEPTH_LIMIT_OVER:
+ p = "parse depth limit over"; break;
case ONIGERR_DEFAULT_ENCODING_IS_NOT_SET:
p = "default multibyte-encoding is not set"; break;
case ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR:
@@ -179,8 +175,6 @@ onig_error_code_to_format(OnigPosition code)
p = "not supported encoding combination"; break;
case ONIGERR_INVALID_COMBINATION_OF_OPTIONS:
p = "invalid combination of options"; break;
- case ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT:
- p = "over thread pass limit count"; break;
default:
p = "undefined error code"; break;
@@ -191,12 +185,12 @@ onig_error_code_to_format(OnigPosition code)
static void sprint_byte(char* s, unsigned int v)
{
- sprintf(s, "%02x", (v & 0377));
+ xsnprintf(s, 3, "%02x", (v & 0377));
}
static void sprint_byte_with_x(char* s, unsigned int v)
{
- sprintf(s, "\\x%02x", (v & 0377));
+ xsnprintf(s, 5, "\\x%02x", (v & 0377));
}
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
@@ -252,14 +246,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
#define MAX_ERROR_PAR_LEN 30
extern int
-#ifdef HAVE_STDARG_PROTOTYPES
onig_error_code_to_str(UChar* s, OnigPosition code, ...)
-#else
-onig_error_code_to_str(s, code, va_alist)
- UChar* s;
- OnigPosition code;
- va_dcl
-#endif
{
UChar *p, *q;
OnigErrorInfo* einfo;
@@ -268,7 +255,7 @@ onig_error_code_to_str(s, code, va_alist)
UChar parbuf[MAX_ERROR_PAR_LEN];
va_list vargs;
- va_init_list(vargs, code);
+ va_start(vargs, code);
switch (code) {
case ONIGERR_UNDEFINED_NAME_REFERENCE:
@@ -337,26 +324,17 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
need = (pat_end - pat) * 4 + 4;
if (n + need < (size_t )bufsize) {
- strcat((char* )buf, ": /");
+ xstrcat((char* )buf, ": /", bufsize);
s = buf + onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, buf);
p = pat;
while (p < pat_end) {
- if (*p == '\\') {
- *s++ = *p++;
- len = enclen(enc, p, pat_end);
- while (len-- > 0) *s++ = *p++;
- }
- else if (*p == '/') {
- *s++ = (unsigned char )'\\';
- *s++ = *p++;
- }
- else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
+ if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
len = enclen(enc, p, pat_end);
if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++;
}
- else { /* for UTF16 */
+ else { /* for UTF16/32 */
int blen;
while (len-- > 0) {
@@ -367,6 +345,15 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
}
}
}
+ else if (*p == '\\') {
+ *s++ = *p++;
+ len = enclen(enc, p, pat_end);
+ while (len-- > 0) *s++ = *p++;
+ }
+ else if (*p == '/') {
+ *s++ = (unsigned char )'\\';
+ *s++ = *p++;
+ }
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
@@ -384,25 +371,15 @@ onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
}
}
+#if 0 /* unused */
void
-#ifdef HAVE_STDARG_PROTOTYPES
onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
UChar* pat, UChar* pat_end, const UChar *fmt, ...)
-#else
-onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
- UChar buf[];
- int bufsize;
- OnigEncoding enc;
- UChar* pat;
- UChar* pat_end;
- const UChar *fmt;
- va_dcl
-#endif
{
va_list args;
- va_init_list(args, fmt);
+ va_start(args, fmt);
onig_vsnprintf_with_pattern(buf, bufsize, enc,
pat, pat_end, fmt, args);
va_end(args);
}
-
+#endif
diff --git a/regexec.c b/regexec.c
index f8813875dc..b27884b32c 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,33 +30,39 @@
#include "regint.h"
-/* #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
+#ifdef RUBY
+# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#else
+# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+#endif
-#ifndef USE_DIRECT_THREADED_VM
+#ifndef USE_TOKEN_THREADED_VM
# ifdef __GNUC__
-# define USE_DIRECT_THREADED_VM 1
+# define USE_TOKEN_THREADED_VM 1
# else
-# define USE_DIRECT_THREADED_VM 0
+# define USE_TOKEN_THREADED_VM 0
# endif
#endif
-#define ENC_DUMMY_FLAG (1<<24)
+#ifdef RUBY
+# define ENC_DUMMY_FLAG (1<<24)
static inline int
rb_enc_asciicompat(OnigEncoding enc)
{
- return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
+ return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
}
-#undef ONIGENC_IS_MBC_ASCII_WORD
-#define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
+# undef ONIGENC_IS_MBC_ASCII_WORD
+# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
(rb_enc_asciicompat(enc) ? (ISALNUM(*s) || *s=='_') : \
onigenc_ascii_is_code_ctype( \
ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
+#endif /* RUBY */
#ifdef USE_CRNL_AS_LINE_TERMINATOR
-#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
+# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
(ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
-#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
+# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
@@ -90,7 +96,7 @@ is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
}
}
#else /* USE_CRNL_AS_LINE_TERMINATOR */
-#define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
+# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif /* USE_CRNL_AS_LINE_TERMINATOR */
@@ -105,7 +111,7 @@ history_tree_clear(OnigCaptureTreeNode* node)
if (IS_NOT_NULL(node)) {
for (i = 0; i < node->num_childs; i++) {
if (IS_NOT_NULL(node->childs[i])) {
- history_tree_free(node->childs[i]);
+ history_tree_free(node->childs[i]);
}
}
for (i = 0; i < node->allocated; i++) {
@@ -156,7 +162,7 @@ history_node_new(void)
static int
history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
{
-#define HISTORY_TREE_INIT_ALLOC_SIZE 8
+# define HISTORY_TREE_INIT_ALLOC_SIZE 8
if (parent->num_childs >= parent->allocated) {
int n, i;
@@ -164,15 +170,15 @@ history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
if (IS_NULL(parent->childs)) {
n = HISTORY_TREE_INIT_ALLOC_SIZE;
parent->childs =
- (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
+ (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
CHECK_NULL_RETURN_MEMERR(parent->childs);
}
else {
OnigCaptureTreeNode** tmp;
n = parent->allocated * 2;
tmp =
- (OnigCaptureTreeNode** )xrealloc(parent->childs,
- sizeof(OnigCaptureTreeNode*) * n);
+ (OnigCaptureTreeNode** )xrealloc(parent->childs,
+ sizeof(OnigCaptureTreeNode*) * n);
if (tmp == 0) {
history_tree_clear(parent);
return ONIGERR_MEMORY;
@@ -348,7 +354,7 @@ onig_region_free(OnigRegion* r, int free_self)
}
extern void
-onig_region_copy(OnigRegion* to, OnigRegion* from)
+onig_region_copy(OnigRegion* to, const OnigRegion* from)
{
#define RREGC_SIZE (sizeof(int) * from->num_regs)
int i, r;
@@ -404,7 +410,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
+# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
@@ -413,7 +419,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
(msa).best_len = ONIG_MISMATCH;\
} while(0)
#else
-#define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
+# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
@@ -424,9 +430,9 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
+# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16
-#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
+# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do { \
if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
offset = ((offset) * (state_num)) >> 3;\
@@ -452,14 +458,14 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
}\
} while(0)
-#define MATCH_ARG_FREE(msa) do {\
+# define MATCH_ARG_FREE(msa) do {\
if ((msa).stack_p) xfree((msa).stack_p);\
if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
if ((msa).state_check_buff) xfree((msa).state_check_buff);\
}\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
-#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
+# define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
@@ -548,9 +554,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
n *= 2;
if (limit_size != 0 && n > limit_size) {
if ((unsigned int )(stk_end - stk_base) == limit_size)
- return ONIGERR_MATCH_STACK_LIMIT_OVER;
+ return ONIGERR_MATCH_STACK_LIMIT_OVER;
else
- n = limit_size;
+ n = limit_size;
}
x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
if (IS_NULL(x)) {
@@ -587,9 +593,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define STATE_CHECK_POS(s,snum) \
+# define STATE_CHECK_POS(s,snum) \
(((s) - str) * num_comb_exp_check + ((snum) - 1))
-#define STATE_CHECK_VAL(v,snum) do {\
+# define STATE_CHECK_VAL(v,snum) do {\
if (state_check_buff != NULL) {\
int x = STATE_CHECK_POS(s,snum);\
(v) = state_check_buff[x/8] & (1<<(x%8));\
@@ -598,13 +604,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
} while(0)
-#define ELSE_IF_STATE_CHECK_MARK(stk) \
+# define ELSE_IF_STATE_CHECK_MARK(stk) \
else if ((stk)->type == STK_STATE_CHECK_MARK) { \
int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
state_check_buff[x/8] |= (1<<(x%8)); \
}
-#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
+# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
@@ -615,14 +621,14 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
STACK_INC;\
} while(0)
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+# define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.state_check = 0;\
STACK_INC;\
} while(0)
-#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
+# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
STACK_ENSURE(1);\
stk->type = STK_ALT;\
stk->u.state.pcode = (pat);\
@@ -633,7 +639,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
STACK_INC;\
} while(0)
-#define STACK_PUSH_STATE_CHECK(s,snum) do {\
+# define STACK_PUSH_STATE_CHECK(s,snum) do {\
if (state_check_buff != NULL) {\
STACK_ENSURE(1);\
stk->type = STK_STATE_CHECK_MARK;\
@@ -645,9 +651,9 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#else /* USE_COMBINATION_EXPLOSION_CHECK */
-#define ELSE_IF_STATE_CHECK_MARK(stk)
+# define ELSE_IF_STATE_CHECK_MARK(stk)
-#define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
+# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
@@ -657,7 +663,7 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
STACK_INC;\
} while(0)
-#define STACK_PUSH_ENSURED(stack_type,pat) do {\
+# define STACK_PUSH_ENSURED(stack_type,pat) do {\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
STACK_INC;\
@@ -781,13 +787,13 @@ stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
#ifdef ONIG_DEBUG
-#define STACK_BASE_CHECK(p, at) \
+# define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
goto stack_error;\
}
#else
-#define STACK_BASE_CHECK(p, at)
+# define STACK_BASE_CHECK(p, at)
#endif
#define STACK_POP_ONE do {\
@@ -1124,16 +1130,16 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define IS_EMPTY_STR (str == end)
-#define ON_STR_BEGIN(s) ((s) == str)
-#define ON_STR_END(s) ((s) == end)
+#define ON_STR_BEGIN(s) ((s) == str)
+#define ON_STR_END(s) ((s) == end)
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-#define DATA_ENSURE_CHECK1 (s < right_range)
-#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
-#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+# define DATA_ENSURE_CHECK1 (s < right_range)
+# define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
+# define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
#else
-#define DATA_ENSURE_CHECK1 (s < end)
-#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
-#define DATA_ENSURE(n) if (s + (n) > end) goto fail
+# define DATA_ENSURE_CHECK1 (s < end)
+# define DATA_ENSURE_CHECK(n) (s + (n) <= end)
+# define DATA_ENSURE(n) if (s + (n) > end) goto fail
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
@@ -1150,29 +1156,29 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
if (k->type == STK_MEM_START) {
n = k->u.mem.num;
if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
- BIT_STATUS_AT(reg->capture_history, n) != 0) {
- child = history_node_new();
- CHECK_NULL_RETURN_MEMERR(child);
- child->group = n;
- child->beg = k->u.mem.pstr - str;
- r = history_tree_add_child(node, child);
- if (r != 0) {
- history_tree_free(child);
- return r;
- }
- *kp = (k + 1);
- r = make_capture_history_tree(child, kp, stk_top, str, reg);
- if (r != 0) return r;
-
- k = *kp;
- child->end = k->u.mem.pstr - str;
+ BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ child = history_node_new();
+ CHECK_NULL_RETURN_MEMERR(child);
+ child->group = n;
+ child->beg = k->u.mem.pstr - str;
+ r = history_tree_add_child(node, child);
+ if (r != 0) {
+ history_tree_free(child);
+ return r;
+ }
+ *kp = (k + 1);
+ r = make_capture_history_tree(child, kp, stk_top, str, reg);
+ if (r != 0) return r;
+
+ k = *kp;
+ child->end = k->u.mem.pstr - str;
}
}
else if (k->type == STK_MEM_END) {
if (k->u.mem.num == node->group) {
- node->end = k->u.mem.pstr - str;
- *kp = k;
- return 0;
+ node->end = k->u.mem.pstr - str;
+ *kp = k;
+ return 0;
}
}
k++;
@@ -1195,10 +1201,10 @@ static int mem_is_in_memp(int mem, int num, UChar* memp)
return 0;
}
-static int backref_match_at_nested_level(regex_t* reg
- , OnigStackType* top, OnigStackType* stk_base
- , int ignore_case, int case_fold_flag
- , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
+static int backref_match_at_nested_level(regex_t* reg,
+ OnigStackType* top, OnigStackType* stk_base,
+ int ignore_case, int case_fold_flag,
+ int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
{
UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
int level;
@@ -1255,27 +1261,37 @@ static int backref_match_at_nested_level(regex_t* reg
#ifdef ONIG_DEBUG_STATISTICS
-#define USE_TIMEOFDAY
-
-#ifdef USE_TIMEOFDAY
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
+# ifdef _WIN32
+# include <windows.h>
+static LARGE_INTEGER ts, te, freq;
+# define GETTIME(t) QueryPerformanceCounter(&(t))
+# define TIMEDIFF(te,ts) (unsigned long )(((te).QuadPart - (ts).QuadPart) \
+ * 1000000 / freq.QuadPart)
+# else /* _WIN32 */
+
+# define USE_TIMEOFDAY
+
+# ifdef USE_TIMEOFDAY
+# ifdef HAVE_SYS_TIME_H
+# include <sys/time.h>
+# endif
+# ifdef HAVE_UNISTD_H
+# include <unistd.h>
+# endif
static struct timeval ts, te;
-#define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
-#define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
- (((te).tv_sec - (ts).tv_sec)*1000000))
-#else /* USE_TIMEOFDAY */
-#ifdef HAVE_SYS_TIMES_H
-#include <sys/times.h>
-#endif
+# define GETTIME(t) gettimeofday(&(t), (struct timezone* )0)
+# define TIMEDIFF(te,ts) (((te).tv_usec - (ts).tv_usec) + \
+ (((te).tv_sec - (ts).tv_sec)*1000000))
+# else /* USE_TIMEOFDAY */
+# ifdef HAVE_SYS_TIMES_H
+# include <sys/times.h>
+# endif
static struct tms ts, te;
-#define GETTIME(t) times(&(t))
-#define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
-#endif /* USE_TIMEOFDAY */
+# define GETTIME(t) times(&(t))
+# define TIMEDIFF(te,ts) ((te).tms_utime - (ts).tms_utime)
+# endif /* USE_TIMEOFDAY */
+
+# endif /* _WIN32 */
static int OpCounter[256];
static int OpPrevCounter[256];
@@ -1284,14 +1300,14 @@ static int OpCurr = OP_FINISH;
static int OpPrevTarget = OP_FAIL;
static int MaxStackDepth = 0;
-#define MOP_IN(opcode) do {\
+# define MOP_IN(opcode) do {\
if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
OpCurr = opcode;\
OpCounter[opcode]++;\
GETTIME(ts);\
} while(0)
-#define MOP_OUT do {\
+# define MOP_OUT do {\
GETTIME(te);\
OpTime[OpCurr] += TIMEDIFF(te, ts);\
} while(0)
@@ -1304,6 +1320,9 @@ onig_statistics_init(void)
OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
}
MaxStackDepth = 0;
+# ifdef _WIN32
+ QueryPerformanceFrequency(&freq);
+# endif
}
extern void
@@ -1318,28 +1337,45 @@ onig_print_statistics(FILE* f)
fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
}
-#define STACK_INC do {\
+# define STACK_INC do {\
stk++;\
if (stk - stk_base > MaxStackDepth) \
MaxStackDepth = stk - stk_base;\
} while(0)
#else /* ONIG_DEBUG_STATISTICS */
-#define STACK_INC stk++
+# define STACK_INC stk++
-#define MOP_IN(opcode)
-#define MOP_OUT
+# define MOP_IN(opcode)
+# define MOP_OUT
#endif /* ONIG_DEBUG_STATISTICS */
-
-/* matching region of POSIX API */
-typedef int regoff_t;
-
-typedef struct {
- regoff_t rm_so;
- regoff_t rm_eo;
-} posix_regmatch_t;
+#ifdef ONIG_DEBUG_MATCH
+/* Map a match-stack entry type (one of the STK_* constants) to the short
+ * label printed by the ONIG_DEBUG_MATCH trace.
+ *
+ * Every label is exactly six characters wide so that whatever is printed
+ * after it stays column-aligned; an unrecognized type yields a blank label.
+ * The returned pointers refer to string literals: callers must neither
+ * modify nor free them, hence the const-qualified return type. */
+static const char *
+stack_type_str(int stack_type)
+{
+  switch (stack_type) {
+  case STK_ALT:              return "Alt   ";
+  case STK_LOOK_BEHIND_NOT:  return "LBNot ";
+  case STK_POS_NOT:          return "PosNot";
+  case STK_MEM_START:        return "MemS  ";
+  case STK_MEM_END:          return "MemE  ";
+  case STK_REPEAT_INC:       return "RepInc";
+  case STK_STATE_CHECK_MARK: return "StChMk";
+  case STK_NULL_CHECK_START: return "NulChS";
+  case STK_NULL_CHECK_END:   return "NulChE";
+  case STK_MEM_END_MARK:     return "MemEMk";
+  case STK_POS:              return "Pos   ";
+  case STK_STOP_BT:          return "StopBt";
+  case STK_REPEAT:           return "Rep   ";
+  case STK_CALL_FRAME:       return "Call  ";
+  case STK_RETURN:           return "Ret   ";
+  case STK_VOID:             return "Void  ";
+  default:                   return "      "; /* unknown type: blank column */
+  }
+}
+#endif
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
@@ -1376,13 +1412,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int num_comb_exp_check = reg->num_comb_exp_check;
#endif
-#if USE_DIRECT_THREADED_VM
-#define VM_LOOP JUMP;
-#define VM_LOOP_END
-#define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
-#define DEFAULT L_DEFAULT:
-#define NEXT sprev = sbegin; JUMP
-#define JUMP goto *oplabels[*p++]
+#if USE_TOKEN_THREADED_VM
+# define OP_OFFSET 1
+# define VM_LOOP JUMP;
+# define VM_LOOP_END
+# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
+# define DEFAULT L_DEFAULT:
+# define NEXT sprev = sbegin; JUMP
+# define JUMP goto *oplabels[*p++]
static const void *oplabels[] = {
&&L_OP_FINISH, /* matching process terminator (no more alternative) */
@@ -1410,7 +1447,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_CCLASS_NOT,
&&L_OP_CCLASS_MB_NOT,
&&L_OP_CCLASS_MIX_NOT,
- &&L_OP_CCLASS_NODE, /* pointer to CClassNode node */
&&L_OP_ANYCHAR, /* "." */
&&L_OP_ANYCHAR_ML, /* "." multi-line */
@@ -1423,24 +1459,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_NOT_WORD,
&&L_OP_WORD_BOUND,
&&L_OP_NOT_WORD_BOUND,
-#ifdef USE_WORD_BEGIN_END
+# ifdef USE_WORD_BEGIN_END
&&L_OP_WORD_BEGIN,
&&L_OP_WORD_END,
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_ASCII_WORD,
&&L_OP_NOT_ASCII_WORD,
&&L_OP_ASCII_WORD_BOUND,
&&L_OP_NOT_ASCII_WORD_BOUND,
-#ifdef USE_WORD_BEGIN_END
+# ifdef USE_WORD_BEGIN_END
&&L_OP_ASCII_WORD_BEGIN,
&&L_OP_ASCII_WORD_END,
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_BEGIN_BUF,
&&L_OP_END_BUF,
@@ -1456,25 +1492,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_BACKREFN_IC,
&&L_OP_BACKREF_MULTI,
&&L_OP_BACKREF_MULTI_IC,
-#ifdef USE_BACKREF_WITH_LEVEL
+# ifdef USE_BACKREF_WITH_LEVEL
&&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
-#else
+# else
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_MEMORY_START,
&&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
&&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
&&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
-#else
+# else
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_MEMORY_END,
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
&&L_OP_MEMORY_END_REC, /* push marker to stack */
-#else
+# else
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_KEEP,
@@ -1482,7 +1518,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_JUMP,
&&L_OP_PUSH,
&&L_OP_POP,
+# ifdef USE_OP_PUSH_OR_JUMP_EXACT
&&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
+# else
+ &&L_DEFAULT,
+# endif
&&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
&&L_OP_REPEAT, /* {n,m} */
&&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
@@ -1492,16 +1532,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
&&L_OP_NULL_CHECK_START, /* null loop checker start */
&&L_OP_NULL_CHECK_END, /* null loop checker end */
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
&&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
-#else
+# else
&&L_DEFAULT,
-#endif
-#ifdef USE_SUBEXP_CALL
+# endif
+# ifdef USE_SUBEXP_CALL
&&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
-#else
+# else
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_PUSH_POS, /* (?=...) start */
&&L_OP_POP_POS, /* (?=...) end */
@@ -1513,69 +1553,66 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
&&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
-#ifdef USE_SUBEXP_CALL
+# ifdef USE_SUBEXP_CALL
&&L_OP_CALL, /* \g<name> */
&&L_OP_RETURN,
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT,
-#endif
+# endif
&&L_OP_CONDITION,
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
+# ifdef USE_COMBINATION_EXPLOSION_CHECK
&&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
&&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
&&L_OP_STATE_CHECK, /* check only */
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT,
&&L_DEFAULT,
-#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
+# endif
+# ifdef USE_COMBINATION_EXPLOSION_CHECK
&&L_OP_STATE_CHECK_ANYCHAR_STAR,
&&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT,
-#endif
+# endif
/* no need: IS_DYNAMIC_OPTION() == 0 */
-#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
+# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
&&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
&&L_OP_SET_OPTION /* set option */
-#else
+# else
&&L_DEFAULT,
&&L_DEFAULT
-#endif
+# endif
};
-#else
+#else /* USE_TOKEN_THREADED_VM */
-#define VM_LOOP \
+# define OP_OFFSET 0
+# define VM_LOOP \
while (1) { \
OPCODE_EXEC_HOOK; \
sbegin = s; \
switch (*p++) {
-#define VM_LOOP_END } sprev = sbegin; }
-#define CASE(x) case x:
-#define DEFAULT default:
-#define NEXT break
-#define JUMP continue; break
-#endif
+# define VM_LOOP_END } sprev = sbegin; }
+# define CASE(x) case x:
+# define DEFAULT default:
+# define NEXT break
+# define JUMP continue; break
+#endif /* USE_TOKEN_THREADED_VM */
#ifdef USE_SUBEXP_CALL
- /* Stack #0 is used to store the pattern itself and used for (?R), \g<0>, etc. */
- n = reg->num_repeat + (reg->num_mem + 1) * 2;
-
- STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
- pop_level = reg->stack_pop_level;
- num_mem = reg->num_mem;
- repeat_stk = (OnigStackIndex* )alloca_base;
+/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
+ etc. Additional space is required. */
+# define ADD_NUMMEM 1
+#else
+/* Stack #0 is not used. */
+# define ADD_NUMMEM 0
+#endif
- mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
- mem_end_stk = mem_start_stk + (num_mem + 1);
-#else /* USE_SUBEXP_CALL */
- /* Stack #0 not is used. */
- n = reg->num_repeat + reg->num_mem * 2;
+ n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
pop_level = reg->stack_pop_level;
@@ -1583,25 +1620,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
repeat_stk = (OnigStackIndex* )alloca_base;
mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
- mem_end_stk = mem_start_stk + num_mem;
+ mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
+ {
+ OnigStackIndex *pp = mem_start_stk;
+ for (; pp < repeat_stk + n; pp += 2) {
+ pp[0] = INVALID_STACK_INDEX;
+ pp[1] = INVALID_STACK_INDEX;
+ }
+ }
+#ifndef USE_SUBEXP_CALL
mem_start_stk--; /* for index start from 1,
mem_start_stk[1]..mem_start_stk[num_mem] */
mem_end_stk--; /* for index start from 1,
mem_end_stk[1]..mem_end_stk[num_mem] */
-#endif /* USE_SUBEXP_CALL */
- {
- OnigStackIndex *pp = mem_start_stk;
- for (; pp < (repeat_stk + n); pp+=2) {
- pp[0] = INVALID_STACK_INDEX;
- pp[1] = INVALID_STACK_INDEX;
- }
- }
+#endif
#ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "match_at: str: %"PRIdPTR" (%p), end: %"PRIdPTR" (%p), start: %"PRIdPTR" (%p), sprev: %"PRIdPTR" (%p)\n",
(intptr_t )str, str, (intptr_t )end, end, (intptr_t )sstart, sstart, (intptr_t )sprev, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
+ fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
#endif
STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
@@ -1611,31 +1650,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_MATCH
-#define OPCODE_EXEC_HOOK \
+# define OPCODE_EXEC_HOOK \
if (s) { \
UChar *op, *q, *bp, buf[50]; \
int len; \
- op = p - 1; \
+ op = p - OP_OFFSET; \
fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
bp = buf; \
q = s; \
if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
for (i = 0; i < 7 && q < end; i++) { \
- len = enclen(encode, q, end); \
+ len = enclen(encode, q, end); \
while (len-- > 0) *bp++ = *q++; \
} \
+ if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
} \
- if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } \
- else { xmemcpy(bp, "\"", 1); bp += 1; } \
+ xmemcpy(bp, "\"", 1); bp += 1; \
*bp = 0; \
fputs((char* )buf, stderr); \
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
- fprintf(stderr, "%4"PRIdPTR":", (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
+ fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
+ stk - stk_base - 1, \
+ (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
+ (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
fprintf(stderr, "\n"); \
}
#else
-#define OPCODE_EXEC_HOOK ((void) 0)
+# define OPCODE_EXEC_HOOK ((void) 0)
#endif
@@ -1652,83 +1694,56 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else
goto end_best_len;
- }
+ }
#endif
best_len = n;
region = msa->region;
if (region) {
-#ifdef USE_POSIX_API_REGION_OPTION
- if (IS_POSIX_REGION(msa->options)) {
- posix_regmatch_t* rmt = (posix_regmatch_t* )region;
-
- rmt[0].rm_so = (regoff_t )(((pkeep > s) ? s : pkeep) - str);
- rmt[0].rm_eo = (regoff_t )(s - str);
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
-
- rmt[i].rm_eo = (regoff_t )((BIT_STATUS_AT(reg->bt_mem_end, i)
+ region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
+ region->end[0] = s - str;
+ for (i = 1; i <= num_mem; i++) {
+ if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
+ else
+ region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
+
+ region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str);
- }
- else {
- rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
- }
+ : (UChar* )((void* )mem_end_stk[i])) - str;
}
- }
- else {
-#endif /* USE_POSIX_API_REGION_OPTION */
- region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
- region->end[0] = s - str;
- for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
- else
- region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
-
- region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str;
- }
- else {
- region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
- }
+ else {
+ region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
}
+ }
#ifdef USE_CAPTURE_HISTORY
- if (reg->capture_history != 0) {
- int r;
- OnigCaptureTreeNode* node;
-
- if (IS_NULL(region->history_root)) {
- region->history_root = node = history_node_new();
- CHECK_NULL_RETURN_MEMERR(node);
- }
- else {
- node = region->history_root;
- history_tree_clear(node);
- }
-
- node->group = 0;
- node->beg = ((pkeep > s) ? s : pkeep) - str;
- node->end = s - str;
-
- stkp = stk_base;
- r = make_capture_history_tree(region->history_root, &stkp,
- stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
+ if (reg->capture_history != 0) {
+ int r;
+ OnigCaptureTreeNode* node;
+
+ if (IS_NULL(region->history_root)) {
+ region->history_root = node = history_node_new();
+ CHECK_NULL_RETURN_MEMERR(node);
}
+ else {
+ node = region->history_root;
+ history_tree_clear(node);
+ }
+
+ node->group = 0;
+ node->beg = ((pkeep > s) ? s : pkeep) - str;
+ node->end = s - str;
+
+ stkp = stk_base;
+ r = make_capture_history_tree(region->history_root, &stkp,
+ stk, (UChar* )str, reg);
+ if (r < 0) {
+ best_len = r; /* error code */
+ goto finish;
+ }
+ }
#endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_API_REGION_OPTION
- } /* else IS_POSIX_REGION() */
-#endif
} /* if (region) */
} /* n > best_len */
@@ -1777,8 +1792,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
q = lowbuf;
while (len-- > 0) {
if (*p != *q) {
- goto fail;
- }
+ goto fail;
+ }
p++; q++;
}
}
@@ -2036,7 +2051,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int mb_len = enclen(encode, s, end);
if (! DATA_ENSURE_CHECK(mb_len)) {
- DATA_ENSURE(1);
+ DATA_ENSURE(1);
s = (UChar* )end;
p += tlen;
goto cc_mb_not_success;
@@ -2078,25 +2093,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
NEXT;
- CASE(OP_CCLASS_NODE) MOP_IN(OP_CCLASS_NODE);
- {
- OnigCodePoint code;
- void *node;
- int mb_len;
- UChar *ss;
-
- DATA_ENSURE(1);
- GET_POINTER_INC(node, p);
- mb_len = enclen(encode, s, end);
- ss = s;
- s += mb_len;
- DATA_ENSURE(0);
- code = ONIGENC_MBC_TO_CODE(encode, ss, s);
- if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail;
- }
- MOP_OUT;
- NEXT;
-
CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enclen(encode, s, end);
@@ -2118,10 +2114,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev, pkeep);
n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
MOP_OUT;
NEXT;
@@ -2149,10 +2145,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
}
n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
p++;
MOP_OUT;
@@ -2187,10 +2183,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
n = enclen(encode, s, end);
- DATA_ENSURE(n);
- if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
- sprev = s;
- s += n;
+ DATA_ENSURE(n);
+ if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
+ sprev = s;
+ s += n;
}
MOP_OUT;
NEXT;
@@ -2681,8 +2677,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_LENGTH_INC(tlen, p);
sprev = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, ic
- , case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
+ if (backref_match_at_nested_level(reg, stk, stk_base, ic,
+ case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
@@ -2758,10 +2754,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
GET_MEMNUM_INC(mem, p); /* mem: null check id */
STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
if (isnull) {
-#ifdef ONIG_DEBUG_MATCH
+# ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIdPTR" (%p)\n",
(int )mem, (intptr_t )s, s);
-#endif
+# endif
if (isnull == -1) goto fail;
goto null_check_found;
}
@@ -2777,16 +2773,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int isnull;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
+# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
-#else
+# else
STACK_NULL_CHECK_REC(isnull, mem, s);
-#endif
+# endif
if (isnull) {
-#ifdef ONIG_DEBUG_MATCH
+# ifdef ONIG_DEBUG_MATCH
fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIdPTR" (%p)\n",
(int )mem, (intptr_t )s, s);
-#endif
+# endif
if (isnull == -1) goto fail;
goto null_check_found;
}
@@ -2850,6 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
MOP_OUT;
JUMP;
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
GET_RELADDR_INC(addr, p);
if (*p == *s && DATA_ENSURE_CHECK1) {
@@ -2861,6 +2858,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
p += (addr + 1);
MOP_OUT;
JUMP;
+#endif
CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
GET_RELADDR_INC(addr, p);
@@ -2915,14 +2913,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
repeat_inc:
stkp->u.repeat.count++;
if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
- /* end of repeat. Nothing to do. */
+ /* end of repeat. Nothing to do. */
}
else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(p, s, sprev, pkeep);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ STACK_PUSH_ALT(p, s, sprev, pkeep);
+ p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
}
else {
- p = stkp->u.repeat.pcode;
+ p = stkp->u.repeat.pcode;
}
STACK_PUSH_REPEAT_INC(si);
MOP_OUT;
@@ -2944,19 +2942,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
repeat_inc_ng:
stkp->u.repeat.count++;
if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- UChar* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev, pkeep);
- }
- else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
- }
+ if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ UChar* pcode = stkp->u.repeat.pcode;
+
+ STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_ALT(pcode, s, sprev, pkeep);
+ }
+ else {
+ p = stkp->u.repeat.pcode;
+ STACK_PUSH_REPEAT_INC(si);
+ }
}
else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_REPEAT_INC(si);
}
MOP_OUT;
CHECK_INTERRUPT_IN_MATCH_AT;
@@ -3067,9 +3065,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE(OP_FAIL)
if (0) {
- /* fall */
+ /* fall */
fail:
- MOP_OUT;
+ MOP_OUT;
}
MOP_IN(OP_FAIL);
STACK_POP;
@@ -3080,8 +3078,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (stk->u.state.state_check != 0) {
- stk->type = STK_STATE_CHECK_MARK;
- stk++;
+ stk->type = STK_STATE_CHECK_MARK;
+ stk++;
}
#endif
@@ -3158,7 +3156,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
static int
str_lower_case_match(OnigEncoding enc, int case_fold_flag,
- const UChar* t, const UChar* tend,
+ const UChar* t, const UChar* tend,
const UChar* p, const UChar* end)
{
int lowlen;
@@ -3250,7 +3248,7 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
while (s >= text) {
if (str_lower_case_match(enc, case_fold_flag,
- target, target_end, s, text_end))
+ target, target_end, s, text_end))
return s;
s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
@@ -3270,10 +3268,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *tail;
ptrdiff_t skip, tlen1;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
text, text, text_end, text_end, text_range, text_range);
-#endif
+# endif
tail = target_end - 1;
tlen1 = tail - target;
@@ -3294,11 +3292,12 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[*se];
t = s;
do {
- s += enclen(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
}
else {
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = se = s + tlen1;
t = tail;
@@ -3309,9 +3308,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[*se];
t = s;
do {
- s += enclen(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
+# endif
}
return (UChar* )NULL;
@@ -3325,10 +3325,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *s, *t, *p, *end;
const UChar *tail;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
text, text_end, text_range);
-#endif
+# endif
end = text_range + (target_end - target) - 1;
if (end > text_end)
@@ -3340,10 +3340,10 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
while (s < end) {
p = s;
t = tail;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
(intptr_t )(s - text), s);
-#endif
+# endif
while (*p == *t) {
if (t == target) return (UChar* )p;
p--; t--;
@@ -3352,6 +3352,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
}
else { /* see int_map[] */
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = s;
t = tail;
@@ -3361,6 +3362,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
s += reg->int_map[*s];
}
+# endif
}
return (UChar* )NULL;
}
@@ -3377,10 +3379,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
int case_fold_flag = reg->case_fold_flag;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-#endif
+# endif
tail = target_end - 1;
tlen1 = tail - target;
@@ -3399,11 +3401,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[*se];
t = s;
do {
- s += enclen(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
}
else {
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
se = s + tlen1;
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
@@ -3412,9 +3415,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[*se];
t = s;
do {
- s += enclen(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
+# endif
}
return (UChar* )NULL;
@@ -3430,10 +3434,10 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
int case_fold_flag = reg->case_fold_flag;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
(int )text, text, (int )text_end, text_end, (int )text_range, text_range);
-#endif
+# endif
end = text_range + (target_end - target) - 1;
if (end > text_end)
@@ -3451,6 +3455,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
}
}
else { /* see int_map[] */
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = s - (target_end - target) + 1;
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
@@ -3458,6 +3463,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
return (UChar* )p;
s += reg->int_map[*s];
}
+# endif
}
return (UChar* )NULL;
}
@@ -3475,10 +3481,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
ptrdiff_t skip, tlen1;
OnigEncoding enc = reg->enc;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
-#endif
+# endif
tail = target_end - 1;
tlen1 = tail - target;
@@ -3500,11 +3506,12 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[se[1]];
t = s;
do {
- s += enclen(enc, s, end);
+ s += enclen(enc, s, end);
} while ((s - t) < skip && s < end);
}
}
else {
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = se = s + tlen1;
t = tail;
@@ -3516,9 +3523,10 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[se[1]];
t = s;
do {
- s += enclen(enc, s, end);
+ s += enclen(enc, s, end);
} while ((s - t) < skip && s < end);
}
+# endif
}
return (UChar* )NULL;
@@ -3533,6 +3541,11 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *tail;
ptrdiff_t tlen1;
+# ifdef ONIG_DEBUG_SEARCH
+ fprintf(stderr, "bm_search: text: %"PRIuPTR", text_end: %"PRIuPTR", text_range: %"PRIuPTR"\n",
+ text, text_end, text_range);
+# endif
+
tail = target_end - 1;
tlen1 = tail - target;
end = text_range + tlen1;
@@ -3553,6 +3566,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
}
else { /* see int_map[] */
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = s;
t = tail;
@@ -3563,6 +3577,7 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
if (s + 1 >= end) break;
s += reg->int_map[s[1]];
}
+# endif
}
return (UChar* )NULL;
}
@@ -3579,10 +3594,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
int case_fold_flag = reg->case_fold_flag;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_notrev_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
-#endif
+# endif
tail = target_end - 1;
tlen1 = tail - target;
@@ -3602,11 +3617,12 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[se[1]];
t = s;
do {
- s += enclen(enc, s, end);
+ s += enclen(enc, s, end);
} while ((s - t) < skip && s < end);
}
}
else {
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
se = s + tlen1;
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
@@ -3616,9 +3632,10 @@ bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[se[1]];
t = s;
do {
- s += enclen(enc, s, end);
+ s += enclen(enc, s, end);
} while ((s - t) < skip && s < end);
}
+# endif
}
return (UChar* )NULL;
@@ -3635,10 +3652,10 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
OnigEncoding enc = reg->enc;
int case_fold_flag = reg->case_fold_flag;
-#ifdef ONIG_DEBUG_SEARCH
+# ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "bm_search_ic: text: %"PRIdPTR" (%p), text_end: %"PRIdPTR" (%p), text_range: %"PRIdPTR" (%p)\n",
(intptr_t )text, text, (intptr_t )text_end, text_end, (intptr_t )text_range, text_range);
-#endif
+# endif
tail = target_end - 1;
tlen1 = tail - target;
@@ -3658,6 +3675,7 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
}
}
else { /* see int_map[] */
+# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
while (s < end) {
p = s - tlen1;
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
@@ -3666,11 +3684,13 @@ bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
if (s + 1 >= end) break;
s += reg->int_map[s[1]];
}
+# endif
}
return (UChar* )NULL;
}
#endif /* USE_SUNDAY_QUICK_SEARCH */
+#ifdef USE_INT_MAP_BACKWARD
static int
set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
int** skip)
@@ -3720,6 +3740,7 @@ bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
return (UChar* )NULL;
}
+#endif
static UChar*
map_search(OnigEncoding enc, UChar map[],
@@ -3758,31 +3779,6 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
UChar *prev;
OnigMatchArg msa;
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
- start:
- THREAD_ATOMIC_START;
- if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
- ONIG_STATE_INC(reg);
- if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_chain_reduce(reg);
- ONIG_STATE_INC(reg);
- }
- }
- else {
- int n;
-
- THREAD_ATOMIC_END;
- n = 0;
- while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
- if (++n > THREAD_PASS_LIMIT_COUNT)
- return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
- THREAD_PASS;
- }
- goto start;
- }
- THREAD_ATOMIC_END;
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
MATCH_ARG_INIT(msa, option, region, at, at);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
{
@@ -3791,11 +3787,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
#endif
- if (region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
+ if (region) {
r = onig_region_resize_clear(region, reg->num_mem + 1);
}
else
@@ -3811,7 +3803,6 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
}
MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC_THREAD(reg);
return r;
}
@@ -3844,7 +3835,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
break;
case ONIG_OPTIMIZE_EXACT_IC:
p = slow_search_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end, p, end, range);
+ reg->exact, reg->exact_end, p, end, range);
break;
case ONIG_OPTIMIZE_EXACT_BM:
@@ -3952,7 +3943,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
UChar* s, const UChar* range, UChar* adjrange,
UChar** low, UChar** high)
{
- int r;
UChar *p;
range += reg->dmin;
@@ -3970,13 +3960,15 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
case ONIG_OPTIMIZE_EXACT_BM_IC:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
- reg->exact, reg->exact_end,
- range, adjrange, end, p);
+ reg->exact, reg->exact_end,
+ range, adjrange, end, p);
break;
case ONIG_OPTIMIZE_EXACT_BM:
case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
+#ifdef USE_INT_MAP_BACKWARD
if (IS_NULL(reg->int_map_backward)) {
+ int r;
if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
goto exact_method;
@@ -3986,6 +3978,9 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
}
p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
end, p);
+#else
+ goto exact_method;
+#endif
break;
case ONIG_OPTIMIZE_MAP:
@@ -4070,42 +4065,13 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
const UChar *orig_range = range;
#endif
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
- start:
- THREAD_ATOMIC_START;
- if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
- ONIG_STATE_INC(reg);
- if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- onig_chain_reduce(reg);
- ONIG_STATE_INC(reg);
- }
- }
- else {
- int n;
-
- THREAD_ATOMIC_END;
- n = 0;
- while (ONIG_STATE(reg) < ONIG_STATE_NORMAL) {
- if (++n > THREAD_PASS_LIMIT_COUNT)
- return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
- THREAD_PASS;
- }
- goto start;
- }
- THREAD_ATOMIC_END;
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
"onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
(intptr_t )str, str, end - str, start - str, range - str);
#endif
- if (region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
+ if (region) {
r = onig_region_resize_clear(region, reg->num_mem + 1);
if (r) goto finish_no_msa;
}
@@ -4114,8 +4080,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(upper_range) \
+# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
@@ -4125,8 +4091,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}\
else goto finish; /* error */ \
}
-#else
-#define MATCH_AND_RETURN_CHECK(upper_range) \
+# else
+# define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
@@ -4134,10 +4100,10 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}\
else goto finish; /* error */ \
}
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_AND_RETURN_CHECK(none) \
+# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
@@ -4147,8 +4113,8 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}\
else goto finish; /* error */ \
}
-#else
-#define MATCH_AND_RETURN_CHECK(none) \
+# else
+# define MATCH_AND_RETURN_CHECK(none) \
r = match_at(reg, str, end, s, prev, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
@@ -4156,7 +4122,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}\
else goto finish; /* error */ \
}
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
+# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
@@ -4168,7 +4134,15 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
/* search start-position only */
begin_position:
if (range > start)
- range = start + 1;
+ {
+ if (global_pos > start)
+ {
+ if (global_pos < range)
+ range = global_pos + 1;
+ }
+ else
+ range = start + 1;
+ }
else
range = start;
}
@@ -4244,9 +4218,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}
}
else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
- if (! (reg->anchor & ANCHOR_LOOK_BEHIND)) {
- goto begin_position;
- }
+ goto begin_position;
}
}
else if (str == end) { /* empty string */
@@ -4306,7 +4278,7 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
}
if ((end - start) < reg->threshold_len)
- goto mismatch;
+ goto mismatch;
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
do {
@@ -4328,24 +4300,24 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
if (! forward_search_range(reg, str, end, s, sch_range,
&low, &high, (UChar** )NULL)) goto mismatch;
- if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
- do {
- if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0)
- msa.gpos = s; /* move \G position */
- MATCH_AND_RETURN_CHECK(orig_range);
- prev = s;
- s += enclen(reg->enc, s, end);
-
- if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
- while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
- && s < range) {
- prev = s;
- s += enclen(reg->enc, s, end);
- }
- }
- } while (s < range);
- goto mismatch;
- }
+ if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
+ do {
+ if ((reg->anchor & ANCHOR_BEGIN_POSITION) == 0)
+ msa.gpos = s; /* move \G position */
+ MATCH_AND_RETURN_CHECK(orig_range);
+ prev = s;
+ s += enclen(reg->enc, s, end);
+
+ if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
+ while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
+ && s < range) {
+ prev = s;
+ s += enclen(reg->enc, s, end);
+ }
+ }
+ } while (s < range);
+ goto mismatch;
+ }
}
}
@@ -4428,15 +4400,10 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
finish:
MATCH_ARG_FREE(msa);
- ONIG_STATE_DEC_THREAD(reg);
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not set in match_at(). */
- if (IS_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_API_REGION_OPTION
- && !IS_POSIX_REGION(option)
-#endif
- ) {
+ if (IS_FIND_NOT_EMPTY(reg->options) && region) {
onig_region_clear(region);
}
@@ -4449,7 +4416,6 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
- ONIG_STATE_DEC_THREAD(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
@@ -4457,43 +4423,82 @@ onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
return r;
match:
- ONIG_STATE_DEC_THREAD(reg);
MATCH_ARG_FREE(msa);
return s - str;
}
+extern OnigPosition
+onig_scan(regex_t* reg, const UChar* str, const UChar* end,
+ OnigRegion* region, OnigOptionType option,
+ int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
+ void* callback_arg)
+{
+ OnigPosition r;
+ OnigPosition n;
+ int rs;
+ const UChar* start;
+
+ n = 0;
+ start = str;
+ while (1) {
+ r = onig_search(reg, str, end, start, end, region, option);
+ if (r >= 0) {
+ rs = scan_callback(n, r, region, callback_arg);
+ n++;
+ if (rs != 0)
+ return rs;
+
+ if (region->end[0] == start - str)
+ start++;
+ else
+ start = str + region->end[0];
+
+ if (start > end)
+ break;
+ }
+ else if (r == ONIG_MISMATCH) {
+ break;
+ }
+ else { /* error */
+ return r;
+ }
+ }
+
+ return n;
+}
+
extern OnigEncoding
-onig_get_encoding(regex_t* reg)
+onig_get_encoding(const regex_t* reg)
{
return reg->enc;
}
extern OnigOptionType
-onig_get_options(regex_t* reg)
+onig_get_options(const regex_t* reg)
{
return reg->options;
}
extern OnigCaseFoldType
-onig_get_case_fold_flag(regex_t* reg)
+onig_get_case_fold_flag(const regex_t* reg)
{
return reg->case_fold_flag;
}
extern const OnigSyntaxType*
-onig_get_syntax(regex_t* reg)
+onig_get_syntax(const regex_t* reg)
{
return reg->syntax;
}
extern int
-onig_number_of_captures(regex_t* reg)
+onig_number_of_captures(const regex_t* reg)
{
return reg->num_mem;
}
extern int
-onig_number_of_capture_histories(regex_t* reg)
+onig_number_of_capture_histories(const regex_t* reg)
{
#ifdef USE_CAPTURE_HISTORY
int i, n;
diff --git a/regint.h b/regint.h
index 80d3523126..344ece4ef1 100644
--- a/regint.h
+++ b/regint.h
@@ -1,11 +1,11 @@
-#ifndef ONIGURUMA_REGINT_H
-#define ONIGURUMA_REGINT_H
+#ifndef ONIGMO_REGINT_H
+#define ONIGMO_REGINT_H
/**********************************************************************
regint.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,6 +35,7 @@
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DEBUG_MEMLEAK */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
@@ -42,25 +43,25 @@
#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
- defined(ONIG_DEBUG_STATISTICS)
-#ifndef ONIG_DEBUG
-#define ONIG_DEBUG
-#endif
+ defined(ONIG_DEBUG_STATISTICS) || defined(ONIG_DEBUG_MEMLEAK)
+# ifndef ONIG_DEBUG
+# define ONIG_DEBUG
+# endif
#endif
#ifndef UNALIGNED_WORD_ACCESS
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
- defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
- defined(__powerpc64__) || \
- defined(__mc68020__)
-#define UNALIGNED_WORD_ACCESS 1
-#else
-#define UNALIGNED_WORD_ACCESS 0
-#endif
+# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
+ defined(__powerpc64__) || \
+ defined(__mc68020__)
+# define UNALIGNED_WORD_ACCESS 1
+# else
+# define UNALIGNED_WORD_ACCESS 0
+# endif
#endif
#if UNALIGNED_WORD_ACCESS
-#define PLATFORM_UNALIGNED_WORD_ACCESS
+# define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
@@ -73,213 +74,163 @@
#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
-/* #define USE_RECOMPILE_API */
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_NO_INVALID_QUANTIFIER
/* internal config */
-#define USE_PARSE_TREE_NODE_RECYCLE
-#define USE_OP_PUSH_OR_JUMP_EXACT
+/* #define USE_OP_PUSH_OR_JUMP_EXACT */
#define USE_QTFR_PEEK_NEXT
#define USE_ST_LIBRARY
-#define USE_SHARED_CCLASS_TABLE
#define USE_SUNDAY_QUICK_SEARCH
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+#define DEFAULT_PARSE_DEPTH_LIMIT 4096
+
+#define OPT_EXACT_MAXLEN 24
/* check config */
#if defined(USE_PERL_SUBEXP_CALL) || defined(USE_CAPITAL_P_NAMED_GROUP)
-#if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
-#error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
-#endif
+# if !defined(USE_NAMED_GROUP) || !defined(USE_SUBEXP_CALL)
+# error USE_NAMED_GROUP and USE_SUBEXP_CALL must be defined.
+# endif
#endif
#if defined(__GNUC__)
-# define ARG_UNUSED __attribute__ ((unused))
+# define ARG_UNUSED __attribute__ ((unused))
#else
-# define ARG_UNUSED
+# define ARG_UNUSED
#endif
-#ifndef RUBY_DEFINES_H
-#include "ruby/ruby.h"
-#undef xmalloc
-#undef xrealloc
-#undef xcalloc
-#undef xfree
+#if !defined(RUBY) && defined(RUBY_EXPORT)
+# define RUBY
#endif
+#ifdef RUBY
+# ifndef RUBY_DEFINES_H
+# include "ruby/ruby.h"
+# undef xmalloc
+# undef xrealloc
+# undef xcalloc
+# undef xfree
+# endif
+#else /* RUBY */
+# include "config.h"
+# if SIZEOF_LONG_LONG > 0
+# define LONG_LONG long long
+# endif
+#endif /* RUBY */
+
+#include <stdarg.h>
/* */
/* escape other system UChar definition */
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
-#undef ONIG_ESCAPE_UCHAR_COLLISION
+# undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
-#undef USE_CAPTURE_HISTORY
+#ifdef RUBY
+# undef USE_CAPTURE_HISTORY
+#else
+# define USE_CAPTURE_HISTORY
+#endif
#define USE_VARIABLE_META_CHARS
-#define USE_POSIX_API_REGION_OPTION /* needed for POSIX API support */
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
-/* multithread config */
-/* #define USE_MULTI_THREAD_SYSTEM */
-/* #define USE_DEFAULT_MULTI_THREAD_SYSTEM */
-
-#if defined(USE_MULTI_THREAD_SYSTEM) \
- && defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
-
-#ifdef _WIN32
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-extern CRITICAL_SECTION gOnigMutex;
-#define THREAD_SYSTEM_INIT InitializeCriticalSection(&gOnigMutex)
-#define THREAD_SYSTEM_END DeleteCriticalSection(&gOnigMutex)
-#define THREAD_ATOMIC_START EnterCriticalSection(&gOnigMutex)
-#define THREAD_ATOMIC_END LeaveCriticalSection(&gOnigMutex)
-#define THREAD_PASS Sleep(0)
-#else /* _WIN32 */
-#include <pthread.h>
-#include <sched.h>
-extern pthread_mutex_t gOnigMutex;
-#define THREAD_SYSTEM_INIT pthread_mutex_init(&gOnigMutex, NULL)
-#define THREAD_SYSTEM_END pthread_mutex_destroy(&gOnigMutex)
-#define THREAD_ATOMIC_START pthread_mutex_lock(&gOnigMutex)
-#define THREAD_ATOMIC_END pthread_mutex_unlock(&gOnigMutex)
-#define THREAD_PASS sched_yield()
-#endif /* _WIN32 */
-
-#else /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
-
-#ifndef THREAD_SYSTEM_INIT
-#define THREAD_SYSTEM_INIT /* depend on thread system */
-#endif
-#ifndef THREAD_SYSTEM_END
-#define THREAD_SYSTEM_END /* depend on thread system */
-#endif
-#ifndef THREAD_ATOMIC_START
-#define THREAD_ATOMIC_START /* depend on thread system */
-#endif
-#ifndef THREAD_ATOMIC_END
-#define THREAD_ATOMIC_END /* depend on thread system */
-#endif
-#ifndef THREAD_PASS
-#define THREAD_PASS /* depend on thread system */
-#endif
-
-#endif /* USE_DEFAULT_MULTI_THREAD_SYSTEM */
#ifndef xmalloc
-#define xmalloc malloc
-#define xrealloc realloc
-#define xcalloc calloc
-#define xfree free
+# define xmalloc malloc
+# define xrealloc realloc
+# define xcalloc calloc
+# define xfree free
#endif
#ifdef RUBY
-#define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints()
-#define onig_st_init_table st_init_table
-#define onig_st_init_table_with_size st_init_table_with_size
-#define onig_st_init_numtable st_init_numtable
-#define onig_st_init_numtable_with_size st_init_numtable_with_size
-#define onig_st_init_strtable st_init_strtable
-#define onig_st_init_strtable_with_size st_init_strtable_with_size
-#define onig_st_delete st_delete
-#define onig_st_delete_safe st_delete_safe
-#define onig_st_insert st_insert
-#define onig_st_lookup st_lookup
-#define onig_st_foreach st_foreach
-#define onig_st_add_direct st_add_direct
-#define onig_st_free_table st_free_table
-#define onig_st_cleanup_safe st_cleanup_safe
-#define onig_st_copy st_copy
-#define onig_st_nothing_key_clone st_nothing_key_clone
-#define onig_st_nothing_key_free st_nothing_key_free
-#define onig_st_is_member st_is_member
-
-#define USE_UPPER_CASE_TABLE
-#else
-
-#define CHECK_INTERRUPT_IN_MATCH_AT
-
-#define st_init_table onig_st_init_table
-#define st_init_table_with_size onig_st_init_table_with_size
-#define st_init_numtable onig_st_init_numtable
-#define st_init_numtable_with_size onig_st_init_numtable_with_size
-#define st_init_strtable onig_st_init_strtable
-#define st_init_strtable_with_size onig_st_init_strtable_with_size
-#define st_delete onig_st_delete
-#define st_delete_safe onig_st_delete_safe
-#define st_insert onig_st_insert
-#define st_lookup onig_st_lookup
-#define st_foreach onig_st_foreach
-#define st_add_direct onig_st_add_direct
-#define st_free_table onig_st_free_table
-#define st_cleanup_safe onig_st_cleanup_safe
-#define st_copy onig_st_copy
-#define st_nothing_key_clone onig_st_nothing_key_clone
-#define st_nothing_key_free onig_st_nothing_key_free
+# define CHECK_INTERRUPT_IN_MATCH_AT rb_thread_check_ints()
+# define onig_st_init_table st_init_table
+# define onig_st_init_table_with_size st_init_table_with_size
+# define onig_st_init_numtable st_init_numtable
+# define onig_st_init_numtable_with_size st_init_numtable_with_size
+# define onig_st_init_strtable st_init_strtable
+# define onig_st_init_strtable_with_size st_init_strtable_with_size
+# define onig_st_delete st_delete
+# define onig_st_delete_safe st_delete_safe
+# define onig_st_insert st_insert
+# define onig_st_lookup st_lookup
+# define onig_st_foreach st_foreach
+# define onig_st_add_direct st_add_direct
+# define onig_st_free_table st_free_table
+# define onig_st_cleanup_safe st_cleanup_safe
+# define onig_st_copy st_copy
+# define onig_st_nothing_key_clone st_nothing_key_clone
+# define onig_st_nothing_key_free st_nothing_key_free
+# define onig_st_is_member st_is_member
+
+# define USE_UPPER_CASE_TABLE
+#else /* RUBY */
+
+# define CHECK_INTERRUPT_IN_MATCH_AT
+
+# define st_init_table onig_st_init_table
+# define st_init_table_with_size onig_st_init_table_with_size
+# define st_init_numtable onig_st_init_numtable
+# define st_init_numtable_with_size onig_st_init_numtable_with_size
+# define st_init_strtable onig_st_init_strtable
+# define st_init_strtable_with_size onig_st_init_strtable_with_size
+# define st_delete onig_st_delete
+# define st_delete_safe onig_st_delete_safe
+# define st_insert onig_st_insert
+# define st_lookup onig_st_lookup
+# define st_foreach onig_st_foreach
+# define st_add_direct onig_st_add_direct
+# define st_free_table onig_st_free_table
+# define st_cleanup_safe onig_st_cleanup_safe
+# define st_copy onig_st_copy
+# define st_nothing_key_clone onig_st_nothing_key_clone
+# define st_nothing_key_free onig_st_nothing_key_free
/* */
-#define onig_st_is_member st_is_member
+# define onig_st_is_member st_is_member
-#endif
+#endif /* RUBY */
#define STATE_CHECK_STRING_THRESHOLD_LEN 7
#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
-#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
#if defined(_WIN32) && !defined(__GNUC__)
-#define xalloca _alloca
-#define xvsnprintf _vsnprintf
+# define xalloca _alloca
+# define xvsnprintf(buf,size,fmt,args) _vsnprintf_s(buf,size,_TRUNCATE,fmt,args)
+# define xsnprintf sprintf_s
+# define xstrcat(dest,src,size) strcat_s(dest,size,src)
#else
-#define xalloca alloca
-#define xvsnprintf vsnprintf
+# define xalloca alloca
+# define xvsnprintf vsnprintf
+# define xsnprintf snprintf
+# define xstrcat(dest,src,size) strcat(dest,src)
#endif
+#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
+# define _CRTDBG_MAP_ALLOC
+# include <malloc.h>
+# include <crtdbg.h>
+#endif
-#if defined(USE_RECOMPILE_API) && defined(USE_MULTI_THREAD_SYSTEM)
-#define ONIG_STATE_INC(reg) (reg)->state++
-#define ONIG_STATE_DEC(reg) (reg)->state--
-
-#define ONIG_STATE_INC_THREAD(reg) do {\
- THREAD_ATOMIC_START;\
- (reg)->state++;\
- THREAD_ATOMIC_END;\
-} while(0)
-#define ONIG_STATE_DEC_THREAD(reg) do {\
- THREAD_ATOMIC_START;\
- (reg)->state--;\
- THREAD_ATOMIC_END;\
-} while(0)
-#else
-#define ONIG_STATE_INC(reg) /* Nothing */
-#define ONIG_STATE_DEC(reg) /* Nothing */
-#define ONIG_STATE_INC_THREAD(reg) /* Nothing */
-#define ONIG_STATE_DEC_THREAD(reg) /* Nothing */
-#endif /* USE_RECOMPILE_API && USE_MULTI_THREAD_SYSTEM */
-
-#ifdef HAVE_STDLIB_H
#include <stdlib.h>
-#endif
#if defined(HAVE_ALLOCA_H) && (defined(_AIX) || !defined(__GNUC__))
-#include <alloca.h>
+# include <alloca.h>
#endif
-#ifdef HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-#endif
+#include <string.h>
#include <ctype.h>
#ifdef HAVE_SYS_TYPES_H
-#include <sys/types.h>
+# include <sys/types.h>
#endif
#ifdef HAVE_STDINT_H
@@ -290,12 +241,10 @@ extern pthread_mutex_t gOnigMutex;
# include <inttypes.h>
#endif
-#ifdef STDC_HEADERS
-# include <stddef.h>
-#endif
+#include <stddef.h>
#ifdef _WIN32
-#include <malloc.h> /* for alloca() */
+# include <malloc.h> /* for alloca() */
#endif
#ifdef ONIG_DEBUG
@@ -303,28 +252,32 @@ extern pthread_mutex_t gOnigMutex;
#endif
#ifdef _WIN32
-#if defined(_MSC_VER) && (_MSC_VER < 1300)
-#ifndef _INTPTR_T_DEFINED
-#define _INTPTR_T_DEFINED
+# if defined(_MSC_VER) && (_MSC_VER < 1300)
+# ifndef _INTPTR_T_DEFINED
+# define _INTPTR_T_DEFINED
typedef int intptr_t;
-#endif
-#ifndef _UINTPTR_T_DEFINED
-#define _UINTPTR_T_DEFINED
+# endif
+# ifndef _UINTPTR_T_DEFINED
+# define _UINTPTR_T_DEFINED
typedef unsigned int uintptr_t;
-#endif
-#endif
+# endif
+# endif
#endif /* _WIN32 */
#ifndef PRIdPTR
-#ifdef _WIN64
-#define PRIdPTR "I64d"
-#define PRIuPTR "I64u"
-#define PRIxPTR "I64x"
-#else
-#define PRIdPTR "ld"
-#define PRIuPTR "lu"
-#define PRIxPTR "lx"
+# ifdef _WIN64
+# define PRIdPTR "I64d"
+# define PRIuPTR "I64u"
+# define PRIxPTR "I64x"
+# else
+# define PRIdPTR "ld"
+# define PRIuPTR "lu"
+# define PRIxPTR "lx"
+# endif
#endif
+
+#ifndef PRIdPTRDIFF
+# define PRIdPTRDIFF PRIdPTR
#endif
#include "regenc.h"
@@ -332,10 +285,10 @@ typedef unsigned int uintptr_t;
RUBY_SYMBOL_EXPORT_BEGIN
#ifdef MIN
-#undef MIN
+# undef MIN
#endif
#ifdef MAX
-#undef MAX
+# undef MAX
#endif
#define MIN(a,b) (((a)>(b))?(b):(a))
#define MAX(a,b) (((a)<(b))?(b):(a))
@@ -350,28 +303,28 @@ RUBY_SYMBOL_EXPORT_BEGIN
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-#define PLATFORM_GET_INC(val,p,type) do{\
+# define PLATFORM_GET_INC(val,p,type) do{\
val = *(type* )p;\
(p) += sizeof(type);\
} while(0)
#else
-#define PLATFORM_GET_INC(val,p,type) do{\
+# define PLATFORM_GET_INC(val,p,type) do{\
xmemcpy(&val, (p), sizeof(type));\
(p) += sizeof(type);\
} while(0)
/* sizeof(OnigCodePoint) */
-#define WORD_ALIGNMENT_SIZE SIZEOF_LONG
+# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
-#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
+# define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
(pad_size) = WORD_ALIGNMENT_SIZE \
- ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
} while (0)
-#define ALIGNMENT_RIGHT(addr) do {\
+# define ALIGNMENT_RIGHT(addr) do {\
(addr) += (WORD_ALIGNMENT_SIZE - 1);\
(addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
} while (0)
@@ -435,7 +388,6 @@ typedef unsigned int BitStatusType;
#define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL)
#define IS_NOTBOS(option) ((option) & ONIG_OPTION_NOTBOS)
#define IS_NOTEOS(option) ((option) & ONIG_OPTION_NOTEOS)
-#define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define IS_ASCII_RANGE(option) ((option) & ONIG_OPTION_ASCII_RANGE)
#define IS_POSIX_BRACKET_ALL_RANGE(option) ((option) & ONIG_OPTION_POSIX_BRACKET_ALL_RANGE)
#define IS_WORD_BOUND_ALL_RANGE(option) ((option) & ONIG_OPTION_WORD_BOUND_ALL_RANGE)
@@ -618,7 +570,6 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
- OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@@ -781,10 +732,10 @@ typedef void* PointerType;
#define SIZE_OP_CONDITION (SIZE_OPCODE + SIZE_MEMNUM + SIZE_RELADDR)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+# define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+# define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+# define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
+# define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
#endif
#define MC_ESC(syn) (syn)->meta_char_table.esc
@@ -832,13 +783,10 @@ typedef void* PointerType;
/* cclass node */
#define FLAG_NCCLASS_NOT (1<<0)
-#define FLAG_NCCLASS_SHARE (1<<1)
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
-#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
typedef struct {
int type;
@@ -936,60 +884,44 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc));
+extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp, OnigEncoding enc);
-#ifdef ONIG_DEBUG_STATISTICS
-extern void onig_statistics_init P_((void));
-extern void onig_print_statistics P_((FILE* f));
-#endif
+# ifdef ONIG_DEBUG_STATISTICS
+extern void onig_statistics_init(void);
+extern void onig_print_statistics(FILE* f);
+# endif
#endif
-extern UChar* onig_error_code_to_format P_((OnigPosition code));
-extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...));
-extern int onig_bbuf_init P_((BBuf* buf, OnigDistance size));
-extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline));
-extern void onig_chain_reduce P_((regex_t* reg));
-extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
-extern void onig_transfer P_((regex_t* to, regex_t* from));
-extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
-extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
+extern UChar* onig_error_code_to_format(OnigPosition code);
+extern void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
+extern void onig_snprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc, UChar* pat, UChar* pat_end, const UChar *fmt, ...);
+extern int onig_bbuf_init(BBuf* buf, OnigDistance size);
+extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo);
+#ifdef RUBY
+extern int onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo, const char *sourcefile, int sourceline);
+#endif
+extern void onig_transfer(regex_t* to, regex_t* from);
+extern int onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc);
+extern int onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc);
/* strend hash */
typedef void hash_table_type;
#ifdef RUBY
-#include "ruby/st.h"
-typedef st_data_t hash_data_type;
+# include "ruby/st.h"
#else
-#include "st.h"
-typedef uintptr_t hash_data_type;
+# include "st.h"
#endif
+typedef st_data_t hash_data_type;
-extern hash_table_type* onig_st_init_strend_table_with_size P_((st_index_t size));
-extern int onig_st_lookup_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value));
-extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value));
-
-/* encoding property management */
-#define PROPERTY_LIST_ADD_PROP(Name, CR) \
- r = onigenc_property_list_add_property((UChar* )Name, CR,\
- &PropertyNameTable, &PropertyList, &PropertyListNum,\
- &PropertyListSize);\
- if (r != 0) goto end
-
-#define PROPERTY_LIST_INIT_CHECK \
- if (PropertyInited == 0) {\
- int r = onigenc_property_list_init(init_property_list);\
- if (r != 0) return r;\
- }
-
-extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
-
-typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
-
-extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
+extern hash_table_type* onig_st_init_strend_table_with_size(st_index_t size);
+extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value);
+extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value);
-extern size_t onig_memsize P_((const regex_t *reg));
-extern size_t onig_region_memsize P_((const struct re_registers *regs));
+#ifdef RUBY
+extern size_t onig_memsize(const regex_t *reg);
+extern size_t onig_region_memsize(const struct re_registers *regs);
+#endif
RUBY_SYMBOL_EXPORT_END
-#endif /* ONIGURUMA_REGINT_H */
+#endif /* ONIGMO_REGINT_H */
diff --git a/regparse.c b/regparse.c
index fba0a34c42..204aa46ce9 100644
--- a/regparse.c
+++ b/regparse.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,7 @@
*/
#include "regparse.h"
+#include <stdarg.h>
#define WARN_BUFSIZE 256
@@ -51,6 +52,9 @@ const OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL |
ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB |
ONIG_SYN_OP2_ESC_H_XDIGIT |
+#ifndef RUBY
+ ONIG_SYN_OP2_ESC_U_HEX4 |
+#endif
ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION |
ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK |
@@ -105,6 +109,26 @@ extern void onig_set_verb_warn_func(OnigWarnFunc f)
static void CC_DUP_WARN(ScanEnv *env);
+
+static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
+
+extern unsigned int
+onig_get_parse_depth_limit(void)
+{
+ return ParseDepthLimit;
+}
+
+extern int
+onig_set_parse_depth_limit(unsigned int depth)
+{
+ if (depth == 0)
+ ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT;
+ else
+ ParseDepthLimit = depth;
+ return 0;
+}
+
+
static void
bbuf_free(BBuf* bbuf)
{
@@ -216,6 +240,7 @@ bitset_copy(BitSetRef dest, BitSetRef bs)
for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
}
+#if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY)
extern int
onig_strncmp(const UChar* s1, const UChar* s2, int n)
{
@@ -227,6 +252,7 @@ onig_strncmp(const UChar* s1, const UChar* s2, int n)
}
return 0;
}
+#endif
extern void
onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
@@ -265,9 +291,9 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
#ifdef __GNUC__
/* get rid of Wunused-but-set-variable and Wuninitialized */
-#define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
+# define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev
#else
-#define PFETCH_READY UChar* pfetch_prev
+# define PFETCH_READY UChar* pfetch_prev
#endif
#define PEND (p < end ? 0 : 1)
#define PUNFETCH p = pfetch_prev
@@ -325,7 +351,11 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
#ifdef USE_ST_LIBRARY
-#include "ruby/st.h"
+# ifdef RUBY
+# include "ruby/st.h"
+# else
+# include "st.h"
+# endif
typedef struct {
const UChar* s;
@@ -417,7 +447,7 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
#ifdef USE_NAMED_GROUP
-#define INIT_NAME_BACKREFS_ALLOC_NUM 8
+# define INIT_NAME_BACKREFS_ALLOC_NUM 8
typedef struct {
UChar* name;
@@ -428,12 +458,12 @@ typedef struct {
int* back_refs;
} NameEntry;
-#ifdef USE_ST_LIBRARY
+# ifdef USE_ST_LIBRARY
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
-#ifdef ONIG_DEBUG
+# ifdef ONIG_DEBUG
static int
i_print_name_entry(UChar* key, NameEntry* e, void* arg)
{
@@ -467,7 +497,7 @@ onig_print_names(FILE* fp, regex_t* reg)
}
return 0;
}
-#endif /* ONIG_DEBUG */
+# endif /* ONIG_DEBUG */
static int
i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
@@ -530,8 +560,8 @@ static int
i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
- e->name + e->name_len,
- e->back_num,
+ e->name + e->name_len,
+ e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
arg->reg, arg->arg);
if (r != 0) {
@@ -589,7 +619,7 @@ onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
extern int
-onig_number_of_names(regex_t* reg)
+onig_number_of_names(const regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
@@ -599,9 +629,9 @@ onig_number_of_names(regex_t* reg)
return 0;
}
-#else /* USE_ST_LIBRARY */
+# else /* USE_ST_LIBRARY */
-#define INIT_NAMES_ALLOC_NUM 8
+# define INIT_NAMES_ALLOC_NUM 8
typedef struct {
NameEntry* e;
@@ -609,7 +639,7 @@ typedef struct {
int alloc;
} NameTable;
-#ifdef ONIG_DEBUG
+# ifdef ONIG_DEBUG
extern int
onig_print_names(FILE* fp, regex_t* reg)
{
@@ -640,7 +670,7 @@ onig_print_names(FILE* fp, regex_t* reg)
}
return 0;
}
-#endif
+# endif
static int
names_clear(regex_t* reg)
@@ -725,7 +755,7 @@ onig_foreach_name(regex_t* reg,
}
extern int
-onig_number_of_names(regex_t* reg)
+onig_number_of_names(const regex_t* reg)
{
NameTable* t = (NameTable* )reg->name_table;
@@ -735,7 +765,7 @@ onig_number_of_names(regex_t* reg)
return 0;
}
-#endif /* else USE_ST_LIBRARY */
+# endif /* else USE_ST_LIBRARY */
static int
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
@@ -749,7 +779,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
e = name_find(reg, name, name_end);
if (IS_NULL(e)) {
-#ifdef USE_ST_LIBRARY
+# ifdef USE_ST_LIBRARY
if (IS_NULL(t)) {
t = onig_st_init_strend_table_with_size(5);
reg->name_table = (void* )t;
@@ -770,7 +800,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
e->back_alloc = 0;
e->back_refs = (int* )NULL;
-#else
+# else
if (IS_NULL(t)) {
alloc = INIT_NAMES_ALLOC_NUM;
@@ -813,7 +843,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
e->name = strdup_with_null(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
e->name_len = name_end - name;
-#endif
+# endif
}
if (e->back_num >= 1 &&
@@ -876,7 +906,7 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion *region)
+ const UChar* name_end, const OnigRegion *region)
{
int i, n, *nums;
@@ -909,7 +939,7 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion* region)
+ const UChar* name_end, const OnigRegion* region)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -922,14 +952,14 @@ onig_foreach_name(regex_t* reg,
}
extern int
-onig_number_of_names(regex_t* reg)
+onig_number_of_names(const regex_t* reg)
{
return 0;
}
#endif /* else USE_NAMED_GROUP */
extern int
-onig_noname_group_capture_is_active(regex_t* reg)
+onig_noname_group_capture_is_active(const regex_t* reg)
{
if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
return 0;
@@ -976,6 +1006,7 @@ scan_env_clear(ScanEnv* env)
env->curr_max_regnum = 0;
env->has_recursion = 0;
#endif
+ env->parse_depth = 0;
env->warnings_flag = 0;
}
@@ -1025,14 +1056,6 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
}
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
-typedef struct _FreeNode {
- struct _FreeNode* next;
-} FreeNode;
-
-static FreeNode* FreeNodeList = (FreeNode* )NULL;
-#endif
-
extern void
onig_node_free(Node* node)
{
@@ -1053,18 +1076,7 @@ onig_node_free(Node* node)
{
Node* next_node = NCDR(node);
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- {
- FreeNode* n = (FreeNode* )node;
-
- THREAD_ATOMIC_START;
- n->next = FreeNodeList;
- FreeNodeList = n;
- THREAD_ATOMIC_END;
- }
-#else
xfree(node);
-#endif
node = next_node;
goto start;
}
@@ -1074,9 +1086,8 @@ onig_node_free(Node* node)
{
CClassNode* cc = NCCLASS(node);
- if (IS_NCCLASS_SHARE(cc)) return ;
if (cc->mbuf)
- bbuf_free(cc->mbuf);
+ bbuf_free(cc->mbuf);
}
break;
@@ -1101,77 +1112,18 @@ onig_node_free(Node* node)
break;
}
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- {
- FreeNode* n = (FreeNode* )node;
-
- THREAD_ATOMIC_START;
- n->next = FreeNodeList;
- FreeNodeList = n;
- THREAD_ATOMIC_END;
- }
-#else
xfree(node);
-#endif
}
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
-extern int
-onig_free_node_list(void)
-{
- FreeNode* n;
-
- /* THREAD_ATOMIC_START; */
- while (IS_NOT_NULL(FreeNodeList)) {
- n = FreeNodeList;
- FreeNodeList = FreeNodeList->next;
- xfree(n);
- }
- /* THREAD_ATOMIC_END; */
- return 0;
-}
-#endif
-
static Node*
node_new(void)
{
Node* node;
-#ifdef USE_PARSE_TREE_NODE_RECYCLE
- THREAD_ATOMIC_START;
- if (IS_NOT_NULL(FreeNodeList)) {
- node = (Node* )FreeNodeList;
- FreeNodeList = FreeNodeList->next;
- THREAD_ATOMIC_END;
- return node;
- }
- THREAD_ATOMIC_END;
-#endif
-
- node = (Node* )xmalloc(sizeof(Node));
- /* xmemset(node, 0, sizeof(Node)); */
- return node;
-}
-
-#if defined(USE_MULTI_THREAD_SYSTEM) && \
- defined(USE_SHARED_CCLASS_TABLE) && \
- defined(USE_PARSE_TREE_NODE_RECYCLE)
-static Node*
-node_new_locked(void)
-{
- Node* node;
-
- if (IS_NOT_NULL(FreeNodeList)) {
- node = (Node* )FreeNodeList;
- FreeNodeList = FreeNodeList->next;
- return node;
- }
-
node = (Node* )xmalloc(sizeof(Node));
/* xmemset(node, 0, sizeof(Node)); */
return node;
}
-#endif
static void
initialize_cclass(CClassNode* cc)
@@ -1193,75 +1145,6 @@ node_new_cclass(void)
return node;
}
-#if defined(USE_MULTI_THREAD_SYSTEM) && \
- defined(USE_SHARED_CCLASS_TABLE) && \
- defined(USE_PARSE_TREE_NODE_RECYCLE)
-static Node*
-node_new_cclass_locked(void)
-{
- Node* node = node_new_locked();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CCLASS);
- initialize_cclass(NCCLASS(node));
- return node;
-}
-#else
-#define node_new_cclass_locked() node_new_cclass()
-#endif
-
-#ifdef USE_SHARED_CCLASS_TABLE
-static Node*
-node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
- const OnigCodePoint ranges[])
-{
- int n, i;
- CClassNode* cc;
- OnigCodePoint j;
-
- Node* node = node_new_cclass_locked();
- CHECK_NULL_RETURN(node);
-
- cc = NCCLASS(node);
- if (not != 0) NCCLASS_SET_NOT(cc);
-
- BITSET_CLEAR(cc->bs);
- if (sb_out > 0 && IS_NOT_NULL(ranges)) {
- n = ONIGENC_CODE_RANGE_NUM(ranges);
- for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
- j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
- if (j >= sb_out) goto sb_end;
-
- BITSET_SET_BIT(cc->bs, j);
- }
- }
- }
-
- sb_end:
- if (IS_NULL(ranges)) {
- is_null:
- cc->mbuf = NULL;
- }
- else {
- BBuf* bbuf;
-
- n = ONIGENC_CODE_RANGE_NUM(ranges);
- if (n == 0) goto is_null;
-
- bbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN(bbuf);
- bbuf->alloc = n + 1;
- bbuf->used = n + 1;
- bbuf->p = (UChar* )((void* )ranges);
-
- cc->mbuf = bbuf;
- }
-
- return node;
-}
-#endif /* USE_SHARED_CCLASS_TABLE */
-
static Node*
node_new_ctype(int type, int not, int ascii_range)
{
@@ -1548,6 +1431,7 @@ node_str_cat_codepoint(Node* node, OnigEncoding enc, OnigCodePoint c)
return onig_node_str_cat(node, buf, buf + num);
}
+#if 0
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
@@ -1557,6 +1441,7 @@ onig_node_conv_to_str_node(Node* node, int flag)
NSTR(node)->s = NSTR(node)->buf;
NSTR(node)->end = NSTR(node)->buf;
}
+#endif
extern void
onig_node_str_clear(Node* node)
@@ -1715,6 +1600,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
}
else {
PUNFETCH;
+ maxlen++;
break;
}
}
@@ -1886,7 +1772,7 @@ add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to,
static int
add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to)
{
- return add_code_range0(pbuf, env, from, to, 1);
+ return add_code_range0(pbuf, env, from, to, 1);
}
static int
@@ -1990,7 +1876,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
static int
and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1,
- OnigCodePoint* data, int n)
+ OnigCodePoint* data, int n)
{
int i, r;
OnigCodePoint from2, to2;
@@ -2204,8 +2090,8 @@ or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env)
static void UNKNOWN_ESC_WARN(ScanEnv *env, int c);
-static int
-conv_backslash_value(int c, ScanEnv* env)
+static OnigCodePoint
+conv_backslash_value(OnigCodePoint c, ScanEnv* env)
{
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) {
switch (c) {
@@ -2231,7 +2117,7 @@ conv_backslash_value(int c, ScanEnv* env)
}
#ifdef USE_NO_INVALID_QUANTIFIER
-#define is_invalid_quantifier_target(node) 0
+# define is_invalid_quantifier_target(node) 0
#else
static int
is_invalid_quantifier_target(Node* node)
@@ -2303,6 +2189,7 @@ enum ReduceType {
};
static enum ReduceType const ReduceTypeTable[6][6] = {
+/* '?', '*', '+', '??', '*?', '+?' p / c */
{RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */
{RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */
{RQ_A, RQ_A, RQ_DEL, RQ_ASIS, RQ_P_QQ, RQ_DEL}, /* '+' */
@@ -2505,6 +2392,7 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
PFETCH(c);
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
if (c != MC_ESC(env->syntax)) goto invalid;
+ if (PEND) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
@@ -2528,7 +2416,7 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
/* \M-, \C-, \c, or \... */
static int
-fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
+fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
{
int v;
OnigCodePoint c;
@@ -2547,9 +2435,8 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH_S(c);
if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )v;
+ v = fetch_escaped_value(&p, end, env, &c);
+ if (v < 0) return v;
}
c = ((c & 0xff) | 0x80);
}
@@ -2573,15 +2460,14 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
if (PEND) return ONIGERR_END_PATTERN_AT_CONTROL;
PFETCH_S(c);
if (c == '?') {
- c = 0177;
+ c = 0177;
}
else {
- if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env);
- if (v < 0) return v;
- c = (OnigCodePoint )v;
- }
- c &= 0x9f;
+ if (c == MC_ESC(env->syntax)) {
+ v = fetch_escaped_value(&p, end, env, &c);
+ if (v < 0) return v;
+ }
+ c &= 0x9f;
}
break;
}
@@ -2596,7 +2482,8 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
}
*src = p;
- return c;
+ *val = c;
+ return 0;
}
static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
@@ -2617,8 +2504,13 @@ get_name_end_code_point(OnigCodePoint start)
}
#ifdef USE_NAMED_GROUP
-#define ONIGENC_IS_CODE_NAME(enc, c) TRUE
-#ifdef USE_BACKREF_WITH_LEVEL
+# ifdef RUBY
+# define ONIGENC_IS_CODE_NAME(enc, c) TRUE
+# else
+# define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c)
+# endif
+
+# ifdef USE_BACKREF_WITH_LEVEL
/*
\k<name+n>, \k<name-n>
\k<num+n>, \k<num-n>
@@ -2678,11 +2570,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
if (is_num != 0) {
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
+ is_num = 1;
}
else {
- r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
+ r = ONIGERR_INVALID_GROUP_NAME;
+ is_num = 0;
}
}
else if (!ONIGENC_IS_CODE_NAME(enc, c)) {
@@ -2695,6 +2587,10 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
int level;
int flag = (c == '-' ? -1 : 1);
+ if (PEND) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ goto end;
+ }
PFETCH(c);
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
PUNFETCH;
@@ -2703,9 +2599,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
*rlevel = (level * flag);
exist_level = 1;
- PFETCH(c);
- if (c == end_code)
- goto end;
+ if (!PEND) {
+ PFETCH(c);
+ if (c == end_code)
+ goto end;
+ }
}
err:
@@ -2732,7 +2630,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
return r;
}
}
-#endif /* USE_BACKREF_WITH_LEVEL */
+# endif /* USE_BACKREF_WITH_LEVEL */
/*
ref: 0 -> define name (don't allow number name)
@@ -2769,17 +2667,17 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
if (ref == 1)
- is_num = 1;
+ is_num = 1;
else {
- r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
+ r = ONIGERR_INVALID_GROUP_NAME;
+ is_num = 0;
}
}
else if (c == '-') {
if (ref == 1) {
- is_num = 2;
- sign = -1;
- pnum_head = p;
+ is_num = 2;
+ sign = -1;
+ pnum_head = p;
}
else {
r = ONIGERR_INVALID_GROUP_NAME;
@@ -2796,30 +2694,30 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = p;
PFETCH_S(c);
if (c == end_code || c == ')') {
- if (is_num == 2) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto teardown;
- }
- break;
+ if (is_num == 2) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto teardown;
+ }
+ break;
}
if (is_num != 0) {
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
- }
- else {
- if (!ONIGENC_IS_CODE_WORD(enc, c))
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- else
- r = ONIGERR_INVALID_GROUP_NAME;
- goto teardown;
- }
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else {
+ if (!ONIGENC_IS_CODE_WORD(enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ else
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto teardown;
+ }
}
else {
- if (!ONIGENC_IS_CODE_NAME(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- goto teardown;
- }
+ if (!ONIGENC_IS_CODE_NAME(enc, c)) {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ goto teardown;
+ }
}
}
@@ -2833,8 +2731,8 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
}
*rback_num *= sign;
@@ -2845,12 +2743,12 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return 0;
}
else {
- teardown:
+teardown:
while (!PEND) {
name_end = p;
PFETCH_S(c);
if (c == end_code || c == ')')
- break;
+ break;
}
if (PEND)
name_end = end;
@@ -2939,8 +2837,6 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
#endif /* USE_NAMED_GROUP */
-void onig_vsnprintf_with_pattern(UChar buf[], int bufsize, OnigEncoding enc,
- UChar* pat, UChar* pat_end, const UChar *fmt, va_list args);
static void
onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
@@ -2952,10 +2848,14 @@ onig_syntax_warn(ScanEnv *env, const char *fmt, ...)
env->pattern, env->pattern_end,
(const UChar *)fmt, args);
va_end(args);
+#ifdef RUBY
if (env->sourcefile == NULL)
rb_warn("%s", (char *)buf);
else
rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf);
+#else
+ (*onig_warn)((char* )buf);
+#endif
}
static void
@@ -2979,6 +2879,10 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
}
}
+#ifndef RTEST
+# define RTEST(v) 1
+#endif
+
static void
CC_DUP_WARN(ScanEnv *env)
{
@@ -3148,6 +3052,8 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'p':
case 'P':
+ if (PEND) break;
+
c2 = PPEEK;
if (c2 == '{' &&
IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
@@ -3155,7 +3061,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
PFETCH(c2);
if (c2 == '^') {
tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
@@ -3178,10 +3084,10 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
- c2 = PPEEK;
- if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
- }
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
PINC;
@@ -3223,6 +3129,33 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
break;
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8')
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->base = 8;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ break;
+
case '0':
case '1': case '2': case '3': case '4': case '5': case '6': case '7':
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
@@ -3241,10 +3174,10 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
default:
PUNFETCH;
- num = fetch_escaped_value(&p, end, env);
+ num = fetch_escaped_value(&p, end, env, &c2);
if (num < 0) return num;
- if (tok->u.c != num) {
- tok->u.code = (OnigCodePoint )num;
+ if ((OnigCodePoint)tok->u.c != c2) {
+ tok->u.code = (OnigCodePoint )c2;
tok->type = TK_CODE_POINT;
}
break;
@@ -3302,15 +3235,15 @@ fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
prev = p;
-#ifdef USE_BACKREF_WITH_LEVEL
+# ifdef USE_BACKREF_WITH_LEVEL
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
r = fetch_name_with_level(c, &p, end, &name_end,
env, &back_num, &tok->u.backref.level);
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
-#else
+# else
r = fetch_name(&p, end, &name_end, env, &back_num, 1);
-#endif
+# endif
if (r < 0) return r;
if (back_num != 0) {
@@ -3348,7 +3281,7 @@ fetch_named_backref_token(OnigCodePoint c, OnigToken* tok, UChar** src,
tok->type = TK_BACKREF;
tok->u.backref.by_name = 1;
- if (num == 1) {
+ if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) {
tok->u.backref.num = 1;
tok->u.backref.ref1 = backs[0];
}
@@ -3601,9 +3534,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
- if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
- return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
- }
+ if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
PINC;
@@ -3644,13 +3577,39 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
break;
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ OnigCodePoint c = PPEEK;
+ if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8')
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ break;
+
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
PUNFETCH;
prev = p;
num = onig_scan_unsigned_number(&p, end, enc);
if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
- goto skip_backref;
+ goto skip_backref;
}
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
@@ -3698,7 +3657,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_NAMED_GROUP
case 'k':
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
PFETCH(c);
if (c == '<' || c == '\'') {
r = fetch_named_backref_token(c, tok, &p, end, env);
@@ -3714,8 +3673,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP)
case 'g':
-#ifdef USE_NAMED_GROUP
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) {
+# ifdef USE_NAMED_GROUP
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) {
PFETCH(c);
if (c == '{') {
r = fetch_named_backref_token(c, tok, &p, end, env);
@@ -3724,9 +3683,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else
PUNFETCH;
}
-#endif
-#ifdef USE_SUBEXP_CALL
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
+# endif
+# ifdef USE_SUBEXP_CALL
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
if (c == '<' || c == '\'') {
int gnum = -1, rel = 0;
@@ -3763,7 +3722,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
}
}
-#endif
+# endif
break;
#endif
@@ -3781,7 +3740,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_CHAR_PROPERTY;
tok->u.prop.not = (c == 'P' ? 1 : 0);
- if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
PFETCH(c);
if (c == '^') {
tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
@@ -3814,16 +3773,20 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
default:
- PUNFETCH;
- num = fetch_escaped_value(&p, end, env);
- if (num < 0) return num;
- /* set_raw: */
- if (tok->u.c != num) {
- tok->type = TK_CODE_POINT;
- tok->u.code = (OnigCodePoint )num;
- }
- else { /* string */
- p = tok->backp + enclen(enc, tok->backp, end);
+ {
+ OnigCodePoint c2;
+
+ PUNFETCH;
+ num = fetch_escaped_value(&p, end, env, &c2);
+ if (num < 0) return num;
+ /* set_raw: */
+ if ((OnigCodePoint)tok->u.c != c2) {
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )c2;
+ }
+ else { /* string */
+ p = tok->backp + enclen(enc, tok->backp, end);
+ }
}
break;
}
@@ -3913,22 +3876,22 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '(':
if (PPEEK_IS('?') &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
- PINC;
- if (PPEEK_IS('#')) {
- PFETCH(c);
- while (1) {
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- PFETCH(c);
- if (c == MC_ESC(syn)) {
- if (!PEND) PFETCH(c);
- }
- else {
- if (c == ')') break;
- }
- }
- goto start;
- }
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) {
+ PINC;
+ if (PPEEK_IS('#')) {
+ PFETCH(c);
+ while (1) {
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c == MC_ESC(syn)) {
+ if (!PEND) PFETCH(c);
+ }
+ else {
+ if (c == ')') break;
+ }
+ }
+ goto start;
+ }
#ifdef USE_PERL_SUBEXP_CALL
/* (?&name), (?n), (?R), (?0), (?+n), (?-n) */
c = PPEEK;
@@ -3999,6 +3962,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH_READY;
PINC; /* skip 'P' */
+ if (PEND) return ONIGERR_UNDEFINED_GROUP_OPTION;
PFETCH(c);
if (c == '=') { /* (?P=name): backref */
r = fetch_named_backref_token((OnigCodePoint )'(', tok, &p, end, env);
@@ -4017,10 +3981,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.call.rel = 0;
break;
}
- PUNFETCH;
}
#endif /* USE_CAPITAL_P_NAMED_GROUP */
- PUNFETCH;
+ PUNFETCH;
}
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LPAREN_SUBEXP)) break;
@@ -4098,8 +4061,8 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
if (not == 0) {
for (i = 0; i < n; i++) {
- for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
- j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
if (j >= sb_out) {
if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) {
r = add_code_range_to_buf(&(cc->mbuf), env, j,
@@ -4110,7 +4073,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
goto sb_end;
}
- BITSET_SET_BIT_CHKDUP(cc->bs, j);
+ BITSET_SET_BIT_CHKDUP(cc->bs, j);
}
}
@@ -4183,12 +4146,15 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en
CClassNode ccascii;
initialize_cclass(&ccascii);
if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
- add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
+ r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);
}
else {
bitset_set_range(env, ccascii.bs, 0x00, 0x7F);
+ r = 0;
+ }
+ if (r == 0) {
+ r = and_cclass(&ccwork, &ccascii, env);
}
- r = and_cclass(&ccwork, &ccascii, env);
if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf);
}
if (r == 0) {
@@ -4244,7 +4210,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
if (ascii_range)
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < maxcode; c++) {
@@ -4252,7 +4218,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
if (! ascii_range)
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;
@@ -4262,16 +4228,16 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, int ascii_range, ScanEnv* en
if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
if (! ascii_range)
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
&& (! ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode))
BITSET_SET_BIT_CHKDUP(cc->bs, c);
}
if (ascii_range)
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;
@@ -4330,7 +4296,7 @@ parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
if (onigenc_with_ascii_strncmp(enc, p, end, pb->name, pb->len) == 0) {
p = (UChar* )onigenc_step(enc, p, end, pb->len);
if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0)
- return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env);
if (r != 0) return r;
@@ -4361,7 +4327,7 @@ parse_posix_bracket(CClassNode* cc, CClassNode* asc_cc,
if (! PEND) {
PFETCH_S(c);
if (c == ']')
- return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
+ return ONIGERR_INVALID_POSIX_BRACKET_TYPE;
}
}
@@ -4441,7 +4407,7 @@ enum CCVALTYPE {
static int
next_state_class(CClassNode* cc, CClassNode* asc_cc,
- OnigCodePoint* vs, enum CCVALTYPE* type,
+ OnigCodePoint* vs, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
int r;
@@ -4499,8 +4465,8 @@ next_state_val(CClassNode* cc, CClassNode* asc_cc,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
- if (*vs > 0xff || v > 0xff)
- return ONIGERR_INVALID_CODE_POINT_VALUE;
+ if (*vs > 0xff || v > 0xff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
if (*vs > v) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
@@ -4604,6 +4570,9 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
enum CCVALTYPE val_type, in_type;
int val_israw, in_israw;
+ env->parse_depth++;
+ if (env->parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
prev_cc = asc_prev_cc = (CClassNode* )NULL;
*np = *asc_np = NULL_NODE;
r = fetch_token_in_cc(tok, src, end, env);
@@ -4687,7 +4656,7 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
goto err;
}
- len = enclen(env->enc, buf, buf+i);
+ len = enclen(env->enc, buf, buf + i);
if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
goto err;
@@ -4695,7 +4664,8 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
else if (i > len) { /* fetch back */
p = psave;
for (i = 1; i < len; i++) {
- r = fetch_token_in_cc(tok, &p, end, env);
+ (void)fetch_token_in_cc(tok, &p, end, env);
+ /* no need to check the return value (already checked above) */
}
fetched = 0;
}
@@ -4948,16 +4918,17 @@ parse_char_class(Node** np, Node** asc_np, OnigToken* tok, UChar** src, UChar* e
#define NEWLINE_CODE 0x0a
if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) {
- if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
- BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
- else {
- r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
- if (r < 0) goto err;
- }
+ if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1)
+ BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE);
+ else {
+ r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE);
+ if (r < 0) goto err;
+ }
}
}
}
*src = p;
+ env->parse_depth--;
return 0;
err:
@@ -5028,18 +4999,20 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
-#ifdef USE_CAPITAL_P_NAMED_GROUP
+# ifdef USE_CAPITAL_P_NAMED_GROUP
case 'P': /* (?P<name>...) */
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) {
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) {
PFETCH(c);
if (c == '<') goto named_group1;
}
return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
-#endif
+# endif
#endif
case '<': /* look behind (?<=...), (?<!...) */
+ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
*np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
@@ -5090,7 +5063,8 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
#ifdef USE_NAMED_GROUP
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
PFETCH(c);
if (c == '<' || c == '\'') {
list_capture = 1;
@@ -5115,7 +5089,8 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
break;
case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) {
+ if (!PEND &&
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) {
UChar *name = NULL;
UChar *name_end;
PFETCH(c);
@@ -5133,36 +5108,29 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#endif
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (num > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[num]))
+ IS_NULL(SCANENV_MEM_NODES(env)[num]))
return ONIGERR_INVALID_BACKREF;
}
}
#ifdef USE_NAMED_GROUP
else if (c == '<' || c == '\'') { /* (<name>), ('name') */
- int nums;
- int *backs;
-
name = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
+ r = fetch_named_backref_token(c, tok, &p, end, env);
if (r < 0) return r;
- PFETCH(c);
- if (c != ')') return ONIGERR_UNDEFINED_GROUP_OPTION;
+ if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
+ PINC;
- nums = onig_name_to_group_numbers(env->reg, name, name_end, &backs);
- if (nums <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) {
+ num = tok->u.backref.ref1;
}
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- int i;
- for (i = 0; i < nums; i++) {
- if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
- return ONIGERR_INVALID_BACKREF;
- }
+ else {
+ /* FIXME:
+ * Use left most named group for now. This is the same as Perl.
+ * However this should use the same strategy as normal back-
+ * references on Ruby syntax; search right to left. */
+ int len = tok->u.backref.num;
+ num = len > 1 ? tok->u.backref.refs[0] : tok->u.backref.ref1;
}
- num = backs[0]; /* XXX: use left most named group as Perl */
}
#endif
else
@@ -5187,7 +5155,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#endif
case '^': /* loads default options */
- if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
+ if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
/* d-imsx */
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
@@ -5197,7 +5165,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
PFETCH(c);
}
#if 0
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+ else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
/* d-imx */
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);
@@ -5255,8 +5223,8 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */
if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
- (neg == 0)) {
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
+ (neg == 0)) {
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);
ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);
@@ -5267,8 +5235,8 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'u':
if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) ||
- IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
- (neg == 0)) {
+ IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) &&
+ (neg == 0)) {
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1);
ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 1);
@@ -5279,11 +5247,11 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'd':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) &&
- (neg == 0)) {
+ (neg == 0)) {
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1);
}
else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) &&
- (neg == 0)) {
+ (neg == 0)) {
ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0);
ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0);
ONOFF(option, ONIG_OPTION_WORD_BOUND_ALL_RANGE, 0);
@@ -5313,9 +5281,12 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else if (c == ':') {
OnigOptionType prev = env->option;
- env->option = option;
+ env->option = option;
r = fetch_token(tok, &p, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ env->option = prev;
+ return r;
+ }
r = parse_subexp(&target, tok, term, &p, end, env);
env->option = prev;
if (r < 0) return r;
@@ -5430,29 +5401,29 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
int targetq_num = popular_quantifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
+ if (nestq_num >= 0 && targetq_num >= 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
- switch (ReduceTypeTable[targetq_num][nestq_num]) {
- case RQ_ASIS:
- break;
-
- case RQ_DEL:
- if (onig_warn != onig_null_warn) {
- onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
- PopularQStr[targetq_num]);
- }
- goto warn_exit;
- break;
-
- default:
- if (onig_warn != onig_null_warn) {
- onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
- PopularQStr[targetq_num], PopularQStr[nestq_num],
- ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
- }
- goto warn_exit;
- break;
- }
+ switch (ReduceTypeTable[targetq_num][nestq_num]) {
+ case RQ_ASIS:
+ break;
+
+ case RQ_DEL:
+ if (onig_warn != onig_null_warn) {
+ onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'",
+ PopularQStr[targetq_num]);
+ }
+ goto warn_exit;
+ break;
+
+ default:
+ if (onig_warn != onig_null_warn) {
+ onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression",
+ PopularQStr[targetq_num], PopularQStr[nestq_num],
+ ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]);
+ }
+ goto warn_exit;
+ break;
+ }
}
warn_exit:
@@ -5482,85 +5453,6 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
}
-#ifdef USE_SHARED_CCLASS_TABLE
-
-#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
-
-/* for ctype node hash table */
-
-typedef struct {
- OnigEncoding enc;
- int not;
- int type;
-} type_cclass_key;
-
-static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
-{
- if (x->type != y->type) return 1;
- if (x->enc != y->enc) return 1;
- if (x->not != y->not) return 1;
- return 0;
-}
-
-static st_index_t type_cclass_hash(type_cclass_key* key)
-{
- int i, val;
- UChar *p;
-
- val = 0;
-
- p = (UChar* )&(key->enc);
- for (i = 0; i < (int )sizeof(key->enc); i++) {
- val = val * 997 + (int )*p++;
- }
-
- p = (UChar* )(&key->type);
- for (i = 0; i < (int )sizeof(key->type); i++) {
- val = val * 997 + (int )*p++;
- }
-
- val += key->not;
- return val + (val >> 5);
-}
-
-static const struct st_hash_type type_type_cclass_hash = {
- type_cclass_cmp,
- type_cclass_hash,
-};
-
-static st_table* OnigTypeCClassTable;
-
-
-static int
-i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
-{
- if (IS_NOT_NULL(node)) {
- CClassNode* cc = NCCLASS(node);
- if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
- xfree(node);
- }
-
- if (IS_NOT_NULL(key)) xfree(key);
- return ST_DELETE;
-}
-
-extern int
-onig_free_shared_cclass_table(void)
-{
- /* THREAD_ATOMIC_START; */
- if (IS_NOT_NULL(OnigTypeCClassTable)) {
- onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
- onig_st_free_table(OnigTypeCClassTable);
- OnigTypeCClassTable = NULL;
- }
- /* THREAD_ATOMIC_END; */
-
- return 0;
-}
-
-#endif /* USE_SHARED_CCLASS_TABLE */
-
-
#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
static int
clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
@@ -5603,7 +5495,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
CClassNode* cc;
CClassNode* asc_cc;
BitSetRef bs;
- int add_flag;
+ int add_flag, r;
iarg = (IApplyCaseFoldArg* )arg;
env = iarg->env;
@@ -5630,7 +5522,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
if (add_flag) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ if (r < 0) return r;
}
else {
BITSET_SET_BIT(bs, *to);
@@ -5642,7 +5535,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
if (add_flag) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
- add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ r = add_code_range0(&(cc->mbuf), env, *to, *to, 0);
+ if (r < 0) return r;
}
else {
if (IS_NCCLASS_NOT(cc)) {
@@ -5732,7 +5626,7 @@ node_linebreak(Node** np, ScanEnv* env)
Node* target1 = NULL;
Node* target2 = NULL;
CClassNode* cc;
- int num1, num2;
+ int num1, num2, r;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
/* \x0D\x0A */
@@ -5748,7 +5642,8 @@ node_linebreak(Node** np, ScanEnv* env)
if (IS_NULL(right)) goto err;
cc = NCCLASS(right);
if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
- add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
+ r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D);
+ if (r != 0) goto err;
}
else {
bitset_set_range(env, cc->bs, 0x0A, 0x0D);
@@ -5757,8 +5652,10 @@ node_linebreak(Node** np, ScanEnv* env)
/* TODO: move this block to enc/unicode.c */
if (ONIGENC_IS_UNICODE(env->enc)) {
/* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
- add_code_range(&(cc->mbuf), env, 0x85, 0x85);
- add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
+ r = add_code_range(&(cc->mbuf), env, 0x85, 0x85);
+ if (r != 0) goto err;
+ r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029);
+ if (r != 0) goto err;
}
/* ...|... */
@@ -5787,7 +5684,7 @@ node_linebreak(Node** np, ScanEnv* env)
static int
propname2ctype(ScanEnv* env, const char* propname)
{
- UChar* name = (UChar*)propname;
+ UChar* name = (UChar* )propname;
int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII,
name, name + strlen(propname));
return ctype;
@@ -5796,21 +5693,23 @@ propname2ctype(ScanEnv* env, const char* propname)
static int
node_extended_grapheme_cluster(Node** np, ScanEnv* env)
{
+ Node* tmp = NULL;
Node* np1 = NULL;
Node* list = NULL;
Node* list2 = NULL;
Node* alt = NULL;
Node* alt2 = NULL;
+ BBuf *pbuf1 = NULL;
int r = 0;
+ int num1;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
+ OnigOptionType option;
#ifdef USE_UNICODE_PROPERTIES
if (ONIGENC_IS_UNICODE(env->enc)) {
/* UTF-8, UTF-16BE/LE, UTF-32BE/LE */
- Node* tmp = NULL;
- int num1, num2;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2];
CClassNode* cc;
- OnigOptionType option;
+ OnigCodePoint sb_out = (ONIGENC_MBC_MINLEN(env->enc) > 1) ? 0x00 : 0x80;
int extend = propname2ctype(env, "Grapheme_Cluster_Break=Extend");
/* Prepend*
@@ -5845,8 +5744,26 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
cc = NCCLASS(np1);
r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Control"), 1, 0, env);
if (r != 0) goto err;
- BITSET_CLEAR_BIT(cc->bs, 0x0a);
- BITSET_CLEAR_BIT(cc->bs, 0x0d);
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1) {
+ BBuf *pbuf2 = NULL;
+ r = add_code_range(&pbuf1, env, 0x0a, 0x0a);
+ if (r != 0) goto err;
+ r = add_code_range(&pbuf1, env, 0x0d, 0x0d);
+ if (r != 0) goto err;
+ r = and_code_range_buf(cc->mbuf, 0, pbuf1, 1, &pbuf2, env);
+ if (r != 0) {
+ bbuf_free(pbuf2);
+ goto err;
+ }
+ bbuf_free(pbuf1);
+ pbuf1 = NULL;
+ bbuf_free(cc->mbuf);
+ cc->mbuf = pbuf2;
+ }
+ else {
+ BITSET_CLEAR_BIT(cc->bs, 0x0a);
+ BITSET_CLEAR_BIT(cc->bs, 0x0d);
+ }
tmp = onig_node_new_alt(np1, NULL_NODE);
if (IS_NULL(tmp)) goto err;
@@ -6134,32 +6051,26 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x1F308, 0x1F308);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F33E, 0x1F33E);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F373, 0x1F373);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F393, 0x1F393);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3A4, 0x1F3A4);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3A8, 0x1F3A8);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3EB, 0x1F3EB);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3ED, 0x1F3ED);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F4BB, 0x1F4BC);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F527, 0x1F527);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F52C, 0x1F52C);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F680, 0x1F680);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F692, 0x1F692);
- if (r != 0) goto err;
+ {
+ static const OnigCodePoint ranges[] = {
+ 13,
+ 0x1F308, 0x1F308,
+ 0x1F33E, 0x1F33E,
+ 0x1F373, 0x1F373,
+ 0x1F393, 0x1F393,
+ 0x1F3A4, 0x1F3A4,
+ 0x1F3A8, 0x1F3A8,
+ 0x1F3EB, 0x1F3EB,
+ 0x1F3ED, 0x1F3ED,
+ 0x1F4BB, 0x1F4BC,
+ 0x1F527, 0x1F527,
+ 0x1F52C, 0x1F52C,
+ 0x1F680, 0x1F680,
+ 0x1F692, 0x1F692,
+ };
+ r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
+ if (r != 0) goto err;
+ }
r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env);
if (r != 0) goto err;
@@ -6176,11 +6087,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
/* Emoji variation sequence
* http://unicode.org/Public/emoji/4.0/emoji-zwj-sequences.txt
*/
- np1 = node_new_cclass();
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0xfe0f, buf);
+ if (r < 0) goto err;
+ np1 = node_new_str_raw(buf, buf + r);
if (IS_NULL(np1)) goto err;
- cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0xfe0f, 0xfe0f);
- if (r != 0) goto err;
tmp = node_new_quantifier(0, 1, 0);
if (IS_NULL(tmp)) goto err;
@@ -6195,14 +6105,17 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x2640, 0x2640);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x2642, 0x2642);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x2695, 0x2696);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x2708, 0x2708);
- if (r != 0) goto err;
+ {
+ static const OnigCodePoint ranges[] = {
+ 4,
+ 0x2640, 0x2640,
+ 0x2642, 0x2642,
+ 0x2695, 0x2696,
+ 0x2708, 0x2708,
+ };
+ r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
+ if (r != 0) goto err;
+ }
tmp = node_new_list(np1, list2);
if (IS_NULL(tmp)) goto err;
@@ -6220,11 +6133,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
alt2 = NULL;
/* ZWJ */
- np1 = node_new_cclass();
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
+ if (r < 0) goto err;
+ np1 = node_new_str_raw(buf, buf + r);
if (IS_NULL(np1)) goto err;
- cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D);
- if (r != 0) goto err;
tmp = node_new_list(np1, list2);
if (IS_NULL(tmp)) goto err;
@@ -6280,21 +6192,21 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = node_new_cclass();
if (IS_NULL(np1)) goto err;
cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x1F3C2, 0x1F3C2);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3C7, 0x1F3C7);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3CC, 0x1F3CC);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F3F3, 0x1F3F3);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F441, 0x1F441);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F46F, 0x1F46F);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F574, 0x1F574);
- if (r != 0) goto err;
- r = add_code_range(&(cc->mbuf), env, 0x1F6CC, 0x1F6CC);
+ {
+ static const OnigCodePoint ranges[] = {
+ 8,
+ 0x1F3C2, 0x1F3C2,
+ 0x1F3C7, 0x1F3C7,
+ 0x1F3CC, 0x1F3CC,
+ 0x1F3F3, 0x1F3F3,
+ 0x1F441, 0x1F441,
+ 0x1F46F, 0x1F46F,
+ 0x1F574, 0x1F574,
+ 0x1F6CC, 0x1F6CC,
+ };
+ r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges);
+ if (r != 0) goto err;
+ }
r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base"), 0, 0, env);
if (r != 0) goto err;
r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env);
@@ -6344,11 +6256,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
list2 = tmp;
np1 = NULL;
- np1 = node_new_cclass();
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
+ if (r < 0) goto err;
+ np1 = node_new_str_raw(buf, buf + r);
if (IS_NULL(np1)) goto err;
- cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D);
- if (r != 0) goto err;
tmp = node_new_list(np1, list2);
if (IS_NULL(tmp)) goto err;
@@ -6421,11 +6332,10 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
np1 = NULL;
/* Prepend+ */
- np1 = node_new_cclass();
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf);
+ if (r < 0) goto err;
+ np1 = node_new_str_raw(buf, buf + r);
if (IS_NULL(np1)) goto err;
- cc = NCCLASS(np1);
- r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D);
- if (r != 0) goto err;
tmp = node_new_quantifier(0, 1, 0);
if (IS_NULL(tmp)) goto err;
@@ -6462,39 +6372,60 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
if (IS_NULL(tmp)) goto err;
alt = tmp;
list = NULL;
-
- /* \x0D\x0A */
- num1 = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
- if (num1 < 0) return num1;
- num2 = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
- if (num2 < 0) return num2;
- np1 = node_new_str_raw(buf, buf + num1 + num2);
- if (IS_NULL(np1)) goto err;
-
- tmp = onig_node_new_alt(np1, alt);
- if (IS_NULL(tmp)) goto err;
- alt = tmp;
- np1 = NULL;
-
- /* (?>...) */
- *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
- if (IS_NULL(*np)) goto err;
- NENCLOSE(*np)->target = alt;
- return ONIG_NORMAL;
}
+ else
#endif /* USE_UNICODE_PROPERTIES */
- if (IS_NULL(*np)) {
+ {
/* PerlSyntax: (?s:.), RubySyntax: (?m:.) */
- OnigOptionType option;
np1 = node_new_anychar();
if (IS_NULL(np1)) goto err;
option = env->option;
ONOFF(option, ONIG_OPTION_MULTILINE, 0);
+ tmp = node_new_option(option);
+ if (IS_NULL(tmp)) goto err;
+ NENCLOSE(tmp)->target = np1;
+ np1 = tmp;
+
+ alt = onig_node_new_alt(np1, NULL_NODE);
+ if (IS_NULL(alt)) goto err;
+ np1 = NULL;
+ }
+
+ /* \x0D\x0A */
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf);
+ if (r < 0) goto err;
+ num1 = r;
+ r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1);
+ if (r < 0) goto err;
+ np1 = node_new_str_raw(buf, buf + num1 + r);
+ if (IS_NULL(np1)) goto err;
+
+ tmp = onig_node_new_alt(np1, alt);
+ if (IS_NULL(tmp)) goto err;
+ alt = tmp;
+ np1 = NULL;
+
+ /* (?>\x0D\x0A|...) */
+ tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ if (IS_NULL(tmp)) goto err;
+ NENCLOSE(tmp)->target = alt;
+ np1 = tmp;
+
+#ifdef USE_UNICODE_PROPERTIES
+ if (ONIGENC_IS_UNICODE(env->enc)) {
+ /* Don't ignore case. */
+ option = env->option;
+ ONOFF(option, ONIG_OPTION_IGNORECASE, 1);
*np = node_new_option(option);
if (IS_NULL(*np)) goto err;
NENCLOSE(*np)->target = np1;
}
+ else
+#endif
+ {
+ *np = np1;
+ }
return ONIG_NORMAL;
err:
@@ -6503,6 +6434,7 @@ node_extended_grapheme_cluster(Node** np, ScanEnv* env)
onig_node_free(list2);
onig_node_free(alt);
onig_node_free(alt2);
+ bbuf_free(pbuf1);
return (r == 0) ? ONIGERR_MEMORY : r;
}
@@ -6535,7 +6467,7 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
/* only one char found in the bbuf, save the code point. */
c = data[0];
if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) {
- /* skip if c is included in the bitset */
+ /* skip if c is included in the bitset */
c = not_found;
}
}
@@ -6549,9 +6481,9 @@ is_onechar_cclass(CClassNode* cc, OnigCodePoint* code)
Bits b1 = cc->bs[i];
if (b1 != 0) {
if (((b1 & (b1 - 1)) == 0) && (c == not_found)) {
- c = BITS_IN_ROOM * i + countbits(b1 - 1);
+ c = BITS_IN_ROOM * i + countbits(b1 - 1);
} else {
- return 0; /* the character class contains multiple chars */
+ return 0; /* the character class contains multiple chars */
}
}
}
@@ -6596,7 +6528,10 @@ parse_exp(Node** np, OnigToken* tok, int term,
env->option = NENCLOSE(*np)->option;
r = fetch_token(tok, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ env->option = prev;
+ return r;
+ }
r = parse_subexp(&target, tok, term, src, end, env);
env->option = prev;
if (r < 0) {
@@ -6749,69 +6684,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
{
CClassNode* cc;
-#ifdef USE_SHARED_CCLASS_TABLE
- const OnigCodePoint *mbr;
- OnigCodePoint sb_out;
-
- r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype,
- &sb_out, &mbr);
- if (r == 0 &&
- ! IS_ASCII_RANGE(env->option) &&
- ONIGENC_CODE_RANGE_NUM(mbr)
- >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
- type_cclass_key key;
- type_cclass_key* new_key;
-
- key.enc = env->enc;
- key.not = tok->u.prop.not;
- key.type = tok->u.prop.ctype;
-
- THREAD_ATOMIC_START;
-
- if (IS_NULL(OnigTypeCClassTable)) {
- OnigTypeCClassTable
- = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
- if (IS_NULL(OnigTypeCClassTable)) {
- THREAD_ATOMIC_END;
- return ONIGERR_MEMORY;
- }
- }
- else {
- if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
- (st_data_t* )np)) {
- THREAD_ATOMIC_END;
- break;
- }
- }
-
- *np = node_new_cclass_by_codepoint_range(tok->u.prop.not,
- sb_out, mbr);
- if (IS_NULL(*np)) {
- THREAD_ATOMIC_END;
- return ONIGERR_MEMORY;
- }
-
- cc = NCCLASS(*np);
- NCCLASS_SET_SHARE(cc);
- new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
- xmemcpy(new_key, &key, sizeof(type_cclass_key));
- onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
- (st_data_t )*np);
-
- THREAD_ATOMIC_END;
- }
- else {
-#endif
- *np = node_new_cclass();
- CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
- r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
- IS_ASCII_RANGE(env->option), env);
- if (r != 0) return r;
- if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
-#ifdef USE_SHARED_CCLASS_TABLE
- }
-#endif
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_MEMERR(*np);
+ cc = NCCLASS(*np);
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0,
+ IS_ASCII_RANGE(env->option), env);
+ if (r != 0) return r;
+ if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
break;
@@ -7041,6 +6920,9 @@ parse_subexp(Node** top, OnigToken* tok, int term,
Node *node, **headp;
*top = NULL;
+ env->parse_depth++;
+ if (env->parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
r = parse_branch(&node, tok, term, src, end, env);
if (r < 0) {
onig_node_free(node);
@@ -7078,6 +6960,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
return ONIGERR_PARSER_BUG;
}
+ env->parse_depth--;
return r;
}
diff --git a/regparse.h b/regparse.h
index caf0790b1c..111a840b84 100644
--- a/regparse.h
+++ b/regparse.h
@@ -1,11 +1,11 @@
-#ifndef ONIGURUMA_REGPARSE_H
-#define ONIGURUMA_REGPARSE_H
+#ifndef ONIGMO_REGPARSE_H
+#define ONIGMO_REGPARSE_H
/**********************************************************************
regparse.h - Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
* Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -317,9 +317,12 @@ typedef struct {
int curr_max_regnum;
int has_recursion;
#endif
+ unsigned int parse_depth;
int warnings_flag;
+#ifdef RUBY
const char* sourcefile;
int sourceline;
+#endif
} ScanEnv;
@@ -332,36 +335,35 @@ typedef struct {
int new_val;
} GroupNumRemap;
-extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
+extern int onig_renumber_name_table(regex_t* reg, GroupNumRemap* map);
#endif
-extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
-extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
-extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
-extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
-extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
-extern void onig_node_conv_to_str_node P_((Node* node, int raw));
-extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
-extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
-extern void onig_node_free P_((Node* node));
-extern Node* onig_node_new_enclose P_((int type));
-extern Node* onig_node_new_anchor P_((int type));
-extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
-extern Node* onig_node_new_list P_((Node* left, Node* right));
-extern Node* onig_node_list_add P_((Node* list, Node* x));
-extern Node* onig_node_new_alt P_((Node* left, Node* right));
-extern void onig_node_str_clear P_((Node* node));
-extern int onig_free_node_list P_((void));
-extern int onig_names_free P_((regex_t* reg));
-extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
-extern int onig_free_shared_cclass_table P_((void));
+extern int onig_strncmp(const UChar* s1, const UChar* s2, int n);
+extern void onig_strcpy(UChar* dest, const UChar* src, const UChar* end);
+extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode, UChar* arg, UChar* arg_end);
+extern int onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc);
+extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode);
+extern void onig_node_conv_to_str_node(Node* node, int raw);
+extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end);
+extern int onig_node_str_set(Node* node, const UChar* s, const UChar* end);
+extern void onig_node_free(Node* node);
+extern Node* onig_node_new_enclose(int type);
+extern Node* onig_node_new_anchor(int type);
+extern Node* onig_node_new_str(const UChar* s, const UChar* end);
+extern Node* onig_node_new_list(Node* left, Node* right);
+extern Node* onig_node_list_add(Node* list, Node* x);
+extern Node* onig_node_new_alt(Node* left, Node* right);
+extern void onig_node_str_clear(Node* node);
+extern int onig_names_free(regex_t* reg);
+extern int onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env);
+extern int onig_free_shared_cclass_table(void);
#ifdef ONIG_DEBUG
-#ifdef USE_NAMED_GROUP
+# ifdef USE_NAMED_GROUP
extern int onig_print_names(FILE*, regex_t*);
-#endif
+# endif
#endif
RUBY_SYMBOL_EXPORT_END
-#endif /* ONIGURUMA_REGPARSE_H */
+#endif /* ONIGMO_REGPARSE_H */
diff --git a/regsyntax.c b/regsyntax.c
index 7cb98f2d46..657ffcd0f3 100644
--- a/regsyntax.c
+++ b/regsyntax.c
@@ -3,7 +3,7 @@
**********************************************************************/
/*-
* Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
- * Copyright (c) 2011-2012 K.Takata <kentkt AT csc DOT jp>
+ * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -229,7 +229,7 @@ const OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
- ONIG_SYN_OP_ESC_C_CONTROL )
+ ONIG_SYN_OP_ESC_O_BRACE_OCTAL | ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL |
@@ -248,7 +248,8 @@ const OnigSyntaxType OnigSyntaxPerl = {
ONIG_SYN_OP2_ESC_K_NAMED_BACKREF )
, ( SYN_GNU_REGEX_BV |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
- ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL )
+ ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL |
+ ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP )
, ( ONIG_OPTION_SINGLELINE | ONIG_OPTION_CAPTURE_GROUP )
,
{
@@ -332,25 +333,25 @@ onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options)
}
extern unsigned int
-onig_get_syntax_op(OnigSyntaxType* syntax)
+onig_get_syntax_op(const OnigSyntaxType* syntax)
{
return syntax->op;
}
extern unsigned int
-onig_get_syntax_op2(OnigSyntaxType* syntax)
+onig_get_syntax_op2(const OnigSyntaxType* syntax)
{
return syntax->op2;
}
extern unsigned int
-onig_get_syntax_behavior(OnigSyntaxType* syntax)
+onig_get_syntax_behavior(const OnigSyntaxType* syntax)
{
return syntax->behavior;
}
extern OnigOptionType
-onig_get_syntax_options(OnigSyntaxType* syntax)
+onig_get_syntax_options(const OnigSyntaxType* syntax)
{
return syntax->options;
}
diff --git a/template/encdb.h.tmpl b/template/encdb.h.tmpl
index 9cbb1f0083..9de29bebde 100644
--- a/template/encdb.h.tmpl
+++ b/template/encdb.h.tmpl
@@ -41,7 +41,8 @@ encdirs.each do |encdir|
open(File.join(encdir,fn)) do |f|
name = nil
f.each_line do |line|
- if (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
+ if (/^#ifndef RUBY/ =~ line)..(/^#endif/ =~ line)
+ elsif (/^OnigEncodingDefine/ =~ line)..(/"(.*?)"/ =~ line)
if $1
if name
lines << %[ENC_SET_BASE("#$1", "#{name}");]
diff --git a/tool/enc-unicode.rb b/tool/enc-unicode.rb
index feb94c3e61..eddd39747a 100755
--- a/tool/enc-unicode.rb
+++ b/tool/enc-unicode.rb
@@ -20,7 +20,7 @@ end
$unicode_version = File.basename(ARGV[0])[/\A[.\d]+\z/]
-POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word Alnum ASCII]
+POSIX_NAMES = %w[NEWLINE Alpha Blank Cntrl Digit Graph Lower Print XPosixPunct Space Upper XDigit Word Alnum ASCII Punct]
def pair_codepoints(codepoints)
@@ -115,6 +115,7 @@ def define_posix_props(data)
data['Upper'] = data['Uppercase']
data['Lower'] = data['Lowercase']
data['Punct'] = data['Punctuation']
+ data['XPosixPunct'] = data['Punctuation'] + [0x24, 0x2b, 0x3c, 0x3d, 0x3e, 0x5e, 0x60, 0x7c, 0x7e]
data['Digit'] = data['Decimal_Number']
data['XDigit'] = (0x0030..0x0039).to_a + (0x0041..0x0046).to_a +
(0x0061..0x0066).to_a
@@ -260,7 +261,11 @@ $const_cache = {}
# given property, group of paired codepoints, and a human-friendly name for
# the group
def make_const(prop, data, name)
- puts "\n/* '#{prop}': #{name} */"
+ if name.empty?
+ puts "\n/* '#{prop}' */"
+ else
+ puts "\n/* '#{prop}': #{name} */"
+ end
if origprop = $const_cache.key(data)
puts "#define CR_#{prop} CR_#{origprop}"
else
@@ -387,7 +392,13 @@ props.concat parse_scripts(data, categories)
aliases = parse_aliases(data)
define_posix_props(data)
POSIX_NAMES.each do |name|
- make_const(name, data[name], "[[:#{name}:]]")
+ if name == 'XPosixPunct'
+ make_const(name, data[name], "[[:Punct:]]")
+ elsif name == 'Punct'
+ make_const(name, data[name], "")
+ else
+ make_const(name, data[name], "[[:#{name}:]]")
+ end
end
output.ifdef :USE_UNICODE_PROPERTIES
props.each do |name|