aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- enc/euc_jp.c 21
-rw-r--r-- enc/iso_8859_10.c 97
-rw-r--r-- enc/iso_8859_11.c 2
-rw-r--r-- enc/iso_8859_13.c 67
-rw-r--r-- enc/iso_8859_14.c 100
-rw-r--r-- enc/iso_8859_15.c 80
-rw-r--r-- enc/iso_8859_16.c 93
-rw-r--r-- enc/iso_8859_8.c 2
-rw-r--r-- enc/iso_8859_9.c 5
-rw-r--r-- enc/sjis.c 20
-rw-r--r-- enc/unicode.c 56
-rw-r--r-- enc/utf8.c 40
-rw-r--r-- include/ruby/oniguruma.h 19
-rw-r--r-- regcomp.c 128
-rw-r--r-- regenc.c 44
-rw-r--r-- regenc.h 18
-rw-r--r-- regerror.c 38
-rw-r--r-- regexec.c 121
-rw-r--r-- regint.h 30
-rw-r--r-- regparse.c 530
-rw-r--r-- regparse.h 3
-rw-r--r-- version.h 6
23 files changed, 872 insertions, 652 deletions
diff --git a/ChangeLog b/ChangeLog
index 54ec3c6214..8a3216f2d5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+Fri Jan 4 00:54:43 2008 Yukihiro Matsumoto <matz@ruby-lang.org>
+
+ * include/ruby/oniguruma.h: Oniguruma 5.9.1 merged.
+
Fri Jan 4 00:20:47 2008 Tanaka Akira <akr@fsij.org>
* io.c (io_ungetc): move data in buffer if it is required to store the
diff --git a/enc/euc_jp.c b/enc/euc_jp.c
index 35767412f1..4e397ed25a 100644
--- a/enc/euc_jp.c
+++ b/enc/euc_jp.c
@@ -137,7 +137,7 @@ eucjp_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
int c, i, len;
OnigCodePoint n;
- len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
+ len = enclen(ONIG_ENCODING_EUC_JP, p, end);
n = (OnigCodePoint )*p++;
if (len == 1) return n;
@@ -156,7 +156,8 @@ eucjp_code_to_mbclen(OnigCodePoint code, OnigEncoding enc)
else if (code > 0xffffff) return 0;
else if ((code & 0xff0000) >= 0x800000) return 3;
else if ((code & 0xff00) >= 0x8000) return 2;
- else return 0;
+ else
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
#if 0
@@ -188,8 +189,8 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf))
- return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf))
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
@@ -210,7 +211,7 @@ eucjp_mbc_case_fold(OnigCaseFoldType flag,
else {
int i;
- len = enc_len(ONIG_ENCODING_EUC_JP, p, end);
+ len = enclen(ONIG_ENCODING_EUC_JP, p, end);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
@@ -232,7 +233,7 @@ eucjp_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc
p = s;
while (!eucjp_islead(*p) && p > start) p--;
- len = enc_len(ONIG_ENCODING_EUC_JP, p, s);
+ len = enclen(ONIG_ENCODING_EUC_JP, p, s);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
@@ -311,7 +312,7 @@ eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
- return ONIGENC_ERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
@@ -320,7 +321,7 @@ eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
}
static int
-eucjp_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+eucjp_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
const OnigCodePoint* ranges[], OnigEncoding enc)
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
@@ -332,8 +333,8 @@ eucjp_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
- if (ctype >= PropertyListNum)
- return ONIGENC_ERR_TYPE_BUG;
+ if (ctype >= (OnigCtype )PropertyListNum)
+ return ONIGERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;
diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c
index 2b5affa4ee..9b8a035253 100644
--- a/enc/iso_8859_10.c
+++ b/enc/iso_8859_10.c
@@ -2,7 +2,7 @@
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,7 @@ iso_8859_10_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_10_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
@@ -155,52 +154,52 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
- { 0xa1, 0xb1 },
- { 0xa2, 0xb2 },
- { 0xa3, 0xb3 },
- { 0xa4, 0xb4 },
- { 0xa5, 0xb5 },
- { 0xa6, 0xb6 },
- { 0xa8, 0xb8 },
- { 0xa9, 0xb9 },
- { 0xaa, 0xba },
- { 0xab, 0xbb },
- { 0xac, 0xbc },
- { 0xae, 0xbe },
- { 0xaf, 0xbf },
-
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd7, 0xf7 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe }
+ { 0xa1, 0xb1 },
+ { 0xa2, 0xb2 },
+ { 0xa3, 0xb3 },
+ { 0xa4, 0xb4 },
+ { 0xa5, 0xb5 },
+ { 0xa6, 0xb6 },
+ { 0xa8, 0xb8 },
+ { 0xa9, 0xb9 },
+ { 0xaa, 0xba },
+ { 0xab, 0xbb },
+ { 0xac, 0xbc },
+ { 0xae, 0xbe },
+ { 0xaf, 0xbf },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
};
static int
diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c
index 343993d01c..aa5d879e95 100644
--- a/enc/iso_8859_11.c
+++ b/enc/iso_8859_11.c
@@ -2,7 +2,7 @@
iso8859_11.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c
index 4b6db1adf3..799df9e5bd 100644
--- a/enc/iso_8859_13.c
+++ b/enc/iso_8859_13.c
@@ -2,7 +2,7 @@
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,7 @@ iso_8859_13_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_13_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
@@ -159,37 +158,37 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe }
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
};
static int
diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c
index c54d8fcbd2..225154a0a6 100644
--- a/enc/iso_8859_14.c
+++ b/enc/iso_8859_14.c
@@ -2,7 +2,7 @@
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,8 @@ iso_8859_14_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_14_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
@@ -155,53 +155,53 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
- { 0xa1, 0xa2 },
- { 0xa4, 0xa5 },
- { 0xa6, 0xab },
- { 0xa8, 0xb8 },
- { 0xaa, 0xba },
- { 0xac, 0xbc },
- { 0xaf, 0xff },
-
- { 0xb0, 0xb1 },
- { 0xb2, 0xb3 },
- { 0xb4, 0xb5 },
- { 0xb7, 0xb9 },
- { 0xbb, 0xbf },
- { 0xbd, 0xbe },
-
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd7, 0xf7 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe }
+ { 0xa1, 0xa2 },
+ { 0xa4, 0xa5 },
+ { 0xa6, 0xab },
+ { 0xa8, 0xb8 },
+ { 0xaa, 0xba },
+ { 0xac, 0xbc },
+ { 0xaf, 0xff },
+
+ { 0xb0, 0xb1 },
+ { 0xb2, 0xb3 },
+ { 0xb4, 0xb5 },
+ { 0xb7, 0xb9 },
+ { 0xbb, 0xbf },
+ { 0xbd, 0xbe },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
};
static int
diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c
index de963706f4..339c7d9c32 100644
--- a/enc/iso_8859_15.c
+++ b/enc/iso_8859_15.c
@@ -2,7 +2,7 @@
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,8 @@ iso_8859_15_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_15_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag,
+ const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
@@ -159,43 +159,43 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
- { 0xa6, 0xa8 },
-
- { 0xb4, 0xb8 },
- { 0xbc, 0xbd },
- { 0xbe, 0xff },
-
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe }
+ { 0xa6, 0xa8 },
+
+ { 0xb4, 0xb8 },
+ { 0xbc, 0xbd },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
};
static int
diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c
index 1cf4f65672..9bd42b4689 100644
--- a/enc/iso_8859_16.c
+++ b/enc/iso_8859_16.c
@@ -2,7 +2,7 @@
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,7 @@ iso_8859_16_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_16_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
@@ -155,50 +154,50 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e
}
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
- { 0xa1, 0xa2 },
- { 0xa3, 0xb3 },
- { 0xa6, 0xa8 },
- { 0xaa, 0xba },
- { 0xac, 0xae },
- { 0xaf, 0xbf },
-
- { 0xb2, 0xb9 },
- { 0xb4, 0xb8 },
- { 0xbc, 0xbd },
- { 0xbe, 0xff },
-
- { 0xc0, 0xe0 },
- { 0xc1, 0xe1 },
- { 0xc2, 0xe2 },
- { 0xc3, 0xe3 },
- { 0xc4, 0xe4 },
- { 0xc5, 0xe5 },
- { 0xc6, 0xe6 },
- { 0xc7, 0xe7 },
- { 0xc8, 0xe8 },
- { 0xc9, 0xe9 },
- { 0xca, 0xea },
- { 0xcb, 0xeb },
- { 0xcc, 0xec },
- { 0xcd, 0xed },
- { 0xce, 0xee },
- { 0xcf, 0xef },
-
- { 0xd0, 0xf0 },
- { 0xd1, 0xf1 },
- { 0xd2, 0xf2 },
- { 0xd3, 0xf3 },
- { 0xd4, 0xf4 },
- { 0xd5, 0xf5 },
- { 0xd6, 0xf6 },
- { 0xd7, 0xf7 },
- { 0xd8, 0xf8 },
- { 0xd9, 0xf9 },
- { 0xda, 0xfa },
- { 0xdb, 0xfb },
- { 0xdc, 0xfc },
- { 0xdd, 0xfd },
- { 0xde, 0xfe }
+ { 0xa1, 0xa2 },
+ { 0xa3, 0xb3 },
+ { 0xa6, 0xa8 },
+ { 0xaa, 0xba },
+ { 0xac, 0xae },
+ { 0xaf, 0xbf },
+
+ { 0xb2, 0xb9 },
+ { 0xb4, 0xb8 },
+ { 0xbc, 0xbd },
+ { 0xbe, 0xff },
+
+ { 0xc0, 0xe0 },
+ { 0xc1, 0xe1 },
+ { 0xc2, 0xe2 },
+ { 0xc3, 0xe3 },
+ { 0xc4, 0xe4 },
+ { 0xc5, 0xe5 },
+ { 0xc6, 0xe6 },
+ { 0xc7, 0xe7 },
+ { 0xc8, 0xe8 },
+ { 0xc9, 0xe9 },
+ { 0xca, 0xea },
+ { 0xcb, 0xeb },
+ { 0xcc, 0xec },
+ { 0xcd, 0xed },
+ { 0xce, 0xee },
+ { 0xcf, 0xef },
+
+ { 0xd0, 0xf0 },
+ { 0xd1, 0xf1 },
+ { 0xd2, 0xf2 },
+ { 0xd3, 0xf3 },
+ { 0xd4, 0xf4 },
+ { 0xd5, 0xf5 },
+ { 0xd6, 0xf6 },
+ { 0xd7, 0xf7 },
+ { 0xd8, 0xf8 },
+ { 0xd9, 0xf9 },
+ { 0xda, 0xfa },
+ { 0xdb, 0xfb },
+ { 0xdc, 0xfc },
+ { 0xdd, 0xfd },
+ { 0xde, 0xfe }
};
static int
diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c
index 3a075b0af3..e0a70483a1 100644
--- a/enc/iso_8859_8.c
+++ b/enc/iso_8859_8.c
@@ -2,7 +2,7 @@
iso8859_8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c
index 1921c591ba..51ce4c8f66 100644
--- a/enc/iso_8859_9.c
+++ b/enc/iso_8859_9.c
@@ -2,7 +2,7 @@
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -124,8 +124,7 @@ iso_8859_9_mbc_case_fold(OnigCaseFoldType flag,
#if 0
static int
-iso_8859_9_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
int v;
const UChar* p = *pp;
diff --git a/enc/sjis.c b/enc/sjis.c
index 6d64acd02b..d526f66865 100644
--- a/enc/sjis.c
+++ b/enc/sjis.c
@@ -141,7 +141,7 @@ sjis_code_to_mbclen(OnigCodePoint code, OnigEncoding enc)
return 2;
}
else
- return 0;
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static OnigCodePoint
@@ -150,7 +150,7 @@ sjis_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
int c, i, len;
OnigCodePoint n;
- len = enc_len(ONIG_ENCODING_SJIS, p, end);
+ len = enclen(ONIG_ENCODING_SJIS, p, end);
c = *p++;
n = c;
if (len == 1) return n;
@@ -172,8 +172,8 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
*p++ = (UChar )(code & 0xff);
#if 0
- if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf))
- return REGERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf))
+ return REGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
@@ -192,7 +192,7 @@ sjis_mbc_case_fold(OnigCaseFoldType flag,
}
else {
int i;
- int len = enc_len(ONIG_ENCODING_SJIS, p, end);
+ int len = enclen(ONIG_ENCODING_SJIS, p, end);
for (i = 0; i < len; i++) {
*lower++ = *p++;
@@ -245,7 +245,7 @@ sjis_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
}
}
}
- len = enc_len(ONIG_ENCODING_SJIS, p, s);
+ len = enclen(ONIG_ENCODING_SJIS, p, s);
if (p + len > s) return (UChar* )p;
p += len;
return (UChar* )(p + ((s - p) & ~1));
@@ -322,7 +322,7 @@ sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
if (ctype >= (unsigned int )PropertyListNum)
- return ONIGENC_ERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
return onig_is_in_code_range((UChar* )PropertyList[ctype], code);
}
@@ -331,7 +331,7 @@ sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc)
}
static int
-sjis_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+sjis_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
const OnigCodePoint* ranges[], OnigEncoding enc)
{
if (ctype <= ONIGENC_MAX_STD_CTYPE) {
@@ -343,8 +343,8 @@ sjis_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
PROPERTY_LIST_INIT_CHECK;
ctype -= (ONIGENC_MAX_STD_CTYPE + 1);
- if (ctype >= PropertyListNum)
- return ONIGENC_ERR_TYPE_BUG;
+ if (ctype >= (OnigCtype )PropertyListNum)
+ return ONIGERR_TYPE_BUG;
*ranges = PropertyList[ctype];
return 0;
diff --git a/enc/unicode.c b/enc/unicode.c
index 8b1a1308dc..4642bbe5c1 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -10618,7 +10618,7 @@ static PosixBracketEntryType HashEntryData[] = {
static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM];
static int CodeRangeTableInited = 0;
-static void init_code_range_array() {
+static void init_code_range_array(void) {
THREAD_ATOMIC_START;
CodeRanges[0] = CR_NEWLINE;
@@ -10756,7 +10756,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncodi
}
if (ctype >= CODE_RANGES_NUM) {
- return ONIGENC_ERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
}
if (CodeRangeTableInited == 0) init_code_range_array();
@@ -10769,7 +10769,7 @@ extern int
onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
{
if (ctype >= CODE_RANGES_NUM) {
- return ONIGENC_ERR_TYPE_BUG;
+ return ONIGERR_TYPE_BUG;
}
if (CodeRangeTableInited == 0) init_code_range_array();
@@ -10780,7 +10780,7 @@ onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
}
extern int
-onigenc_utf16_32_get_ctype_code_range(int ctype, OnigCodePoint* sb_out,
+onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out,
const OnigCodePoint* ranges[])
{
*sb_out = 0x00;
@@ -10832,7 +10832,7 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end
if (len >= PROPERTY_NAME_MAX_SIZE)
return ONIGERR_INVALID_CHAR_PROPERTY_NAME;
- p += enc_len(enc, p, end);
+ p += enclen(enc, p, end);
}
buf[len] = 0;
@@ -10903,11 +10903,12 @@ static int init_case_fold_table(void)
FoldTable = st_init_numtable_with_size(1200);
if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY;
- for (i = 0; i < sizeof(CaseFold)/sizeof(CaseFold_11_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) {
p = &CaseFold[i];
st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
}
- for (i = 0; i < sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type));
+ i++) {
p = &CaseFold_Locale[i];
st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to));
}
@@ -10915,11 +10916,13 @@ static int init_case_fold_table(void)
Unfold1Table = st_init_numtable_with_size(1000);
if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY;
- for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type));
+ i++) {
p1 = &CaseUnfold_11[i];
st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
}
- for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type);
+ for (i = 0;
+ i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type));
i++) {
p1 = &CaseUnfold_11_Locale[i];
st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to));
@@ -10928,11 +10931,13 @@ static int init_case_fold_table(void)
Unfold2Table = st_init_table_with_size(&type_code2_hash, 200);
if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY;
- for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type));
+ i++) {
p2 = &CaseUnfold_12[i];
st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
}
- for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type);
+ for (i = 0;
+ i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type));
i++) {
p2 = &CaseUnfold_12_Locale[i];
st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to));
@@ -10941,7 +10946,8 @@ static int init_case_fold_table(void)
Unfold3Table = st_init_table_with_size(&type_code3_hash, 30);
if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY;
- for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type));
+ i++) {
p3 = &CaseUnfold_13[i];
st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to));
}
@@ -10953,7 +10959,8 @@ static int init_case_fold_table(void)
extern int
onigenc_unicode_mbc_case_fold(OnigEncoding enc,
- OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)
+ OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end,
+ UChar* fold)
{
CodePointList3 *to;
OnigCodePoint code;
@@ -10963,7 +10970,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc,
if (CaseFoldInited == 0) init_case_fold_table();
code = ONIGENC_MBC_TO_CODE(enc, p, end);
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
*pp += len;
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
@@ -11014,7 +11021,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
/* if (CaseFoldInited == 0) init_case_fold_table(); */
- for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type));
+ i++) {
p11 = &CaseUnfold_11[i];
for (j = 0; j < p11->to.n; j++) {
code = p11->from;
@@ -11053,7 +11061,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
}
else {
#endif
- for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type);
+ for (i = 0;
+ i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type));
i++) {
p11 = &CaseUnfold_11_Locale[i];
for (j = 0; j < p11->to.n; j++) {
@@ -11081,7 +11090,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
#endif
if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type));
+ i++) {
for (j = 0; j < CaseUnfold_12[i].to.n; j++) {
r = (*f)(CaseUnfold_12[i].to.code[j],
(OnigCodePoint* )CaseUnfold_12[i].from, 2, arg);
@@ -11100,7 +11110,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) {
#endif
- for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type);
+ for (i = 0;
+ i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type));
i++) {
for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) {
r = (*f)(CaseUnfold_12_Locale[i].to.code[j],
@@ -11121,7 +11132,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag,
}
#endif
- for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) {
+ for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type));
+ i++) {
for (j = 0; j < CaseUnfold_13[i].to.n; j++) {
r = (*f)(CaseUnfold_13[i].to.code[j],
(OnigCodePoint* )CaseUnfold_13[i].from, 3, arg);
@@ -11156,7 +11168,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
n = 0;
code = ONIGENC_MBC_TO_CODE(enc, p, end);
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
#ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI
if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) {
@@ -11305,7 +11317,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
else
codes[1] = code;
- clen = enc_len(enc, p, end);
+ clen = enclen(enc, p, end);
len += clen;
if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) {
for (i = 0; i < z2->n; i++) {
@@ -11326,7 +11338,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
else
codes[2] = code;
- clen = enc_len(enc, p, end);
+ clen = enclen(enc, p, end);
len += clen;
if (onig_st_lookup(Unfold3Table, (st_data_t )codes,
(void* )&z2) != 0) {
diff --git a/enc/utf8.c b/enc/utf8.c
index adfb2742a5..33cb0eed53 100644
--- a/enc/utf8.c
+++ b/enc/utf8.c
@@ -272,7 +272,7 @@ utf8_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
int c, len;
OnigCodePoint n;
- len = enc_len(ONIG_ENCODING_UTF8, p, end);
+ len = enclen(ONIG_ENCODING_UTF8, p, end);
c = *p++;
if (len > 1) {
len--;
@@ -307,33 +307,9 @@ utf8_code_to_mbclen(OnigCodePoint code, OnigEncoding enc)
else if (code == INVALID_CODE_FF) return 1;
#endif
else
- return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
+ return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
-#if 0
-static int
-utf8_code_to_mbc_first(OnigCodePoint code)
-{
- if ((code & 0xffffff80) == 0)
- return code;
- else {
- if ((code & 0xfffff800) == 0)
- return ((code>>6)& 0x1f) | 0xc0;
- else if ((code & 0xffff0000) == 0)
- return ((code>>12) & 0x0f) | 0xe0;
- else if ((code & 0xffe00000) == 0)
- return ((code>>18) & 0x07) | 0xf0;
- else if ((code & 0xfc000000) == 0)
- return ((code>>24) & 0x03) | 0xf8;
- else if ((code & 0x80000000) == 0)
- return ((code>>30) & 0x01) | 0xfc;
- else {
- return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
- }
- }
-}
-#endif
-
static int
utf8_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
{
@@ -383,7 +359,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
}
#endif
else {
- return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE;
+ return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
}
*p++ = UTF8_TRAIL0(code);
@@ -421,7 +397,7 @@ utf8_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
#if 0
static int
-utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
+is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
{
const UChar* p = *pp;
@@ -430,7 +406,7 @@ utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
else {
- (*pp) += enc_len(ONIG_ENCODING_UTF8, p);
+ (*pp) += enclen(ONIG_ENCODING_UTF8, p);
if (*p == 0xc3) {
int c = *(p + 1);
@@ -457,7 +433,7 @@ utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
static int
-utf8_get_ctype_code_range(int ctype, OnigCodePoint *sb_out,
+utf8_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out,
const OnigCodePoint* ranges[], OnigEncoding enc)
{
*sb_out = 0x80;
@@ -478,7 +454,7 @@ utf8_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc)
}
static int
-utf8_get_case_fold_codes_by_str(OnigCaseFoldType flag,
+get_case_fold_codes_by_str(OnigCaseFoldType flag,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[],
OnigEncoding enc)
{
@@ -497,7 +473,7 @@ OnigEncodingDefine(utf8, UTF8) = {
utf8_code_to_mbc,
utf8_mbc_case_fold,
onigenc_unicode_apply_all_case_fold,
- utf8_get_case_fold_codes_by_str,
+ get_case_fold_codes_by_str,
onigenc_unicode_property_name_to_ctype,
onigenc_unicode_is_code_ctype,
utf8_get_ctype_code_range,
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 0cf2e0c6e4..37305bc17c 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -39,7 +39,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 5
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 0
+#define ONIGURUMA_VERSION_TEENY 1
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -57,6 +57,12 @@ extern "C" {
# endif
#endif
+#ifdef HAVE_STDARG_H
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
# define P_(args) args
@@ -99,12 +105,12 @@ extern "C" {
typedef unsigned char OnigUChar;
typedef unsigned long OnigCodePoint;
+typedef unsigned int OnigCtype;
typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
-/* case fold flag */
-typedef unsigned int OnigCaseFoldType;
+typedef unsigned int OnigCaseFoldType; /* case fold flag */
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
@@ -156,8 +162,8 @@ typedef struct OnigEncodingTypeST {
int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc);
int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc);
int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end);
- int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype, struct OnigEncodingTypeST* enc);
- int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
+ int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc);
+ int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc);
OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, struct OnigEncodingTypeST* enc);
int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc);
void *auxiliary_data;
@@ -256,7 +262,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030;
#define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII
-#define enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
+#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
#define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1)
@@ -604,6 +610,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_NEVER_ENDING_RECURSION -221
#define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222
#define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223
+#define ONIGERR_INVALID_CODE_POINT_VALUE -400
#define ONIGERR_INVALID_WIDE_CHAR_VALUE -400
#define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401
#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402
diff --git a/regcomp.c b/regcomp.c
index f3d604a21d..19cd91fc03 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -115,7 +115,7 @@ static int
bitset_is_empty(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) {
+ for (i = 0; i < (int )BITSET_SIZE; i++) {
if (bs[i] != 0) return 0;
}
return 1;
@@ -416,8 +416,8 @@ compile_tree_n_times(Node* node, int n, regex_t* reg)
}
static int
-add_compile_string_length(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
+ regex_t* reg ARG_UNUSED, int ignore_case)
{
int len;
int op = select_str_opcode(mb_len, str_len, ignore_case);
@@ -469,13 +469,13 @@ compile_length_string_node(Node* node, regex_t* reg)
ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, p, sn->end);
+ prev_len = enclen(enc, p, sn->end);
p += prev_len;
slen = 1;
rlen = 0;
for (; p < sn->end; ) {
- len = enc_len(enc, p, sn->end);
+ len = enclen(enc, p, sn->end);
if (len == prev_len) {
slen++;
}
@@ -518,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg)
ambig = NSTRING_IS_AMBIG(node);
p = prev = sn->s;
- prev_len = enc_len(enc, p, end);
+ prev_len = enclen(enc, p, end);
p += prev_len;
slen = 1;
for (; p < end; ) {
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
if (len == prev_len) {
slen++;
}
@@ -1535,7 +1535,7 @@ compile_length_tree(Node* node, regex_t* reg)
{
BRefNode* br = NBREF(node);
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
if (IS_BACKREF_NEST_LEVEL(br)) {
r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
@@ -1659,9 +1659,9 @@ compile_tree(Node* node, regex_t* reg)
{
BRefNode* br = NBREF(node);
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
if (IS_BACKREF_NEST_LEVEL(br)) {
- r = add_opcode(reg, OP_BACKREF_AT_LEVEL);
+ r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
if (r) return r;
r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
if (r) return r;
@@ -1703,7 +1703,7 @@ compile_tree(Node* node, regex_t* reg)
}
if (r) return r;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
add_bacref_mems:
#endif
r = add_length(reg, br->back_num);
@@ -1951,7 +1951,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
}
#endif
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
static int
quantifiers_memory_node_info(Node* node)
{
@@ -2018,7 +2018,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */
+#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
static int
get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
@@ -2312,7 +2312,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
StrNode* sn = NSTR(node);
UChar *s = sn->s;
while (s < sn->end) {
- s += enc_len(reg->enc, s, sn->end);
+ s += enclen(reg->enc, s, sn->end);
(*len)++;
}
}
@@ -3003,25 +3003,12 @@ setup_subexp_call(Node* node, ScanEnv* env)
case NT_CALL:
{
- int n, num, *refs;
- UChar *p;
CallNode* cn = NCALL(node);
Node** nodes = SCANENV_MEM_NODES(env);
-#ifdef USE_NAMED_GROUP
- n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
-#else
- n = -1;
-#endif
- if (n <= 0) {
- /* name not found, check group number. (?*ddd) */
- p = cn->name;
- num = onig_scan_unsigned_number(&p, cn->name_end, env->enc);
- if (num <= 0 || p != cn->name_end) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
+ if (cn->group_num != 0) {
+ int gnum = cn->group_num;
+
#ifdef USE_NAMED_GROUP
if (env->num_named > 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -3029,32 +3016,47 @@ setup_subexp_call(Node* node, ScanEnv* env)
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
#endif
- if (num > env->num_mem) {
+ if (gnum > env->num_mem) {
onig_scan_env_set_error_string(env,
ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
}
- cn->ref_num = num;
- goto set_call_attr;
- }
- else if (n > 1) {
- onig_scan_env_set_error_string(env,
- ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
- }
- else {
- cn->ref_num = refs[0];
+
+#ifdef USE_NAMED_GROUP
set_call_attr:
- cn->target = nodes[cn->ref_num];
+#endif
+ cn->target = nodes[cn->group_num];
if (IS_NULL(cn->target)) {
onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num);
+ BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
cn->unset_addr_list = env->unset_addr_list;
}
+#ifdef USE_NAMED_GROUP
+ else {
+ int *refs;
+
+ int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
+ &refs);
+ if (n <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ else if (n > 1) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else {
+ cn->group_num = refs[0];
+ goto set_call_attr;
+ }
+ }
+#endif
}
break;
@@ -3389,7 +3391,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
goto err;
}
- len = enc_len(reg->enc, p, end);
+ len = enclen(reg->enc, p, end);
if (n == 0) {
if (IS_NULL(snode)) {
@@ -3706,7 +3708,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
if (IS_BACKREF_NEST_LEVEL(br)) {
BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
}
@@ -3731,7 +3733,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if (r) break;
if (d == 0) {
qn->target_empty_info = NQ_TARGET_IS_EMPTY;
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
r = quantifiers_memory_node_info(target);
if (r < 0) break;
if (r > 0) {
@@ -3898,7 +3900,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
/* set skip map for Boyer-Moor search */
static int
-set_bm_skip(UChar* s, UChar* end, OnigEncoding enc,
+set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
UChar skip[], int** int_skip)
{
int i, len;
@@ -3987,7 +3989,7 @@ map_position_value(OnigEncoding enc, int i)
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
};
- if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
+ if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) {
if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
return 20;
else
@@ -4019,7 +4021,7 @@ distance_value(MinMaxLen* mm)
if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
d = mm->max - mm->min;
- if (d < sizeof(dist_vals)/sizeof(dist_vals[0]))
+ if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0])))
/* return dist_vals[d] * 16 / (mm->min + 12); */
return (int )dist_vals[d];
else
@@ -4212,7 +4214,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
p = add->s;
end = p + add->len;
for (i = to->len; p < end; ) {
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
@@ -4227,14 +4229,14 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
}
static void
-concat_opt_exact_info_str(OptExactInfo* to,
- UChar* s, UChar* end, int raw, OnigEncoding enc)
+concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
+ int raw ARG_UNUSED, OnigEncoding enc)
{
int i, j, len;
UChar *p;
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
if (i + len > OPT_EXACT_MAXLEN) break;
for (j = 0; j < len && p < end; j++)
to->s[i++] = *p++;
@@ -4260,7 +4262,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
for (i = 0; i < to->len && i < add->len; ) {
if (to->s[i] != add->s[i]) break;
- len = enc_len(env->enc, to->s + i, to->s + to->len);
+ len = enclen(env->enc, to->s + i, to->s + to->len);
for (j = 1; j < len; j++) {
if (to->s[i+j] != add->s[i+j]) break;
@@ -5044,7 +5046,7 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
fputc((int )code, fp);
}
- p += enc_len(enc, p);
+ p += enclen(enc, p);
}
}
else {
@@ -5634,8 +5636,6 @@ onig_init(void)
extern int
onig_end(void)
{
- extern int onig_free_shared_cclass_table(void);
-
THREAD_ATOMIC_START;
#ifdef ONIG_DEBUG_STATISTICS
@@ -5679,11 +5679,11 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code)
}
extern int
-onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc)
+onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
{
int found;
- if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) {
+ if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
if (IS_NULL(cc->mbuf)) {
found = 0;
}
@@ -5776,7 +5776,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
{ OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
{ OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
- { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL },
+ { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
{ OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
{ OP_MEMORY_START, "mem-start", ARG_MEMNUM },
{ OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
@@ -5968,7 +5968,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
break;
case OP_EXACT1_IC:
- len = enc_len(enc, bp);
+ len = enclen(enc, bp);
p_string(f, len, bp);
bp += len;
break;
@@ -6043,7 +6043,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
}
break;
- case OP_BACKREF_AT_LEVEL:
+ case OP_BACKREF_WITH_LEVEL:
{
OnigOptionType option;
LengthType level;
@@ -6182,7 +6182,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NT_CCLASS:
fprintf(f, "<cclass:%x>", (int )node);
- if (IS_CCLASS_NOT(NCCLASS(node)) fputs(" not", f);
+ if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
if (NCCLASS(node)->mbuf) {
BBuf* bbuf = NCCLASS(node)->mbuf;
for (i = 0; i < bbuf->used; i++) {
diff --git a/regenc.c b/regenc.c
index ebeb086810..c6262b0909 100644
--- a/regenc.c
+++ b/regenc.c
@@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U
{
UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s);
if (p < s) {
- p += enc_len(enc, p, s);
+ p += enclen(enc, p, s);
}
return p;
}
@@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
if (p < s) {
if (prev) *prev = (const UChar* )p;
- p += enc_len(enc, p, s);
+ p += enclen(enc, p, s);
}
else {
if (prev) *prev = (const UChar* )NULL; /* Sorry */
@@ -351,7 +351,7 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = {
#endif
extern void
-onigenc_set_default_caseconv_table(const UChar* table)
+onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED)
{
/* nothing */
/* obsoleted. */
@@ -393,15 +393,16 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = {
};
extern int
-onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag,
+onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg,
OnigEncoding enc)
{
OnigCodePoint code;
int i, r;
- for (i = 0; i < sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)
- ; i++) {
+ for (i = 0;
+ i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes));
+ i++) {
code = OnigAsciiLowerMap[i].to;
r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg);
if (r != 0) return r;
@@ -435,8 +436,8 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag,
return 0;
}
-extern int
-ss_apply_all_case_fold(OnigCaseFoldType flag,
+static int
+ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
OnigApplyAllCaseFoldFunc f, void* arg)
{
static OnigCodePoint ss[] = { 0x73, 0x73 };
@@ -475,7 +476,7 @@ onigenc_apply_all_case_fold_with_map(int map_size,
extern int
onigenc_get_case_fold_codes_by_str_with_map(int map_size,
const OnigPairCaseFoldCodes map[],
- int ess_tsett_flag, OnigCaseFoldType flag,
+ int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
{
if (0x41 <= *p && *p <= 0x5a) {
@@ -555,7 +556,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
extern int
-onigenc_not_support_get_ctype_code_range(int ctype,
+onigenc_not_support_get_ctype_code_range(OnigCtype ctype,
OnigCodePoint* sb_out, const OnigCodePoint* ranges[],
OnigEncoding enc)
{
@@ -656,7 +657,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
int c, i, len;
OnigCodePoint n;
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
n = (OnigCodePoint )(*p++);
if (len == 1) return n;
@@ -669,8 +670,9 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end)
}
extern int
-onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp, const UChar* end, UChar* lower)
+onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
+ const UChar** pp, const UChar* end ARG_UNUSED,
+ UChar* lower)
{
int len;
const UChar *p = *pp;
@@ -683,7 +685,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag,
else {
int i;
- len = enc_len(enc, p, end);
+ len = enclen(enc, p, end);
for (i = 0; i < len; i++) {
*lower++ = *p++;
}
@@ -704,7 +706,7 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
}
- (*pp) += enc_len(enc, p);
+ (*pp) += enclen(enc, p);
return FALSE;
}
#endif
@@ -736,8 +738,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf, p) != (p - buf))
- return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(enc, buf, p) != (p - buf))
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
@@ -759,8 +761,8 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
*p++ = (UChar )(code & 0xff);
#if 1
- if (enc_len(enc, buf, p) != (p - buf))
- return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE;
+ if (enclen(enc, buf, p) != (p - buf))
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
#endif
return p - buf;
}
@@ -843,7 +845,7 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end,
if (x) return x;
sascii++;
- p += enc_len(enc, p, end);
+ p += enclen(enc, p, end);
}
return 0;
}
@@ -900,7 +902,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop,
}
extern int
-onigenc_property_list_init(int (*f)())
+onigenc_property_list_init(int (*f)(void))
{
int r;
diff --git a/regenc.h b/regenc.h
index 0bd0abeeb2..e34ce40b47 100644
--- a/regenc.h
+++ b/regenc.h
@@ -57,17 +57,21 @@ typedef struct {
#define FALSE 0
#endif
-/* error codes */
-#define ONIGENC_ERR_MEMORY -5
-#define ONIGENC_ERR_TYPE_BUG -6
-#define ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE -400
-#define ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE -401
+#ifndef ARG_UNUSED
+#if defined(__GNUC__)
+# define ARG_UNUSED __attribute__ ((unused))
+#else
+# define ARG_UNUSED
+#endif
+#endif
#define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0)
#define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0)
#define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL
#define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val)
+#define enclen(enc,p,e) ONIGENC_MBC_ENC_LEN(enc,p,e)
+
/* character types bit flag */
#define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE)
#define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA)
@@ -111,7 +115,7 @@ ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, Oni
ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc));
ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
-ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
+ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc));
ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc));
@@ -141,7 +145,7 @@ ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint co
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc));
-ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((int ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
diff --git a/regerror.c b/regerror.c
index cf80bbfc99..2bc2da4c71 100644
--- a/regerror.c
+++ b/regerror.c
@@ -142,8 +142,8 @@ onig_error_code_to_format(int code)
p = "too big wide-char value"; break;
case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE:
p = "too long wide-char value"; break;
- case ONIGERR_INVALID_WIDE_CHAR_VALUE:
- p = "invalid wide-char value"; break;
+ case ONIGERR_INVALID_CODE_POINT_VALUE:
+ p = "invalid code point value"; break;
case ONIGERR_EMPTY_GROUP_NAME:
p = "group name is empty"; break;
case ONIGERR_INVALID_GROUP_NAME:
@@ -182,6 +182,15 @@ onig_error_code_to_format(int code)
return (UChar* )p;
}
+static void sprint_byte(char* s, unsigned int v)
+{
+ sprintf(s, "%02x", (v & 0377));
+}
+
+static void sprint_byte_with_x(char* s, unsigned int v)
+{
+ sprintf(s, "\\x%02x", (v & 0377));
+}
static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
UChar buf[], int buf_size, int *is_over)
@@ -196,10 +205,17 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
while (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code >= 0x80) {
- if (len + 5 <= buf_size) {
- sprintf((char* )(&(buf[len])), "\\x%02X",
- (unsigned int )(code & 0377));
- len += 5;
+ if (code > 0xffff && len + 10 <= buf_size) {
+ sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
+ sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
+ sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
+ sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
+ len += 10;
+ }
+ else if (len + 6 <= buf_size) {
+ sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
+ sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
+ len += 6;
}
else {
break;
@@ -209,7 +225,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
buf[len++] = (UChar )code;
}
- p += enc_len(enc, p, end);
+ p += enclen(enc, p, end);
if (len >= buf_size) break;
}
@@ -330,7 +346,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
while (p < pat_end) {
if (*p == '\\') {
*s++ = *p++;
- len = enc_len(enc, p, pat_end);
+ len = enclen(enc, p, pat_end);
while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
@@ -338,7 +354,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
*s++ = *p++;
}
else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) {
- len = enc_len(enc, p, pat_end);
+ len = enclen(enc, p, pat_end);
if (ONIGENC_MBC_MINLEN(enc) == 1) {
while (len-- > 0) *s++ = *p++;
}
@@ -346,7 +362,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
int blen;
while (len-- > 0) {
- sprintf((char* )bs, "\\x%02X", *p++ & 0377);
+ sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
@@ -355,7 +371,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
!ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprintf((char* )bs, "\\x%02X", *p++ & 0377);
+ sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (len-- > 0) *s++ = *bp++;
diff --git a/regexec.c b/regexec.c
index 84d69659c2..be3398961e 100644
--- a/regexec.c
+++ b/regexec.c
@@ -29,10 +29,12 @@
#include "regint.h"
+#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
+
#ifdef USE_CRNL_AS_LINE_TERMINATOR
#define ONIGENC_IS_MBC_CRNL(enc,p,end) \
(ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
- ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end))
+ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end))
#endif
#ifdef USE_CAPTURE_HISTORY
@@ -196,7 +198,7 @@ onig_region_resize(OnigRegion* region, int n)
return 0;
}
-extern int
+static int
onig_region_resize_clear(OnigRegion* region, int n)
{
int r;
@@ -1019,7 +1021,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define IS_EMPTY_STR (str == end)
#define ON_STR_BEGIN(s) ((s) == str)
#define ON_STR_END(s) ((s) == end)
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#define DATA_ENSURE_CHECK1 (s < right_range)
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
@@ -1027,7 +1029,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define DATA_ENSURE_CHECK1 (s < end)
#define DATA_ENSURE_CHECK(n) (s + (n) <= end)
#define DATA_ENSURE(n) if (s + (n) > end) goto fail
-#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
#ifdef USE_CAPTURE_HISTORY
@@ -1072,7 +1074,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
}
#endif
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
static int mem_is_in_memp(int mem, int num, UChar* memp)
{
int i;
@@ -1140,7 +1142,7 @@ static int backref_match_at_nested_level(regex_t* reg
return 0;
}
-#endif /* USE_BACKREF_AT_LEVEL */
+#endif /* USE_BACKREF_WITH_LEVEL */
#ifdef ONIG_DEBUG_STATISTICS
@@ -1234,7 +1236,7 @@ typedef struct {
/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
const UChar* right_range,
#endif
const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
@@ -1296,7 +1298,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fprintf(stderr, "%4d> \"", (int )(s - str));
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enc_len(encode, q);
+ len = enclen(encode, q);
while (len-- > 0) *bp++ = *q++;
}
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
@@ -1328,7 +1330,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
best_len = n;
region = msa->region;
if (region) {
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
if (IS_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
@@ -1351,7 +1353,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
else {
-#endif /* USE_POSIX_REGION_OPTION */
+#endif /* USE_POSIX_API_REGION_OPTION */
region->beg[0] = sstart - str;
region->end[0] = s - str;
for (i = 1; i <= num_mem; i++) {
@@ -1397,7 +1399,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
#endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
} /* else IS_POSIX_REGION() */
#endif
} /* if (region) */
@@ -1642,7 +1644,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
+ s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
MOP_OUT;
break;
@@ -1657,7 +1659,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int mb_len;
DATA_ENSURE(1);
- mb_len = enc_len(encode, s, end);
+ mb_len = enclen(encode, s, end);
DATA_ENSURE(mb_len);
ss = s;
s += mb_len;
@@ -1697,7 +1699,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1);
if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
p += SIZE_BITSET;
- s += enc_len(encode, s, end);
+ s += enclen(encode, s, end);
MOP_OUT;
break;
@@ -1715,7 +1717,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
{
OnigCodePoint code;
UChar *ss;
- int mb_len = enc_len(encode, s, end);
+ int mb_len = enclen(encode, s, end);
if (! DATA_ENSURE_CHECK(mb_len)) {
DATA_ENSURE(1);
@@ -1769,7 +1771,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(1);
GET_POINTER_INC(node, p);
- mb_len = enc_len(encode, s, end);
+ mb_len = enclen(encode, s, end);
ss = s;
s += mb_len;
DATA_ENSURE(0);
@@ -1781,7 +1783,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR: MOP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
s += n;
@@ -1790,7 +1792,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML);
DATA_ENSURE(1);
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
DATA_ENSURE(n);
s += n;
MOP_OUT;
@@ -1799,7 +1801,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1811,7 +1813,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR);
while (DATA_ENSURE_CHECK1) {
STACK_PUSH_ALT(p, s, sprev);
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@@ -1830,7 +1832,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1845,7 +1847,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*p == *s) {
STACK_PUSH_ALT(p + 1, s, sprev);
}
- n = enc_len(encode, s, end);
+ n = enclen(encode, s, end);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@@ -1868,7 +1870,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enc_len(encode, s);
+ n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
sprev = s;
@@ -1886,7 +1888,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (scv) goto fail;
STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem);
- n = enc_len(encode, s);
+ n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
sprev = s;
@@ -1906,7 +1908,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, s, end);
+ s += enclen(encode, s, end);
MOP_OUT;
break;
@@ -1915,7 +1917,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (ONIGENC_IS_MBC_WORD(encode, s, end))
goto fail;
- s += enc_len(encode, s, end);
+ s += enclen(encode, s, end);
MOP_OUT;
break;
@@ -2043,14 +2045,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
- ON_STR_END(s + enc_len(encode, s, end))) {
+ ON_STR_END(s + enclen(encode, s, end))) {
MOP_OUT;
continue;
}
#ifdef USE_CRNL_AS_LINE_TERMINATOR
else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) {
- UChar* ss = s + enc_len(encode, s);
- ss += enc_len(encode, ss);
+ UChar* ss = s + enclen(encode, s);
+ ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
MOP_OUT;
continue;
@@ -2157,7 +2159,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n);
sprev = s;
STRING_CMP(pstart, s, n);
- while (sprev + (len = enc_len(encode, sprev, end)) < s)
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
MOP_OUT;
@@ -2189,7 +2191,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(n);
sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enc_len(encode, sprev, end)) < s)
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
MOP_OUT;
@@ -2224,7 +2226,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE(pstart, swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, sprev, end)) < s)
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -2263,7 +2265,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enc_len(encode, sprev, end)) < s)
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
p += (SIZE_MEMNUM * (tlen - i - 1));
@@ -2275,8 +2277,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
break;
-#ifdef USE_BACKREF_AT_LEVEL
- case OP_BACKREF_AT_LEVEL:
+#ifdef USE_BACKREF_WITH_LEVEL
+ case OP_BACKREF_WITH_LEVEL:
{
int len;
OnigOptionType ic;
@@ -2289,7 +2291,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s;
if (backref_match_at_nested_level(reg, stk, stk_base, ic
, case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
- while (sprev + (len = enc_len(encode, sprev, end)) < s)
+ while (sprev + (len = enclen(encode, sprev, end)) < s)
sprev += len;
p += (SIZE_MEMNUM * tlen);
@@ -2361,7 +2363,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
continue;
break;
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
{
int isnull;
@@ -2389,7 +2391,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int isnull;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK
+#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
#else
STACK_NULL_CHECK_REC(isnull, mem, s);
@@ -2760,7 +2762,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
if (t == target_end)
return s;
}
- s += enc_len(enc, s, end);
+ s += enclen(enc, s, end);
}
return (UChar* )NULL;
@@ -2805,7 +2807,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
s, text_end))
return s;
- s += enc_len(enc, s, text_end);
+ s += enclen(enc, s, text_end);
}
return (UChar* )NULL;
@@ -2903,7 +2905,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->map[*se];
t = s;
do {
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
}
@@ -2919,7 +2921,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
skip = reg->int_map[*se];
t = s;
do {
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while ((s - t) < skip && s < end);
}
}
@@ -2966,7 +2968,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
static int
-set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip)
+set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
+ int** skip)
{
int i, len;
@@ -3024,7 +3027,7 @@ map_search(OnigEncoding enc, UChar map[],
while (s < text_range) {
if (map[*s]) return (UChar* )s;
- s += enc_len(enc, s, text_range);
+ s += enclen(enc, s, text_range);
}
return (UChar* )NULL;
}
@@ -3086,7 +3089,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
#endif
if (region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
#endif
) {
@@ -3098,7 +3101,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
if (r == 0) {
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end,
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
end,
#endif
at, prev, &msa);
@@ -3127,7 +3130,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
else {
UChar *q = p + reg->dmin;
- while (p < q) p += enc_len(reg->enc, p, end);
+ while (p < q) p += enclen(reg->enc, p, end);
}
}
@@ -3158,7 +3161,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
if (p - reg->dmin < s) {
retry_gate:
pprev = p;
- p += enc_len(reg->enc, p, end);
+ p += enclen(reg->enc, p, end);
goto retry;
}
@@ -3353,7 +3356,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
UChar *s, *prev;
OnigMatchArg msa;
const UChar *orig_start = start;
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
const UChar *orig_range = range;
#endif
@@ -3389,7 +3392,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
#endif
if (region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
#endif
) {
@@ -3400,7 +3403,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
@@ -3444,7 +3447,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
else goto finish; /* error */ \
}
#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
-#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */
+#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
/* anchor optimize: resume search range */
@@ -3604,7 +3607,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
while (s <= high) {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
}
} while (s < range);
goto mismatch;
@@ -3617,11 +3620,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) {
prev = s;
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
}
} while (s < range);
goto mismatch;
@@ -3632,7 +3635,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
do {
MATCH_AND_RETURN_CHECK(orig_range);
prev = s;
- s += enc_len(reg->enc, s, end);
+ s += enclen(reg->enc, s, end);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
@@ -3640,9 +3643,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
}
}
else { /* backward search */
-#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE
+#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
if (orig_start < end)
- orig_start += enc_len(reg->enc, orig_start); /* is upper range */
+ orig_start += enclen(reg->enc, orig_start, end); /* is upper range */
#endif
if (reg->optimize != ONIG_OPTIMIZE_NONE) {
@@ -3718,7 +3721,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
if (IS_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_REGION_OPTION
+#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
#endif
) {
diff --git a/regint.h b/regint.h
index 182eed2670..58b3b4ab9a 100644
--- a/regint.h
+++ b/regint.h
@@ -51,19 +51,19 @@
(defined(__ppc__) && defined(__APPLE__)) || \
defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \
defined(__mc68020__)
-/* #define PLATFORM_UNALIGNED_WORD_ACCESS */
+#define PLATFORM_UNALIGNED_WORD_ACCESS
#endif
/* config */
/* spec. config */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
-#define USE_BACKREF_AT_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
+#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
+#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* #define USE_RECOMPILE_API */
-/* #define USE_CRNL_AS_LINE_TERMINATOR */ /* moved to regenc.h. */
+/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
/* internal config */
#define USE_PARSE_TREE_NODE_RECYCLE
@@ -75,6 +75,12 @@
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+#if defined(__GNUC__)
+# define ARG_UNUSED __attribute__ ((unused))
+#else
+# define ARG_UNUSED
+#endif
+
/* */
/* escape other system UChar definition */
#ifndef RUBY_DEFINES_H
@@ -236,7 +242,6 @@
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
-
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
#define PLATFORM_GET_INC(val,p,type) do{\
@@ -287,17 +292,17 @@ typedef unsigned int BitStatusType;
#define BIT_STATUS_CLEAR(stats) (stats) = 0
#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
#define BIT_STATUS_AT(stats,n) \
- ((n) < BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1))
+ ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << (n))) : ((stats) & 1)) /* (n) parenthesized like BIT_STATUS_ON_AT so expression arguments shift correctly */
#define BIT_STATUS_ON_AT(stats,n) do {\
- if ((n) < BIT_STATUS_BITS_NUM)\
+ if ((n) < (int )BIT_STATUS_BITS_NUM) \
(stats) |= (1 << (n));\
else\
(stats) |= 1;\
} while (0)
#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
- if ((n) < BIT_STATUS_BITS_NUM)\
+ if ((n) < (int )BIT_STATUS_BITS_NUM)\
(stats) |= (1 << (n));\
} while (0)
@@ -353,7 +358,7 @@ typedef Bits* BitSetRef;
#define BITSET_CLEAR(bs) do {\
int i;\
- for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\
+ for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
} while (0)
#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
@@ -520,7 +525,7 @@ enum OpCode {
OP_BACKREFN_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
- OP_BACKREF_AT_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
+ OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_MEMORY_START,
OP_MEMORY_START_PUSH, /* push back-tracker to stack */
@@ -831,6 +836,9 @@ extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_ke
}
extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize));
-extern int onigenc_property_list_init P_((int (*f)()));
+
+typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void);
+
+extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE));
#endif /* REGINT_H */
diff --git a/regparse.c b/regparse.c
index f86abc80ab..e5a732053b 100644
--- a/regparse.c
+++ b/regparse.c
@@ -31,6 +31,9 @@
#define WARN_BUFSIZE 256
+#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+
+
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
@@ -69,7 +72,7 @@ OnigSyntaxType OnigSyntaxRuby = {
OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;
-extern void onig_null_warn(const char* s) { }
+extern void onig_null_warn(const char* s ARG_UNUSED) { }
#ifdef DEFAULT_WARN_FUNCTION
static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION;
@@ -117,6 +120,9 @@ bbuf_clone(BBuf** rto, BBuf* from)
return 0;
}
+#define BACKREF_REL_TO_ABS(rel_no, env) \
+ ((env)->num_mem + 1 + (rel_no))
+
#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
#define MBCODE_START_POS(enc) \
@@ -136,7 +142,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
#define BITSET_IS_EMPTY(bs,empty) do {\
int i;\
empty = 1;\
- for (i = 0; i < BITSET_SIZE; i++) {\
+ for (i = 0; i < (int )BITSET_SIZE; i++) {\
if ((bs)[i] != 0) {\
empty = 0; break;\
}\
@@ -165,35 +171,35 @@ static void
bitset_invert(BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
+ for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); }
}
static void
bitset_invert_to(BitSetRef from, BitSetRef to)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); }
+ for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); }
}
static void
bitset_and(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; }
+ for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; }
}
static void
bitset_or(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; }
+ for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; }
}
static void
bitset_copy(BitSetRef dest, BitSetRef bs)
{
int i;
- for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; }
+ for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; }
}
extern int
@@ -218,6 +224,7 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
}
}
+#ifdef USE_NAMED_GROUP
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
@@ -236,7 +243,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
return r;
}
-
+#endif
/* scan pattern methods */
#define PEND_VALUE 0
@@ -287,32 +294,20 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end,
return r;
}
-#ifdef USE_NAMED_GROUP
-
-#define INIT_NAME_BACKREFS_ALLOC_NUM 8
-
-typedef struct {
- UChar* name;
- int name_len; /* byte length */
- int back_num; /* number of backrefs */
- int back_alloc;
- int back_ref1;
- int* back_refs;
-} NameEntry;
#ifdef USE_ST_LIBRARY
#include "ruby/st.h"
typedef struct {
- unsigned char* s;
- unsigned char* end;
-} st_strend_key;
+ UChar* s;
+ UChar* end;
+} st_str_end_key;
static int
-str_end_cmp(st_strend_key* x, st_strend_key* y)
+str_end_cmp(st_str_end_key* x, st_str_end_key* y)
{
- unsigned char *p, *q;
+ UChar *p, *q;
int c;
if ((x->end - x->s) != (y->end - y->s))
@@ -331,7 +326,7 @@ str_end_cmp(st_strend_key* x, st_strend_key* y)
}
static int
-str_end_hash(st_strend_key* x)
+str_end_hash(st_str_end_key* x)
{
UChar *p;
int val = 0;
@@ -347,7 +342,7 @@ str_end_hash(st_strend_key* x)
extern hash_table_type*
onig_st_init_strend_table_with_size(int size)
{
- static const struct st_hash_type hashType = {
+ static struct st_hash_type hashType = {
str_end_cmp,
str_end_hash,
};
@@ -360,7 +355,7 @@ extern int
onig_st_lookup_strend(hash_table_type* table, const UChar* str_key,
const UChar* end_key, hash_data_type *value)
{
- st_strend_key key;
+ st_str_end_key key;
key.s = (UChar* )str_key;
key.end = (UChar* )end_key;
@@ -372,10 +367,10 @@ extern int
onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
const UChar* end_key, hash_data_type value)
{
- st_strend_key* key;
+ st_str_end_key* key;
int result;
- key = (st_strend_key* )xmalloc(sizeof(st_strend_key));
+ key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
key->s = (UChar* )str_key;
key->end = (UChar* )end_key;
result = onig_st_insert(table, (st_data_t )key, value);
@@ -385,6 +380,23 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
return result;
}
+#endif /* USE_ST_LIBRARY */
+
+
+#ifdef USE_NAMED_GROUP
+
+#define INIT_NAME_BACKREFS_ALLOC_NUM 8
+
+typedef struct {
+ UChar* name;
+ int name_len; /* byte length */
+ int back_num; /* number of backrefs */
+ int back_alloc;
+ int back_ref1;
+ int* back_refs;
+} NameEntry;
+
+#ifdef USE_ST_LIBRARY
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@@ -426,10 +438,10 @@ onig_print_names(FILE* fp, regex_t* reg)
}
return 0;
}
-#endif
+#endif /* ONIG_DEBUG */
static int
-i_free_name_entry(UChar* key, NameEntry* e, void* arg)
+i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED)
{
xfree(e->name);
if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs);
@@ -486,7 +498,7 @@ typedef struct {
} INamesArg;
static int
-i_names(UChar* key, NameEntry* e, INamesArg* arg)
+i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
e->name + e->name_len,
@@ -519,7 +531,7 @@ onig_foreach_name(regex_t* reg,
}
static int
-i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
+i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map)
{
int i;
@@ -1140,7 +1152,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,
n = ONIGENC_CODE_RANGE_NUM(ranges);
for (i = 0; i < n; i++) {
for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);
- j <= (int )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
+ j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {
if (j >= sb_out) goto sb_end;
BITSET_SET_BIT(cc->bs, j);
@@ -1256,7 +1268,7 @@ onig_node_new_anchor(int type)
static Node*
node_new_backref(int back_num, int* backrefs, int by_name,
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
int exist_level, int nest_level,
#endif
ScanEnv* env)
@@ -1273,7 +1285,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,
if (by_name != 0)
NBREF(node)->state |= NST_NAME_REF;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
if (exist_level != 0) {
NBREF(node)->state |= NST_NEST_LEVEL;
NBREF(node)->nest_level = nest_level;
@@ -1307,17 +1319,17 @@ node_new_backref(int back_num, int* backrefs, int by_name,
#ifdef USE_SUBEXP_CALL
static Node*
-node_new_call(UChar* name, UChar* name_end)
+node_new_call(UChar* name, UChar* name_end, int gnum)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
SET_NTYPE(node, NT_CALL);
- NCALL(node)->state = 0;
- NCALL(node)->ref_num = CALLNODE_REFNUM_UNDEF;
- NCALL(node)->target = NULL_NODE;
- NCALL(node)->name = name;
- NCALL(node)->name_end = name_end;
+ NCALL(node)->state = 0;
+ NCALL(node)->target = NULL_NODE;
+ NCALL(node)->name = name;
+ NCALL(node)->name_end = name_end;
+ NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
return node;
}
#endif
@@ -1539,7 +1551,7 @@ static int
str_node_can_be_split(StrNode* sn, OnigEncoding enc)
{
if (sn->end > sn->s) {
- return ((enc_len(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
+ return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0);
}
return 0;
}
@@ -1956,29 +1968,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
}
static int
-clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
-{
- BBuf *tbuf;
- int r;
-
- if (IS_NCCLASS_NOT(cc)) {
- bitset_invert(cc->bs);
-
- if (! ONIGENC_IS_SINGLEBYTE(enc)) {
- r = not_code_range_buf(enc, cc->mbuf, &tbuf);
- if (r != 0) return r;
-
- bbuf_free(cc->mbuf);
- cc->mbuf = tbuf;
- }
-
- NCCLASS_CLEAR_NOT(cc);
- }
-
- return 0;
-}
-
-static int
and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
{
int r, not1, not2;
@@ -2297,7 +2286,7 @@ typedef struct {
int ref1;
int* refs;
int by_name;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
int exist_level;
int level; /* \k<name+n> */
#endif
@@ -2305,6 +2294,7 @@ typedef struct {
struct {
UChar* name;
UChar* name_end;
+ int gnum;
} call;
struct {
int ctype;
@@ -2494,22 +2484,31 @@ get_name_end_code_point(OnigCodePoint start)
}
#ifdef USE_NAMED_GROUP
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
/*
\k<name+n>, \k<name-n>
+ \k<num+n>, \k<num-n>
+ \k<-num+n>, \k<-num-n>
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* level)
+ UChar** rname_end, ScanEnv* env,
+ int* rback_num, int* rlevel)
{
- int r, exist_level = 0;
+ int r, sign, is_num, exist_level;
OnigCodePoint end_code;
OnigCodePoint c = 0;
OnigEncoding enc = env->enc;
UChar *name_end;
+ UChar *pnum_head;
UChar *p = *src;
PFETCH_READY;
+ *rback_num = 0;
+ is_num = exist_level = 0;
+ sign = 1;
+ pnum_head = *src;
+
end_code = get_name_end_code_point(start_code);
name_end = end;
@@ -2522,7 +2521,15 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
- if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else if (c == '-') {
+ is_num = 2;
+ sign = -1;
+ pnum_head = p;
+ }
+ else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
@@ -2530,24 +2537,36 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
while (!PEND) {
name_end = p;
PFETCH(c);
- if (c == end_code || c == ')' || c == '+' || c == '-') break;
+ if (c == end_code || c == ')' || c == '+' || c == '-') {
+ if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+ break;
+ }
- if (!ONIGENC_IS_CODE_WORD(enc, c)) {
+ if (is_num != 0) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ is_num = 0;
+ }
+ }
+ else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
- if (c != end_code) {
+ if (r == 0 && c != end_code) {
if (c == '+' || c == '-') {
- int num;
+ int level;
int flag = (c == '-' ? -1 : 1);
PFETCH(c);
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
PUNFETCH;
- num = onig_scan_unsigned_number(&p, end, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
- *level = (num * flag);
+ level = onig_scan_unsigned_number(&p, end, enc);
+ if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
+ *rlevel = (level * flag);
exist_level = 1;
PFETCH(c);
@@ -2562,6 +2581,14 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
end:
if (r == 0) {
+ if (is_num != 0) {
+ *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ else if (*rback_num == 0) goto err;
+
+ *rback_num *= sign;
+ }
+
*rname_end = name_end;
*src = p;
return (exist_level ? 1 : 0);
@@ -2571,7 +2598,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
return r;
}
}
-#endif /* USE_BACKREF_AT_LEVEL */
+#endif /* USE_BACKREF_WITH_LEVEL */
/*
def: 0 -> define name (don't allow number name)
@@ -2579,21 +2606,26 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
*/
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int ref)
+ UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
{
- int r, is_num;
+ int r, is_num, sign;
OnigCodePoint end_code;
OnigCodePoint c = 0;
OnigEncoding enc = env->enc;
UChar *name_end;
+ UChar *pnum_head;
UChar *p = *src;
PFETCH_READY;
+ *rback_num = 0;
+
end_code = get_name_end_code_point(start_code);
name_end = end;
+ pnum_head = *src;
r = 0;
is_num = 0;
+ sign = 1;
if (PEND) {
return ONIGERR_EMPTY_GROUP_NAME;
}
@@ -2607,6 +2639,18 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
is_num = 1;
else {
r = ONIGERR_INVALID_GROUP_NAME;
+ is_num = 0;
+ }
+ }
+ else if (c == '-') {
+ if (ref == 1) {
+ is_num = 2;
+ sign = -1;
+ pnum_head = p;
+ }
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ is_num = 0;
}
}
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2614,37 +2658,66 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
}
- while (!PEND) {
- name_end = p;
- PFETCH(c);
- if (c == end_code || c == ')') break;
+ if (r == 0) {
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == end_code || c == ')') {
+ if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+ break;
+ }
- if (is_num == 1) {
- if (! ONIGENC_IS_CODE_DIGIT(enc, c)) {
- if (!ONIGENC_IS_CODE_WORD(enc, c))
+ if (is_num != 0) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else {
+ if (!ONIGENC_IS_CODE_WORD(enc, c))
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ else
+ r = ONIGERR_INVALID_GROUP_NAME;
+
+ is_num = 0;
+ }
+ }
+ else {
+ if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- else
- r = ONIGERR_INVALID_GROUP_NAME;
+ }
}
}
- else {
- if (!ONIGENC_IS_CODE_WORD(enc, c)) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
+ if (c != end_code) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ name_end = end;
+ }
+ if (r != 0) goto err; /* report errors found while scanning the name; otherwise the `return 0` below would accept a malformed name */
- }
- if (c != end_code) {
- r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
- }
+ if (is_num != 0) {
+ *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ else if (*rback_num == 0) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
+ }
+
+ *rback_num *= sign;
+ }
- if (r == 0) {
*rname_end = name_end;
*src = p;
return 0;
}
else {
+ while (!PEND) {
+ name_end = p;
+ PFETCH(c);
+ if (c == end_code || c == ')')
+ break;
+ }
+ if (PEND)
+ name_end = end;
+
+ err:
onig_scan_env_set_error_string(env, r, *src, name_end);
return r;
}
@@ -2652,35 +2725,70 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
#else
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int ref)
+ UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
{
- int r, len;
+ int r, is_num, sign;
OnigCodePoint end_code;
OnigCodePoint c = 0;
UChar *name_end;
OnigEncoding enc = env->enc;
+ UChar *pnum_head;
UChar *p = *src;
PFETCH_READY;
+ *rback_num = 0;
+
end_code = get_name_end_code_point(start_code);
+ *rname_end = name_end = end;
r = 0;
+ pnum_head = *src;
+ is_num = 0;
+ sign = 1;
+
+ if (PEND) {
+ return ONIGERR_EMPTY_GROUP_NAME;
+ }
+ else {
+ PFETCH(c);
+ if (c == end_code)
+ return ONIGERR_EMPTY_GROUP_NAME;
+
+ if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ is_num = 1;
+ }
+ else if (c == '-') {
+ is_num = 2;
+ sign = -1;
+ pnum_head = p;
+ }
+ else {
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ }
+ }
+
while (!PEND) {
name_end = p;
- if (enc_len(enc, p) > 1)
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
PFETCH(c);
if (c == end_code || c == ')') break;
if (! ONIGENC_IS_CODE_DIGIT(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
- if (c != end_code) {
+ if (r == 0 && c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
if (r == 0) {
+ *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ else if (*rback_num == 0) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
+ }
+ *rback_num *= sign;
+
*rname_end = name_end;
*src = p;
return 0;
@@ -2691,7 +2799,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return r;
}
}
-#endif
+#endif /* USE_NAMED_GROUP */
static void
CC_ESC_WARN(ScanEnv* env, UChar *c)
@@ -2733,12 +2841,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enc_len(enc, p, to);
+ q = p + enclen(enc, p, to);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += enc_len(enc, q, to);
+ q += enclen(enc, q, to);
}
if (i >= n) {
if (IS_NOT_NULL(next))
@@ -2764,19 +2872,19 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
while (p < to) {
if (in_esc) {
in_esc = 0;
- p += enc_len(enc, p, to);
+ p += enclen(enc, p, to);
}
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
- q = p + enc_len(enc, p, to);
+ q = p + enclen(enc, p, to);
if (x == s[0]) {
for (i = 1; i < n && q < to; i++) {
x = ONIGENC_MBC_TO_CODE(enc, q, to);
if (x != s[i]) break;
- q += enc_len(enc, q, to);
+ q += enclen(enc, q, to);
}
if (i >= n) return 1;
- p += enc_len(enc, p, to);
+ p += enclen(enc, p, to);
}
else {
x = ONIGENC_MBC_TO_CODE(enc, p, to);
@@ -2904,7 +3012,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
- if (p > prev + enc_len(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
+ if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) {
PINC;
tok->type = TK_CODE_POINT;
tok->base = 16;
@@ -3244,7 +3352,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
- if ((p > prev + enc_len(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
+ if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )num;
@@ -3302,7 +3410,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.num = 1;
tok->u.backref.ref1 = num;
tok->u.backref.by_name = 0;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
tok->u.backref.exist_level = 0;
#endif
break;
@@ -3341,44 +3449,64 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (c == '<' || c == '\'') {
UChar* name_end;
int* backs;
+ int back_num;
prev = p;
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
- env, &tok->u.backref.level);
+ env, &back_num, &tok->u.backref.level);
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
- r = fetch_name(&p, end, &name_end, env, 1);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &back_num, 1); /* start_code (the '<' or '\'' opener) is fetch_name()'s first parameter */
#endif
if (r < 0) return r;
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
- if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- int i;
- for (i = 0; i < num; i++) {
- if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ if (back_num != 0) {
+ if (back_num < 0) {
+ back_num = BACKREF_REL_TO_ABS(back_num, env);
+ if (back_num <= 0)
return ONIGERR_INVALID_BACKREF;
}
- }
- tok->type = TK_BACKREF;
- tok->u.backref.by_name = 1;
- if (num == 1) {
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ if (back_num > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 0;
tok->u.backref.num = 1;
- tok->u.backref.ref1 = backs[0];
+ tok->u.backref.ref1 = back_num;
}
else {
- tok->u.backref.num = num;
- tok->u.backref.refs = backs;
+ num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ if (num <= 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
+ int i;
+ for (i = 0; i < num; i++) {
+ if (backs[i] > env->num_mem ||
+ IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ return ONIGERR_INVALID_BACKREF;
+ }
+ }
+
+ tok->type = TK_BACKREF;
+ tok->u.backref.by_name = 1;
+ if (num == 1) {
+ tok->u.backref.num = 1;
+ tok->u.backref.ref1 = backs[0];
+ }
+ else {
+ tok->u.backref.num = num;
+ tok->u.backref.refs = backs;
+ }
}
}
else
@@ -3392,15 +3520,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {
PFETCH(c);
if (c == '<' || c == '\'') {
+ int gnum;
UChar* name_end;
prev = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 1);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
if (r < 0) return r;
tok->type = TK_CALL;
tok->u.call.name = prev;
tok->u.call.name_end = name_end;
+ tok->u.call.gnum = gnum;
}
else
PUNFETCH;
@@ -3443,7 +3573,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.code = (OnigCodePoint )num;
}
else { /* string */
- p = tok->backp + enc_len(enc, tok->backp, end);
+ p = tok->backp + enclen(enc, tok->backp, end);
}
break;
}
@@ -3616,7 +3746,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
static int
-add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
+add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
+ OnigEncoding enc ARG_UNUSED,
OnigCodePoint sb_out, const OnigCodePoint mbr[])
{
int i, r;
@@ -3758,7 +3889,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
&& ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c);
}
@@ -3959,7 +4090,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
if (intype == *type) {
if (intype == CCV_SB) {
if (*vs > 0xff || v > 0xff)
- return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
if (*vs > v) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
@@ -4087,6 +4218,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
if (len > 1) {
in_type = CCV_CODE_POINT;
}
+ else if (len < 0) {
+ r = len;
+ goto err;
+ }
else {
sb_char:
in_type = CCV_SB;
@@ -4120,7 +4255,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
goto err;
}
- len = enc_len(env->enc, buf, buf+i);
+ len = enclen(env->enc, buf, buf+i);
if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
goto err;
@@ -4367,11 +4502,15 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
{
int r, num;
- int list_capture;
Node *target;
OnigOptionType option;
- OnigEncoding enc = env->enc;
OnigCodePoint c;
+ OnigEncoding enc = env->enc;
+
+#ifdef USE_NAMED_GROUP
+ int list_capture;
+#endif
+
UChar* p = *src;
PFETCH_READY;
@@ -4406,6 +4545,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
break;
+#ifdef USE_NAMED_GROUP
case '\'':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
goto named_group1;
@@ -4413,6 +4553,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else
return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
+#endif
case '<': /* look behind (?<=...), (?<!...) */
PFETCH(c);
@@ -4434,12 +4575,12 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
named_group2:
name = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 0);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
if (r < 0) return r;
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
- if (list_capture != 0 && num >= BIT_STATUS_BITS_NUM)
+ if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
r = name_add(env->reg, name, name_end, num, env);
@@ -4481,7 +4622,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
onig_node_free(*np);
return num;
}
- else if (num >= BIT_STATUS_BITS_NUM) {
+ else if (num >= (int )BIT_STATUS_BITS_NUM) {
onig_node_free(*np);
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
@@ -4727,12 +4868,12 @@ static int type_cclass_hash(type_cclass_key* key)
val = 0;
p = (UChar* )&(key->enc);
- for (i = 0; i < sizeof(key->enc); i++) {
+ for (i = 0; i < (int )sizeof(key->enc); i++) {
val = val * 997 + (int )*p++;
}
p = (UChar* )(&key->type);
- for (i = 0; i < sizeof(key->type); i++) {
+ for (i = 0; i < (int )sizeof(key->type); i++) {
val = val * 997 + (int )*p++;
}
@@ -4740,7 +4881,7 @@ static int type_cclass_hash(type_cclass_key* key)
return val + (val >> 5);
}
-static const struct st_hash_type type_type_cclass_hash = {
+static struct st_hash_type type_type_cclass_hash = {
type_cclass_cmp,
type_cclass_hash,
};
@@ -4749,7 +4890,7 @@ static st_table* OnigTypeCClassTable;
static int
-i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)
{
if (IS_NOT_NULL(node)) {
CClassNode* cc = NCCLASS(node);
@@ -4776,6 +4917,31 @@ onig_free_shared_cclass_table(void)
#endif /* USE_SHARED_CCLASS_TABLE */
+#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+static int
+clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
+{
+ BBuf *tbuf;
+ int r;
+
+ if (IS_NCCLASS_NOT(cc)) {
+ bitset_invert(cc->bs);
+
+ if (! ONIGENC_IS_SINGLEBYTE(enc)) {
+ r = not_code_range_buf(enc, cc->mbuf, &tbuf);
+ if (r != 0) return r;
+
+ bbuf_free(cc->mbuf);
+ cc->mbuf = tbuf;
+ }
+
+ NCCLASS_CLEAR_NOT(cc);
+ }
+
+ return 0;
+}
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+
typedef struct {
ScanEnv* env;
CClassNode* cc;
@@ -4798,28 +4964,43 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
bs = cc->bs;
if (to_len == 1) {
- int in_cc;
- in_cc = onig_is_code_in_cc(env->enc, from, cc);
- if ((in_cc != 0 && !IS_NCCLASS_NOT(cc)) ||
- (in_cc == 0 && IS_NCCLASS_NOT(cc))) {
+ int is_in = onig_is_code_in_cc(env->enc, from, cc);
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+ if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
+ (is_in == 0 && IS_NCCLASS_NOT(cc))) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
add_code_range(&(cc->mbuf), env, *to, *to);
}
else {
- /* /(?i:[^A-C])/.match("a") ==> fail. */
BITSET_SET_BIT(bs, *to);
}
}
+#else
+ if (is_in != 0) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
+ add_code_range(&(cc->mbuf), env, *to, *to);
+ }
+ else {
+ if (IS_NCCLASS_NOT(cc)) {
+ BITSET_CLEAR_BIT(bs, *to);
+ }
+ else
+ BITSET_SET_BIT(bs, *to);
+ }
+ }
+#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
}
else {
int r, i, len;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
Node *snode = NULL_NODE;
- if (onig_is_code_in_cc(env->enc, from, cc)) {
- if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
-
+ if (onig_is_code_in_cc(env->enc, from, cc)
+#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
+ && !IS_NCCLASS_NOT(cc)
+#endif
+ ) {
for (i = 0; i < to_len; i++) {
len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
if (i == 0) {
@@ -4857,7 +5038,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
Node** targetp;
*np = NULL;
- if (tok->type == term)
+ if (tok->type == (enum TokenSyms )term)
goto end_of_token;
switch (tok->type) {
@@ -4924,7 +5105,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enc_len(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
+ if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) {
r = fetch_token(tok, src, end, env);
NSTRING_CLEAR_RAW(*np);
goto string_end;
@@ -4940,7 +5121,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (len < ONIGENC_MBC_MINLEN(env->enc)) {
rem = ONIGENC_MBC_MINLEN(env->enc) - len;
(void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
- if (len + rem == enc_len(env->enc, NSTR(*np)->s)) {
+ if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
NSTRING_CLEAR_RAW(*np);
goto string_end;
}
@@ -5131,7 +5312,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
*np = node_new_backref(len,
(len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)),
tok->u.backref.by_name,
-#ifdef USE_BACKREF_AT_LEVEL
+#ifdef USE_BACKREF_WITH_LEVEL
tok->u.backref.exist_level,
tok->u.backref.level,
#endif
@@ -5141,9 +5322,18 @@ parse_exp(Node** np, OnigToken* tok, int term,
#ifdef USE_SUBEXP_CALL
case TK_CALL:
- *np = node_new_call(tok->u.call.name, tok->u.call.name_end);
- CHECK_NULL_RETURN_MEMERR(*np);
- env->num_call++;
+ {
+ int gnum = tok->u.call.gnum;
+
+ if (gnum < 0) {
+ gnum = BACKREF_REL_TO_ABS(gnum, env);
+ if (gnum <= 0)
+ return ONIGERR_INVALID_BACKREF;
+ }
+ *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
+ CHECK_NULL_RETURN_MEMERR(*np);
+ env->num_call++;
+ }
break;
#endif
@@ -5282,7 +5472,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
headp = &(NCDR(*headp));
}
- if (tok->type != term)
+ if (tok->type != (enum TokenSyms )term)
goto err;
}
else {
@@ -5337,7 +5527,7 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
}
extern void
-onig_scan_env_set_error_string(ScanEnv* env, int ecode,
+onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
UChar* arg, UChar* arg_end)
{
env->error = arg;
diff --git a/regparse.h b/regparse.h
index 7a6314098a..0c5c2c936c 100644
--- a/regparse.h
+++ b/regparse.h
@@ -213,7 +213,7 @@ typedef struct {
typedef struct {
NodeBase base;
int state;
- int ref_num;
+ int group_num;
UChar* name;
UChar* name_end;
struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
@@ -340,6 +340,7 @@ extern void onig_node_str_clear P_((Node* node));
extern int onig_free_node_list P_((void));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+extern int onig_free_shared_cclass_table P_((void));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
diff --git a/version.h b/version.h
index b68337c30d..1c21a2c5e4 100644
--- a/version.h
+++ b/version.h
@@ -1,7 +1,7 @@
#define RUBY_VERSION "1.9.0"
-#define RUBY_RELEASE_DATE "2008-01-03"
+#define RUBY_RELEASE_DATE "2008-01-04"
#define RUBY_VERSION_CODE 190
-#define RUBY_RELEASE_CODE 20080103
+#define RUBY_RELEASE_CODE 20080104
#define RUBY_PATCHLEVEL 0
#define RUBY_VERSION_MAJOR 1
@@ -9,7 +9,7 @@
#define RUBY_VERSION_TEENY 0
#define RUBY_RELEASE_YEAR 2008
#define RUBY_RELEASE_MONTH 1
-#define RUBY_RELEASE_DAY 3
+#define RUBY_RELEASE_DAY 4
#ifdef RUBY_EXTERN
RUBY_EXTERN const char ruby_version[];