aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2005-01-28 15:21:48 +0000
committerksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2005-01-28 15:21:48 +0000
commita19d6b33d7419ed3724ee6646fa9303d9542b5e5 (patch)
tree297425b4ec8a0690858d8a21f49797a0ddacaedf
parent7d765e5203d8c909c1956276ead32b1f3d8c351f (diff)
downloadruby-a19d6b33d7419ed3724ee6646fa9303d9542b5e5.tar.gz
* ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported Oni Guruma 3.5.4.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7846 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog7
-rw-r--r--hash.c2
-rw-r--r--imp.log8
-rw-r--r--oniguruma.h37
-rw-r--r--regcomp.c251
-rw-r--r--regexec.c184
-rw-r--r--regint.h107
-rw-r--r--regparse.c410
-rw-r--r--regparse.h18
-rw-r--r--st.c157
-rw-r--r--st.h16
-rw-r--r--utf8.c3486
12 files changed, 4237 insertions, 446 deletions
diff --git a/ChangeLog b/ChangeLog
index 684fec5fe4..f6bde16499 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+Sat Jan 29 00:10:33 2005 Kazuo Saito <ksaito@uranus.dti.ne.jp>
+
+ * ascii.c, euc_jp.c, hash.c, oniggnu.h, oniguruma.h, regcomp.c,
+ regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h,
+ regparse.c, regparse.h, sjis.c, st.c, st.h, utf8.c: imported
+ Oni Guruma 3.5.4.
+
Fri Jan 28 17:16:55 2005 Tanaka Akira <akr@m17n.org>
* lib/resolv.rb (Resolv::DNS::Config.parse_resolv_conf):
diff --git a/hash.c b/hash.c
index 16f6325955..beadac14ee 100644
--- a/hash.c
+++ b/hash.c
@@ -102,6 +102,8 @@ rb_any_hash(a)
static struct st_hash_type objhash = {
rb_any_cmp,
rb_any_hash,
+ st_nothing_key_free,
+ st_nothing_key_clone
};
struct foreach_safe_arg {
diff --git a/imp.log b/imp.log
deleted file mode 100644
index e3828c79a8..0000000000
--- a/imp.log
+++ /dev/null
@@ -1,8 +0,0 @@
-Vim: Warning: Output is not to a terminal
-7[?47h[?1h="/tmp/cvss7mRju" 4L, 229CCVS: ----------------------------------------------------------------------
-CVS: Enter Log. Lines beginning with `CVS:' are removed automatically
-CVS:
-CVS: ----------------------------------------------------------------------
-~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ :q!-- INSERT --i-- INSERT --mported Oni Guruma 3.5.4."/private/tmp/cvss7mRju" 5L, 256C written
-
-[?1l>[?47l8 \ No newline at end of file
diff --git a/oniguruma.h b/oniguruma.h
index c10f3b4d18..95cd109384 100644
--- a/oniguruma.h
+++ b/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,8 +31,17 @@
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 3
-#define ONIGURUMA_VERSION_MINOR 4
-#define ONIGURUMA_VERSION_TEENY 0
+#define ONIGURUMA_VERSION_MINOR 5
+#define ONIGURUMA_VERSION_TEENY 4
+
+#ifdef __cplusplus
+# ifndef HAVE_PROTOTYPES
+# define HAVE_PROTOTYPES 1
+# endif
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -72,12 +81,6 @@ typedef unsigned int OnigDistance;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
-typedef struct {
- OnigCodePoint from;
- OnigCodePoint to;
-} OnigCodePointRange;
-
-
/* ambiguous match flag */
#define ONIGENC_AMBIGUOUS_MATCH_NONE 0
#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0)
@@ -103,6 +106,11 @@ typedef unsigned int OnigAmbigType;
#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3
#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4
+/* code range */
+#define ONIGENC_CODE_RANGE_NUM(range) ((int )range[0])
+#define ONIGENC_CODE_RANGE_FROM(range,i) range[((i)*2) + 1]
+#define ONIGENC_CODE_RANGE_TO(range,i) range[((i)*2) + 2]
+
typedef struct {
int len;
OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN];
@@ -152,7 +160,7 @@ typedef struct {
int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs);
int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs);
int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype);
- int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]);
+ int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_range[], OnigCodePoint* mb_range[]);
UChar* (*left_adjust_char_head)(UChar* start, UChar* p);
int (*is_allowed_reverse_match)(UChar* p, UChar* end);
} OnigEncodingType;
@@ -245,7 +253,6 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
#define ONIGENC_CTYPE_ASCII (1<<13)
#define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT)
-
#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p)
#define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF)
@@ -275,7 +282,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5;
onigenc_get_left_adjust_char_head(enc, start, s)
#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0
#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
ONIG_NO_SUPPORT_CONFIG
#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p))
#define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc)
@@ -390,8 +397,8 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)
#define ONIGENC_IS_CODE_WORD(enc,code) \
ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_WORD)
-#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \
- (enc)->get_ctype_code_range(ctype,nsb,nmb,sbr,mbr)
+#define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,sbr,mbr) \
+ (enc)->get_ctype_code_range(ctype,sbr,mbr)
ONIG_EXTERN
UChar* onigenc_step_back P_((OnigEncoding enc, UChar* start, UChar* s, int n));
@@ -600,7 +607,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
#define ONIGERR_EMPTY_CHAR_CLASS -102
#define ONIGERR_PREMATURE_END_OF_CHAR_CLASS -103
-#define ONIGERR_END_PATTERN_AT_BACKSLASH -104
+#define ONIGERR_END_PATTERN_AT_ESCAPE -104
#define ONIGERR_END_PATTERN_AT_META -105
#define ONIGERR_END_PATTERN_AT_CONTROL -106
#define ONIGERR_META_CODE_SYNTAX -108
diff --git a/regcomp.c b/regcomp.c
index 7217f71ab8..116bcb7c9b 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,21 @@
static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
#endif
+static UChar*
+k_strdup(UChar* s, UChar* end)
+{
+ int len = end - s;
+
+ if (len > 0) {
+ UChar* r = (UChar* )xmalloc(len + 1);
+ CHECK_NULL_RETURN(r);
+ xmemcpy(r, s, len);
+ r[len] = (UChar )0;
+ return r;
+ }
+ else return NULL;
+}
+
/*
Caution: node should not be a string node.
(s and end member address break)
@@ -189,16 +204,14 @@ add_mem_num(regex_t* reg, int num)
return 0;
}
-#if 0
static int
-add_repeat_num(regex_t* reg, int num)
+add_pointer(regex_t* reg, void* addr)
{
- RepeatNumType n = (RepeatNumType )num;
+ PointerType ptr = (PointerType )addr;
- BBUF_ADD(reg, &n, SIZE_REPEATNUM);
+ BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
-#endif
static int
add_option(regex_t* reg, OnigOptionType option)
@@ -518,6 +531,11 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
+ if (IS_CCLASS_SHARE(cc)) {
+ len = SIZE_OPCODE + SIZE_POINTER;
+ return len;
+ }
+
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
@@ -543,22 +561,34 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
+ if (IS_CCLASS_SHARE(cc)) {
+ add_opcode(reg, OP_CCLASS_NODE);
+ r = add_pointer(reg, cc);
+ return r;
+ }
+
if (IS_NULL(cc->mbuf)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_NOT);
- else add_opcode(reg, OP_CCLASS);
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_NOT);
+ else
+ add_opcode(reg, OP_CCLASS);
r = add_bitset(reg, cc->bs);
}
else {
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- if (cc->not) add_opcode(reg, OP_CCLASS_MB_NOT);
- else add_opcode(reg, OP_CCLASS_MB);
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MB_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MB);
r = add_multi_byte_cclass(cc->mbuf, reg);
}
else {
- if (cc->not) add_opcode(reg, OP_CCLASS_MIX_NOT);
- else add_opcode(reg, OP_CCLASS_MIX);
+ if (IS_CCLASS_NOT(cc))
+ add_opcode(reg, OP_CCLASS_MIX_NOT);
+ else
+ add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
if (r) return r;
@@ -631,7 +661,6 @@ compile_range_repeat_node(QualifierNode* qn, int target_len, int empty_info,
else {
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
}
-
if (r) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
return r;
@@ -1408,12 +1437,9 @@ compile_tree(Node* node, regex_t* reg)
}
#ifdef USE_NAMED_GROUP
-typedef struct {
- int new_val;
-} NumMap;
static int
-noname_disable_map(Node** plink, NumMap* map, int* counter)
+noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
{
int r = 0;
Node* node = *plink;
@@ -1467,7 +1493,7 @@ noname_disable_map(Node** plink, NumMap* map, int* counter)
}
static int
-renumber_node_backref(Node* node, NumMap* map)
+renumber_node_backref(Node* node, GroupNumRemap* map)
{
int i, pos, n, old_num;
int *backs;
@@ -1495,7 +1521,7 @@ renumber_node_backref(Node* node, NumMap* map)
}
static int
-renumber_by_map(Node* node, NumMap* map)
+renumber_by_map(Node* node, GroupNumRemap* map)
{
int r = 0;
@@ -1560,9 +1586,9 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
{
int r, i, pos, counter;
BitStatusType loc;
- NumMap* map;
+ GroupNumRemap* map;
- map = (NumMap* )xalloca(sizeof(NumMap) * (env->num_mem + 1));
+ map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
CHECK_NULL_RETURN_VAL(map, ONIGERR_MEMORY);
for (i = 1; i <= env->num_mem; i++) {
map[i].new_val = 0;
@@ -1591,7 +1617,8 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
env->num_mem = env->num_named;
reg->num_mem = env->num_named;
- return 0;
+
+ return onig_renumber_name_table(reg, map);
}
#endif /* USE_NAMED_GROUP */
@@ -2092,10 +2119,10 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
}
- if (cc->not == 0)
- return found;
- else
+ if (IS_CCLASS_NOT(cc))
return !found;
+ else
+ return found;
}
/* x is not included y ==> 1 : 0 */
@@ -2158,7 +2185,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case N_CTYPE:
switch (NCTYPE(y).type) {
case CTYPE_WORD:
- if (IS_NULL(xc->mbuf) && xc->not == 0) {
+ if (IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(xc->bs, i)) {
if (ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) return 0;
@@ -2171,7 +2198,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_SB_WORD(reg->enc, i)) {
- if (xc->not == 0) {
+ if (!IS_CCLASS_NOT(xc)) {
if (BITSET_AT(xc->bs, i))
return 0;
}
@@ -2196,14 +2223,16 @@ is_not_included(Node* x, Node* y, regex_t* reg)
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
v = BITSET_AT(xc->bs, i);
- if ((v != 0 && xc->not == 0) || (v == 0 && xc->not)) {
+ if ((v != 0 && !IS_CCLASS_NOT(xc)) ||
+ (v == 0 && IS_CCLASS_NOT(xc))) {
v = BITSET_AT(yc->bs, i);
- if ((v != 0 && yc->not == 0) || (v == 0 && yc->not))
+ if ((v != 0 && !IS_CCLASS_NOT(yc)) ||
+ (v == 0 && IS_CCLASS_NOT(yc)))
return 0;
}
}
- if ((IS_NULL(xc->mbuf) && xc->not == 0) ||
- (IS_NULL(yc->mbuf) && yc->not == 0))
+ if ((IS_NULL(xc->mbuf) && !IS_CCLASS_NOT(xc)) ||
+ (IS_NULL(yc->mbuf) && !IS_CCLASS_NOT(yc)))
return 1;
return 0;
}
@@ -3333,22 +3362,27 @@ typedef struct {
OptMapInfo map; /* boundary */
} NodeOptInfo;
-static short int ByteValTable[] = {
- 14, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
- 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
- 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
-};
static int
-map_position_value(int i)
-{
- if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0]))
- return (int )ByteValTable[i];
+map_position_value(OnigEncoding enc, int i)
+{
+ static short int ByteValTable[] = {
+ 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
+ 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
+ };
+
+ if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) {
+ if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
+ return 20;
+ else
+ return (int )ByteValTable[i];
+ }
else
return 4; /* Take it easy. */
}
@@ -3634,7 +3668,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
}
static void
-select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
+select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
{
int v1, v2;
@@ -3643,8 +3677,8 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
if (v1 <= 2 && v2 <= 2) {
/* ByteValTable[x] is big value --> low price */
- v2 = map_position_value(now->s[0]);
- v1 = map_position_value(alt->s[0]);
+ v2 = map_position_value(enc, now->s[0]);
+ v1 = map_position_value(enc, alt->s[0]);
if (now->len > 1) v1 += 5;
if (alt->len > 1) v2 += 5;
@@ -3660,13 +3694,29 @@ select_opt_exact_info(OptExactInfo* now, OptExactInfo* alt)
static void
clear_opt_map_info(OptMapInfo* map)
{
- int i;
+ static OptMapInfo clean_info = {
+ {0, 0}, {0, 0}, 0,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ }
+ };
- clear_mml(&map->mmd);
- clear_opt_anc_info(&map->anc);
- map->value = 0;
- for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
- map->map[i] = 0;
+ xmemcpy(map, &clean_info, sizeof(OptMapInfo));
}
static void
@@ -3676,11 +3726,11 @@ copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
}
static void
-add_char_opt_map_info(OptMapInfo* map, UChar c)
+add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
{
if (map->map[c] == 0) {
map->map[c] = 1;
- map->value += map_position_value(c);
+ map->value += map_position_value(enc, c);
}
}
@@ -3695,7 +3745,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
OnigPairAmbigCodes* pccs;
OnigAmbigType amb;
- add_char_opt_map_info(map, p[0]);
+ add_char_opt_map_info(map, p[0], enc);
code = ONIGENC_MBC_TO_CODE(enc, p, end);
for (amb = 0x01; amb <= ONIGENC_AMBIGUOUS_MATCH_LIMIT; amb <<= 1) {
@@ -3706,7 +3756,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
if (pccs[i].from == code) {
len = ONIGENC_CODE_TO_MBC(enc, pccs[i].to, buf);
if (len < 0) return len;
- add_char_opt_map_info(map, buf[0]);
+ add_char_opt_map_info(map, buf[0], enc);
}
}
@@ -3718,7 +3768,7 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
ccode = ccs[i].items[j].code[0];
len = ONIGENC_CODE_TO_MBC(enc, ccode, buf);
if (len < 0) return len;
- add_char_opt_map_info(map, buf[0]);
+ add_char_opt_map_info(map, buf[0], enc);
}
break;
}
@@ -3761,7 +3811,7 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
}
static void
-alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
+alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
{
int i, val;
@@ -3780,7 +3830,7 @@ alt_merge_opt_map_info(OptMapInfo* to, OptMapInfo* add)
to->map[i] = 1;
if (to->map[i])
- val += map_position_value(i);
+ val += map_position_value(enc, i);
}
to->value = val;
@@ -3813,7 +3863,7 @@ copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
}
static void
-concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
+concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
{
int exb_reach, exm_reach;
OptAncInfo tanc;
@@ -3848,8 +3898,8 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
clear_opt_exact_info(&add->exb);
}
}
- select_opt_exact_info(&to->exm, &add->exb);
- select_opt_exact_info(&to->exm, &add->exm);
+ select_opt_exact_info(enc, &to->exm, &add->exb);
+ select_opt_exact_info(enc, &to->exm, &add->exm);
if (to->expr.len > 0) {
if (add->len.max > 0) {
@@ -3857,9 +3907,9 @@ concat_left_node_opt_info(NodeOptInfo* to, NodeOptInfo* add)
to->expr.len = add->len.max;
if (to->expr.mmd.max == 0)
- select_opt_exact_info(&to->exb, &to->expr);
+ select_opt_exact_info(enc, &to->exb, &to->expr);
else
- select_opt_exact_info(&to->exm, &to->expr);
+ select_opt_exact_info(enc, &to->exm, &to->expr);
}
}
else if (add->expr.len > 0) {
@@ -3878,7 +3928,7 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
alt_merge_opt_exact_info(&to->exb, &add->exb, env);
alt_merge_opt_exact_info(&to->exm, &add->exm, env);
alt_merge_opt_exact_info(&to->expr, &add->expr, env);
- alt_merge_opt_map_info (&to->map, &add->map);
+ alt_merge_opt_map_info(env->enc, &to->map, &add->map);
alt_merge_mml(&to->len, &add->len);
}
@@ -3908,7 +3958,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
r = optimize_node_left(NCONS(nd).left, &nopt, &nenv);
if (r == 0) {
add_mml(&nenv.mmd, &nopt.len);
- concat_left_node_opt_info(opt, &nopt);
+ concat_left_node_opt_info(env->enc, opt, &nopt);
}
} while (r == 0 && IS_NOT_NULL(nd = NCONS(nd).right));
}
@@ -3939,7 +3989,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
NSTRING_IS_RAW(node), env->enc);
if (slen > 0) {
- add_char_opt_map_info(&opt->map, *(sn->s));
+ add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
}
set_mml(&opt->len, slen, slen);
}
@@ -3978,7 +4028,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
/* no need to check ignore case. (setted in setup_tree()) */
- if (IS_NOT_NULL(cc->mbuf) || cc->not != 0) {
+ if (IS_NOT_NULL(cc->mbuf) || IS_CCLASS_NOT(cc)) {
OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
@@ -3987,8 +4037,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
else {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
z = BITSET_AT(cc->bs, i);
- if ((z && !cc->not) || (!z && cc->not)) {
- add_char_opt_map_info(&opt->map, (UChar )i);
+ if ((z && !IS_CCLASS_NOT(cc)) || (!z && IS_CCLASS_NOT(cc))) {
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
set_mml(&opt->len, 1, 1);
@@ -4009,7 +4059,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case CTYPE_NOT_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i);
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
break;
@@ -4017,7 +4067,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case CTYPE_WORD:
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
- add_char_opt_map_info(&opt->map, (UChar )i);
+ add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
}
}
break;
@@ -4245,7 +4295,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
else {
int allow_reverse;
- reg->exact = onig_strdup(e->s, e->s + e->len);
+ reg->exact = k_strdup(e->s, e->s + e->len);
CHECK_NULL_RETURN_VAL(reg->exact, ONIGERR_MEMORY);
reg->exact_end = reg->exact + e->len;
@@ -4334,7 +4384,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
}
if (opt.exb.len > 0 || opt.exm.len > 0) {
- select_opt_exact_info(&opt.exb, &opt.exm);
+ select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
@@ -4506,7 +4556,7 @@ onig_free_body(regex_t* reg)
if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
- if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
+ if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
#ifdef USE_NAMED_GROUP
onig_names_free(reg);
@@ -4579,11 +4629,12 @@ onig_clone(regex_t** to, regex_t* from)
int r, size;
regex_t* reg;
- if (ONIG_STATE(from) == ONIG_STATE_NORMAL) {
- from->state++; /* increment as search counter */
- if (IS_NOT_NULL(from->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(from) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(from);
+ if (IS_NOT_NULL(from->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(from);
- from->state++;
+ ONIG_STATE_INC(from);
}
}
else {
@@ -4593,19 +4644,20 @@ onig_clone(regex_t** to, regex_t* from)
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- from->state++; /* increment as search counter */
+ ONIG_STATE_INC(from);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
r = onig_alloc_init(&reg, ONIG_OPTION_NONE, ONIGENC_AMBIGUOUS_MATCH_DEFAULT,
from->enc, ONIG_SYNTAX_DEFAULT);
if (r != 0) {
- from->state--;
+ ONIG_STATE_DEC(from);
return r;
}
xmemcpy(reg, from, sizeof(onig_t));
- reg->state = ONIG_STATE_NORMAL;
reg->chain = (regex_t* )NULL;
+ reg->state = ONIG_STATE_NORMAL;
if (from->p) {
reg->p = (UChar* )xmalloc(reg->alloc);
@@ -4638,12 +4690,12 @@ onig_clone(regex_t** to, regex_t* from)
reg->name_table = names_clone(from); /* names_clone is not implemented */
#endif
- from->state--;
+ ONIG_STATE_DEC(from);
*to = reg;
return 0;
mem_error:
- from->state--;
+ ONIG_STATE_DEC(from);
return ONIGERR_MEMORY;
}
#endif
@@ -4839,6 +4891,7 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
*reg = (regex_t* )xmalloc(sizeof(regex_t));
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
+ (*reg)->state = ONIG_STATE_MODIFY;
if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
option |= syntax->options;
@@ -4847,7 +4900,6 @@ onig_alloc_init(regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag,
else
option |= syntax->options;
- (*reg)->state = ONIG_STATE_NORMAL;
(*reg)->enc = enc;
(*reg)->options = option;
(*reg)->syntax = syntax;
@@ -4910,9 +4962,14 @@ onig_init()
return 0;
}
+
extern int
onig_end()
{
+ extern int onig_free_shared_cclass_table();
+
+ THREAD_ATOMIC_START;
+
#ifdef ONIG_DEBUG_STATISTICS
onig_print_statistics(stderr);
#endif
@@ -4921,10 +4978,17 @@ onig_end()
onig_free_node_list();
#endif
+#ifdef USE_SHARED_CCLASS_TABLE
+ onig_free_shared_cclass_table();
+#endif
+
onig_inited = 0;
+
+ THREAD_ATOMIC_END;
return 0;
}
+
#ifdef ONIG_DEBUG
OnigOpInfoType OnigOpInfo[] = {
@@ -4950,6 +5014,7 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
{ OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
{ OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
+ { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
{ OP_ANYCHAR, "anychar", ARG_NON },
{ OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
{ OP_ANYCHAR_STAR, "anychar*", ARG_NON },
@@ -5203,6 +5268,16 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
fprintf(f, ":%d:%d:%d", n, (int )code, len);
break;
+ case OP_CCLASS_NODE:
+ {
+ CClassNode *cc;
+
+ GET_POINTER_INC(cc, bp);
+ n = bitset_on_num(cc->bs);
+ fprintf(f, ":%u:%d", (unsigned int )cc, n);
+ }
+ break;
+
case OP_BACKREFN_IC:
mem = *((MemNumType* )bp);
bp += SIZE_MEMNUM;
@@ -5330,7 +5405,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case N_CCLASS:
fprintf(f, "<cclass:%x>", (int )node);
- if (NCCLASS(node).not) fputs(" not", f);
+ if (IS_CCLASS_NOT(&NCCLASS(node))) fputs(" not", f);
if (NCCLASS(node).mbuf) {
BBuf* bbuf = NCCLASS(node).mbuf;
for (i = 0; i < bbuf->used; i++) {
diff --git a/regexec.c b/regexec.c
index 07af4fe104..795a26dd76 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -274,7 +274,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
#define INVALID_STACK_INDEX -1
-typedef int StackIndex;
+typedef long StackIndex;
typedef struct _StackType {
unsigned int type;
@@ -986,7 +986,7 @@ trap_ensure(VALUE arg)
TrapEnsureArg* ta = (TrapEnsureArg* )arg;
if (ta->state == 0) { /* trap_exec() is not normal return */
- ta->reg->state--;
+ ONIG_STATE_DEC(ta->reg);
if (! IS_NULL(ta->msa->stack_p) && ta->stk_base != ta->msa->stack_p)
xfree(ta->stk_base);
@@ -1147,6 +1147,27 @@ onig_is_in_code_range(UChar* p, OnigCodePoint code)
return ((low < n && code >= data[low * 2]) ? 1 : 0);
}
+static int
+code_is_in_cclass_node(void* node, OnigCodePoint code, int enclen)
+{
+ unsigned int in_cc;
+ CClassNode* cc = (CClassNode* )node;
+
+ if (enclen == 1) {
+ in_cc = BITSET_AT(cc->bs, code);
+ }
+ else {
+ UChar* p = ((BBuf* )(cc->mbuf))->p;
+ in_cc = onig_is_in_code_range(p, code);
+ }
+
+ if (IS_CCLASS_NOT(cc)) {
+ return (in_cc ? 0 : 1);
+ }
+ else {
+ return (in_cc ? 1 : 0);
+ }
+}
/* matching region of POSIX API */
typedef int regoff_t;
@@ -1340,14 +1361,31 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACT1_IC: STAT_OP_IN(OP_EXACT1_IC);
{
int len;
- UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *q, *ss, *sp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exact1_ic_retry:
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exact1_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1424,7 +1462,7 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
case OP_EXACTN_IC: STAT_OP_IN(OP_EXACTN_IC);
{
int len;
- UChar *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *ss, *sp, *q, *endp, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
GET_LENGTH_INC(tlen, p);
endp = p + tlen;
@@ -1432,11 +1470,28 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
while (p < endp) {
sprev = s;
DATA_ENSURE(1);
+ ss = s;
+ sp = p;
+
+ exactn_ic_retry:
len = ONIGENC_MBC_TO_NORMALIZE(encode, ambig_flag, &s, end, lowbuf);
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
- if (*p != *q) goto fail;
+ if (*p != *q) {
+#if 1
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ s = ss;
+ p = sp;
+ goto exactn_ic_retry;
+ }
+ else
+ goto fail;
+#else
+ goto fail;
+#endif
+ }
p++; q++;
}
}
@@ -1655,6 +1710,24 @@ match_at(regex_t* reg, UChar* str, UChar* end, UChar* sstart,
STAT_OP_OUT;
break;
+ case OP_CCLASS_NODE: STAT_OP_IN(OP_CCLASS_NODE);
+ {
+ OnigCodePoint code;
+ void *node;
+ int mb_len;
+ UChar *ss;
+
+ DATA_ENSURE(1);
+ GET_POINTER_INC(node, p);
+ mb_len = enc_len(encode, s);
+ ss = s;
+ s += mb_len;
+ code = ONIGENC_MBC_TO_CODE(encode, ss, s);
+ if (code_is_in_cclass_node(node, code, mb_len) == 0) goto fail;
+ }
+ STAT_OP_OUT;
+ break;
+
case OP_ANYCHAR: STAT_OP_IN(OP_ANYCHAR);
DATA_ENSURE(1);
n = enc_len(encode, s);
@@ -2519,13 +2592,26 @@ str_lower_case_match(OnigEncoding enc, int ambig_flag,
UChar* t, UChar* tend, UChar* p, UChar* end)
{
int lowlen;
- UChar *q, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *q, *tsave, *psave, lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+
+ tsave = t;
+ psave = p;
+ retry:
while (t < tend) {
lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
- if (*t++ != *q++) return 0;
+ if (*t++ != *q++) {
+ if ((ambig_flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
+ ambig_flag &= ~ONIGENC_AMBIGUOUS_MATCH_COMPOUND;
+ t = tsave;
+ p = psave;
+ goto retry;
+ }
+ else
+ return 0;
+ }
lowlen--;
}
}
@@ -2538,9 +2624,7 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
UChar* target, UChar* target_end,
UChar* text, UChar* text_end, UChar* text_range)
{
- int lowlen;
- UChar *t, *p, *s, *end, *z;
- UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *s, *end;
end = text_end - (target_end - target) + 1;
if (end > text_range)
@@ -2549,21 +2633,10 @@ slow_search_ic(OnigEncoding enc, int ambig_flag,
s = text;
while (s < end) {
- z = s;
- lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, ambig_flag,
- t, target_end, s, text_end))
- return z;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag, target, target_end, s, text_end))
+ return s;
+
+ s += enc_len(enc, s);
}
return (UChar* )NULL;
@@ -2605,9 +2678,7 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
UChar* text, UChar* adjust_text,
UChar* text_end, UChar* text_start)
{
- int len, lowlen;
- UChar *t, *p, *s, *z;
- UChar lowbuf[ONIGENC_MBC_NORMALIZE_MAXLEN];
+ UChar *s;
s = text_end - (target_end - target);
if (s > text_start)
@@ -2616,24 +2687,11 @@ slow_search_backward_ic(OnigEncoding enc, int ambig_flag,
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
while (s >= text) {
- len = enc_len(enc, s);
- z = s;
- lowlen = ONIGENC_MBC_TO_NORMALIZE(enc, ambig_flag, &s, text_end, lowbuf);
- if (*target == *lowbuf) {
- p = lowbuf + 1;
- t = target + 1;
- while (--lowlen > 0) {
- if (*p != *t) break;
- p++; t++;
- }
- if (lowlen == 0) {
- if (str_lower_case_match(enc, ambig_flag,
- t, target_end, s, text_end))
- return z;
- }
- }
+ if (str_lower_case_match(enc, ambig_flag,
+ target, target_end, s, text_end))
+ return s;
- s = onigenc_get_prev_char_head(enc, adjust_text, z);
+ s = onigenc_get_prev_char_head(enc, adjust_text, s);
}
return (UChar* )NULL;
@@ -2828,11 +2886,12 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
UChar *prev;
MatchArg msa;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- reg->state++; /* increment as search counter */
- if (IS_NOT_NULL(reg->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
- reg->state++;
+ ONIG_STATE_INC(reg);
}
}
else {
@@ -2842,8 +2901,9 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- reg->state++; /* increment as search counter */
+ ONIG_STATE_INC(reg);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
MATCH_ARG_INIT(msa, option, region, at);
@@ -2863,7 +2923,7 @@ onig_match(regex_t* reg, UChar* str, UChar* end, UChar* at, OnigRegion* region,
}
MATCH_ARG_FREE(msa);
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
return r;
}
@@ -3098,11 +3158,12 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
UChar *s, *prev;
MatchArg msa;
- if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
- reg->state++; /* increment as search counter */
- if (IS_NOT_NULL(reg->chain)) {
+#ifdef USE_MULTI_THREAD_SYSTEM
+ if (ONIG_STATE(reg) >= ONIG_STATE_NORMAL) {
+ ONIG_STATE_INC(reg);
+ if (IS_NOT_NULL(reg->chain) && ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
onig_chain_reduce(reg);
- reg->state++;
+ ONIG_STATE_INC(reg);
}
}
else {
@@ -3112,8 +3173,9 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT;
THREAD_PASS;
}
- reg->state++; /* increment as search counter */
+ ONIG_STATE_INC(reg);
}
+#endif /* USE_MULTI_THREAD_SYSTEM */
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr, "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
@@ -3360,7 +3422,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
finish:
MATCH_ARG_FREE(msa);
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not setted in match_at(). */
@@ -3381,7 +3443,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
mismatch_no_msa:
r = ONIG_MISMATCH;
finish_no_msa:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
#ifdef ONIG_DEBUG
if (r != ONIG_MISMATCH)
fprintf(stderr, "onig_search: error %d\n", r);
@@ -3389,7 +3451,7 @@ onig_search(regex_t* reg, UChar* str, UChar* end,
return r;
match:
- reg->state--; /* decrement as search counter */
+ ONIG_STATE_DEC(reg);
MATCH_ARG_FREE(msa);
return s - str;
}
diff --git a/regint.h b/regint.h
index e77536c124..4cfd9c9768 100644
--- a/regint.h
+++ b/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -56,6 +56,7 @@
/* config */
/* spec. config */
+/* #define USE_UNICODE_FULL_RANGE_CTYPE */ /* --> move to regenc.h */
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */
@@ -65,6 +66,8 @@
#define USE_RECYCLE_NODE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUALIFIER_PEEK_NEXT
+#define USE_ST_HASH_TABLE
+#define USE_SHARED_CCLASS_TABLE
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
@@ -76,17 +79,21 @@
#define USE_VARIABLE_META_CHARS
#define USE_WORD_BEGIN_END /* "\<": word-begin, "\>": word-end */
#define USE_POSIX_REGION_OPTION /* needed for POSIX API support */
+/* #define USE_MULTI_THREAD_SYSTEM */
#define THREAD_ATOMIC_START /* depend on thread system */
#define THREAD_ATOMIC_END /* depend on thread system */
#define THREAD_PASS /* depend on thread system */
#define CHECK_INTERRUPT /* depend on application */
#define xmalloc malloc
#define xrealloc realloc
+#define xcalloc calloc
#define xfree free
#else
#include "ruby.h"
#include "version.h"
#include "rubysig.h" /* for DEFER_INTS, ENABLE_INTS */
+
+#define USE_MULTI_THREAD_SYSTEM
#define THREAD_ATOMIC_START DEFER_INTS
#define THREAD_ATOMIC_END ENABLE_INTS
#define THREAD_PASS rb_thread_schedule()
@@ -101,17 +108,9 @@
#define DEFAULT_WARN_FUNCTION rb_warn
#define DEFAULT_VERB_WARN_FUNCTION rb_warning
-#if defined(RUBY_VERSION_MAJOR)
-#if RUBY_VERSION_MAJOR > 1 || \
-(RUBY_VERSION_MAJOR == 1 && \
- defined(RUBY_VERSION_MINOR) && RUBY_VERSION_MINOR >= 8)
-#define USE_ST_HASH_TABLE
-#endif
-#endif
-
#endif /* else NOT_RUBY */
-#define THREAD_PASS_LIMIT_COUNT 10
+#define THREAD_PASS_LIMIT_COUNT 8
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
@@ -124,6 +123,69 @@
#define xalloca alloca
#endif
+
+/* Search-counter helpers: adjust reg->state by one around a match/search.
+ * Both expand to nothing unless USE_MULTI_THREAD_SYSTEM is defined. */
+#ifdef USE_MULTI_THREAD_SYSTEM
+#define ONIG_STATE_INC(reg) (reg)->state++
+#define ONIG_STATE_DEC(reg) (reg)->state--
+#else
+#define ONIG_STATE_INC(reg) /* Nothing */
+#define ONIG_STATE_DEC(reg) /* Nothing */
+#endif /* USE_MULTI_THREAD_SYSTEM */
+
+
+/* Hash-table name mapping between the st_* and onig_st_* spellings.
+ * Exactly one direction is active, selected by NOT_RUBY. */
+#define onig_st_is_member st_is_member
+
+#ifdef NOT_RUBY
+
+/* NOT_RUBY build: every st_* name is rewritten to its onig_st_* counterpart. */
+#define st_init_table onig_st_init_table
+#define st_init_table_with_size onig_st_init_table_with_size
+#define st_init_numtable onig_st_init_numtable
+#define st_init_numtable_with_size onig_st_init_numtable_with_size
+#define st_init_strtable onig_st_init_strtable
+#define st_init_strtable_with_size onig_st_init_strtable_with_size
+#define st_init_strend_table_with_size onig_st_init_strend_table_with_size
+#define st_delete onig_st_delete
+#define st_delete_safe onig_st_delete_safe
+#define st_insert onig_st_insert
+#define st_insert_strend onig_st_insert_strend
+#define st_lookup onig_st_lookup
+#define st_lookup_strend onig_st_lookup_strend
+#define st_foreach onig_st_foreach
+#define st_add_direct onig_st_add_direct
+#define st_add_direct_strend onig_st_add_direct_strend
+#define st_free_table onig_st_free_table
+#define st_cleanup_safe onig_st_cleanup_safe
+#define st_copy onig_st_copy
+#define st_nothing_key_clone onig_st_nothing_key_clone
+#define st_nothing_key_free onig_st_nothing_key_free
+
+#else /* NOT_RUBY */
+
+/* Ruby build: the onig_st_* names used by the regex sources resolve to the
+ * plain st_* functions. */
+#define onig_st_init_table st_init_table
+#define onig_st_init_table_with_size st_init_table_with_size
+#define onig_st_init_numtable st_init_numtable
+#define onig_st_init_numtable_with_size st_init_numtable_with_size
+#define onig_st_init_strtable st_init_strtable
+#define onig_st_init_strtable_with_size st_init_strtable_with_size
+#define onig_st_init_strend_table_with_size st_init_strend_table_with_size
+#define onig_st_delete st_delete
+#define onig_st_delete_safe st_delete_safe
+#define onig_st_insert st_insert
+#define onig_st_insert_strend st_insert_strend
+#define onig_st_lookup st_lookup
+#define onig_st_lookup_strend st_lookup_strend
+#define onig_st_foreach st_foreach
+#define onig_st_add_direct st_add_direct
+#define onig_st_add_direct_strend st_add_direct_strend
+#define onig_st_free_table st_free_table
+#define onig_st_cleanup_safe st_cleanup_safe
+#define onig_st_copy st_copy
+#define onig_st_nothing_key_clone st_nothing_key_clone
+#define onig_st_nothing_key_free st_nothing_key_free
+
+#endif /* NOT_RUBY */
+
+
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@@ -139,9 +201,11 @@
#endif
#include <ctype.h>
+#ifdef HAVE_SYS_TYPES_H
#ifndef __BORLANDC__
#include <sys/types.h>
#endif
+#endif
#ifdef ONIG_DEBUG
# include <stdio.h>
@@ -483,6 +547,7 @@ enum OpCode {
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
+ OP_CCLASS_NODE, /* pointer to CClassNode node */
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
@@ -570,6 +635,7 @@ typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef short int MemNumType;
+typedef void* PointerType;
#define SIZE_OPCODE 1
#define SIZE_RELADDR sizeof(RelAddrType)
@@ -579,7 +645,7 @@ typedef short int MemNumType;
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
-
+#define SIZE_POINTER sizeof(PointerType)
#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
@@ -604,6 +670,7 @@ typedef short int MemNumType;
#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
+#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
@@ -679,6 +746,22 @@ typedef short int MemNumType;
ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS | ONIG_SYN_ALLOW_INVALID_INTERVAL | \
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC | ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC )
+/* cclass node */
+/* Bits stored in CClassNode.flags: NOT marks a negated class, SHARE marks a
+ * node that is flagged as shared (onig_node_free leaves such nodes alone). */
+#define FLAG_CCLASS_NOT 1
+#define FLAG_CCLASS_SHARE (1<<1)
+
+/* Setters/clearers are plain assignment expressions; the IS_* forms yield 0 or 1. */
+#define CCLASS_SET_NOT(cc) (cc)->flags |= FLAG_CCLASS_NOT
+#define CCLASS_CLEAR_NOT(cc) (cc)->flags &= ~FLAG_CCLASS_NOT
+#define CCLASS_SET_SHARE(cc) (cc)->flags |= FLAG_CCLASS_SHARE
+#define IS_CCLASS_NOT(cc) (((cc)->flags & FLAG_CCLASS_NOT) != 0)
+#define IS_CCLASS_SHARE(cc) (((cc)->flags & FLAG_CCLASS_SHARE) != 0)
+
+/* Character-class node payload. */
+typedef struct {
+ int flags; /* combination of FLAG_CCLASS_NOT / FLAG_CCLASS_SHARE */
+ BitSet bs; /* bit set of member codes */
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+
#ifdef ONIG_DEBUG
@@ -700,13 +783,11 @@ extern void onig_print_statistics P_((FILE* f));
extern char* onig_error_code_to_format P_((int code));
extern void onig_snprintf_with_pattern PV_((char buf[], int bufsize, OnigEncoding enc, char* pat, char* pat_end, char *fmt, ...));
-extern UChar* onig_strdup P_((UChar* s, UChar* end));
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_alloc_init P_((regex_t** reg, OnigOptionType option, OnigAmbigType ambig_flag, OnigEncoding enc, OnigSyntaxType* syntax));
extern int onig_compile P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_chain_reduce P_((regex_t* reg));
extern void onig_chain_link_add P_((regex_t* to, regex_t* add));
extern void onig_transfer P_((regex_t* to, regex_t* from));
-extern int onig_is_in_code_range P_((UChar* p, OnigCodePoint code));
#endif /* REGINT_H */
diff --git a/regparse.c b/regparse.c
index 2d26786771..6fe9044bdd 100644
--- a/regparse.c
+++ b/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -219,21 +219,26 @@ k_strcpy(UChar* dest, UChar* src, UChar* end)
}
}
-extern UChar*
-onig_strdup(UChar* s, UChar* end)
+/* Duplicate the byte range [s, end) into a freshly xmalloc'ed buffer and
+ * append ONIGENC_MBC_MINLEN(enc) zero bytes as the terminator.
+ * An allocation failure is handled by CHECK_NULL_RETURN. */
+static UChar*
+strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
- int len = end - s;
+ int slen, term_len, i;
+ UChar *r;
- if (len > 0) {
- UChar* r = (UChar* )xmalloc(len + 1);
- CHECK_NULL_RETURN(r);
- xmemcpy(r, s, len);
- r[len] = (UChar )0;
- return r;
- }
- else return NULL;
+ slen = end - s;
+ term_len = ONIGENC_MBC_MINLEN(enc);
+
+ r = (UChar* )xmalloc(slen + term_len);
+ CHECK_NULL_RETURN(r);
+ xmemcpy(r, s, slen);
+
+ /* zero-fill the terminator, one byte per minimum character unit byte */
+ for (i = 0; i < term_len; i++)
+ r[slen + i] = (UChar )0;
+
+ return r;
}
+
/* scan pattern methods */
#define PEND_VALUE 0
@@ -298,7 +303,7 @@ typedef struct {
#ifdef USE_ST_HASH_TABLE
-#include <st.h>
+#include "st.h"
typedef st_table NameTable;
typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */
@@ -335,7 +340,7 @@ onig_print_names(FILE* fp, regex_t* reg)
if (IS_NOT_NULL(t)) {
fprintf(fp, "name table\n");
- st_foreach(t, i_print_name_entry, (HashDataType )fp);
+ onig_st_foreach(t, i_print_name_entry, (HashDataType )fp);
fputs("\n", fp);
}
return 0;
@@ -356,7 +361,7 @@ names_clear(regex_t* reg)
NameTable* t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) {
- st_foreach(t, i_free_name_entry, 0);
+ onig_st_foreach(t, i_free_name_entry, 0);
}
return 0;
}
@@ -371,7 +376,7 @@ onig_names_free(regex_t* reg)
if (r) return r;
t = (NameTable* )reg->name_table;
- if (IS_NOT_NULL(t)) st_free_table(t);
+ if (IS_NOT_NULL(t)) onig_st_free_table(t);
reg->name_table = (void* )NULL;
return 0;
}
@@ -379,33 +384,12 @@ onig_names_free(regex_t* reg)
+/* Look up the name given as the byte range [name, name_end) in the regex's
+ * name table. Returns the matching NameEntry, or NULL when the regex has no
+ * name table or the lookup leaves the result untouched. */
static NameEntry*
name_find(regex_t* reg, UChar* name, UChar* name_end)
{
- int len;
- UChar namebuf[NAMEBUF_SIZE_1];
- UChar *key;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
e = (NameEntry* )NULL;
if (IS_NOT_NULL(t)) {
- if (*name_end == '\0') {
- key = name;
- }
- else {
- /* dirty, but st.c API claims NULL terminated key. */
- len = name_end - name;
- if (len <= NAMEBUF_SIZE) {
- xmemcpy(namebuf, name, len);
- namebuf[len] = '\0';
- key = namebuf;
- }
- else {
- key = onig_strdup(name, name_end);
- if (IS_NULL(key)) return (NameEntry* )NULL;
- }
- }
-
- st_lookup(t, (HashDataType )key, (HashDataType * )&e);
- if (key != name && key != namebuf) xfree(key);
+ /* start/end keyed lookup: no NUL-terminated copy of the name is needed */
+ onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e)));
}
return e;
}
@@ -422,7 +406,8 @@ static int
i_names(UChar* key, NameEntry* e, INamesArg* arg)
{
int r = (*(arg->func))(e->name,
- e->name + onigenc_str_bytelen_null(arg->enc, e->name),
+ /*e->name + onigenc_str_bytelen_null(arg->enc, e->name), */
+ e->name + e->name_len,
e->back_num,
(e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
arg->reg, arg->arg);
@@ -447,11 +432,40 @@ onig_foreach_name(regex_t* reg,
narg.reg = reg;
narg.arg = arg;
narg.enc = reg->enc; /* should be pattern encoding. */
- st_foreach(t, i_names, (HashDataType )&narg);
+ onig_st_foreach(t, i_names, (HashDataType )&narg);
}
return narg.ret;
}
+/* Table-iteration callback: rewrite every group number recorded in one name
+ * entry through map[old].new_val. Entries with a single reference keep it in
+ * back_ref1; entries with several keep them in back_refs[]. */
+static int
+i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map)
+{
+  if (e->back_num == 1) {
+    e->back_ref1 = map[e->back_ref1].new_val;
+  }
+  else if (e->back_num > 1) {
+    int idx = e->back_num;
+
+    /* each slot is remapped independently, so the walk order is irrelevant */
+    while (idx-- > 0) {
+      e->back_refs[idx] = map[e->back_refs[idx]].new_val;
+    }
+  }
+
+  return ST_CONTINUE;
+}
+
+/* Apply a group-number remapping to every entry of the regex's name table.
+ * Does nothing when the regex has no name table. Always returns 0. */
+extern int
+onig_renumber_name_table(regex_t* reg, GroupNumRemap* map)
+{
+  NameTable* names = (NameTable* )reg->name_table;
+
+  if (IS_NULL(names)) return 0;
+
+  onig_st_foreach(names, i_renumber_name, (HashDataType )map);
+  return 0;
+}
+
+
extern int
onig_number_of_names(regex_t* reg)
{
@@ -617,14 +631,16 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
if (IS_NULL(e)) {
#ifdef USE_ST_HASH_TABLE
if (IS_NULL(t)) {
- reg->name_table = t = st_init_strtable();
+ t = onig_st_init_strend_table_with_size(5);
+ reg->name_table = (void* )t;
}
e = (NameEntry* )xmalloc(sizeof(NameEntry));
CHECK_NULL_RETURN_VAL(e, ONIGERR_MEMORY);
- e->name = onig_strdup(name, name_end);
+ e->name = strdup_with_null(reg->enc, name, name_end);
if (IS_NULL(e->name)) return ONIGERR_MEMORY;
- st_insert(t, (HashDataType )e->name, (HashDataType )e);
+ onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
+ (HashDataType )e);
e->name_len = name_end - name;
e->back_num = 0;
@@ -669,7 +685,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
}
e = &(t->e[t->num]);
t->num++;
- e->name = onig_strdup(name, name_end);
+ e->name = strdup_with_null(reg->enc, name, name_end);
e->name_len = name_end - name;
#endif
}
@@ -886,8 +902,11 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
{
FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
n->next = FreeNodeList;
FreeNodeList = n;
+ THREAD_ATOMIC_END;
}
#else
xfree(node);
@@ -899,8 +918,15 @@ onig_node_free(Node* node)
break;
case N_CCLASS:
- if (NCCLASS(node).mbuf)
- bbuf_free(NCCLASS(node).mbuf);
+ {
+ CClassNode* cc = &(NCCLASS(node));
+
+ if (IS_CCLASS_SHARE(cc))
+ return ;
+
+ if (cc->mbuf)
+ bbuf_free(cc->mbuf);
+ }
break;
case N_QUALIFIER:
@@ -927,8 +953,11 @@ onig_node_free(Node* node)
#ifdef USE_RECYCLE_NODE
{
FreeNode* n = (FreeNode* )node;
+
+ THREAD_ATOMIC_START;
n->next = FreeNodeList;
FreeNodeList = n;
+ THREAD_ATOMIC_END;
}
#else
xfree(node);
@@ -959,8 +988,10 @@ node_new()
#ifdef USE_RECYCLE_NODE
if (IS_NOT_NULL(FreeNodeList)) {
+ THREAD_ATOMIC_START;
node = (Node* )FreeNodeList;
FreeNodeList = FreeNodeList->next;
+ THREAD_ATOMIC_END;
return node;
}
#endif
@@ -974,8 +1005,8 @@ static void
initialize_cclass(CClassNode* cc)
{
BITSET_CLEAR(cc->bs);
- cc->not = 0;
- cc->mbuf = NULL;
+ cc->flags = 0;
+ cc->mbuf = NULL;
}
static Node*
@@ -989,6 +1020,54 @@ node_new_cclass()
return node;
}
+/*
+ * Build an N_CCLASS node straight from code-range tables.
+ *
+ *   not : non-zero marks the class as negated (FLAG_CCLASS_NOT).
+ *   sbr : range table whose codes are set in the node's bit set; may be NULL.
+ *   mbr : range table used as multi-byte info; may be NULL or empty, in
+ *         which case the node gets no mbuf.
+ *
+ * The BBuf created for a non-empty mbr does NOT copy the table: its p member
+ * points at mbr itself, so mbr must outlive the node.
+ *
+ * Returns the new node, or NULL on allocation failure. The node is released
+ * on the failure path (previously it leaked when the BBuf allocation failed).
+ */
+extern Node*
+node_new_cclass_by_codepoint_range(int not,
+                                   OnigCodePoint sbr[], OnigCodePoint mbr[])
+{
+  CClassNode* cc;
+  int n, i, j;
+
+  Node* node = node_new();
+  CHECK_NULL_RETURN(node);
+  node->type = N_CCLASS;
+
+  cc = &(NCCLASS(node));
+  cc->flags = 0;
+  cc->mbuf  = NULL;  /* keep the node freeable on every later error path */
+  if (not != 0) CCLASS_SET_NOT(cc);
+
+  BITSET_CLEAR(cc->bs);
+  if (IS_NOT_NULL(sbr)) {
+    n = ONIGENC_CODE_RANGE_NUM(sbr);
+    for (i = 0; i < n; i++) {
+      for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
+           j <= (int )ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
+        BITSET_SET_BIT(cc->bs, j);
+      }
+    }
+  }
+
+  if (IS_NOT_NULL(mbr)) {
+    n = ONIGENC_CODE_RANGE_NUM(mbr);
+    if (n != 0) {
+      BBuf* bbuf = (BBuf* )xmalloc(sizeof(BBuf));
+
+      if (IS_NULL(bbuf)) {
+        /* mbuf is still NULL and the node is not shared, so this only
+           gives the node itself back */
+        onig_node_free(node);
+        return NULL;
+      }
+
+      bbuf->alloc = n + 1;
+      bbuf->used  = n + 1;
+      bbuf->p     = (UChar* )((void* )mbr);  /* borrowed, not copied */
+      cc->mbuf = bbuf;
+    }
+  }
+
+  return node;
+}
+
static Node*
node_new_ctype(int type)
{
@@ -1711,7 +1790,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
BBuf *tbuf;
int r;
- if (cc->not != 0) {
+ if (IS_CCLASS_NOT(cc)) {
bitset_invert(cc->bs);
if (! ONIGENC_IS_SINGLEBYTE(enc)) {
@@ -1722,7 +1801,7 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
cc->mbuf = tbuf;
}
- cc->not = 0;
+ CCLASS_CLEAR_NOT(cc);
}
return 0;
@@ -1736,10 +1815,10 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
- not1 = dest->not;
+ not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
- not2 = cc->not;
+ not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@@ -1794,10 +1873,10 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
BitSetRef bsr1, bsr2;
BitSet bs1, bs2;
- not1 = dest->not;
+ not1 = IS_CCLASS_NOT(dest);
bsr1 = dest->bs;
buf1 = dest->mbuf;
- not2 = cc->not;
+ not2 = IS_CCLASS_NOT(cc);
bsr2 = cc->bs;
buf2 = cc->mbuf;
@@ -2158,7 +2237,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env)
UChar* p = *src;
PFETCH_READY;
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
switch (c) {
@@ -2468,7 +2547,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_BV(syn, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC))
goto end;
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
PFETCH(c);
tok->escaped = 1;
@@ -2576,9 +2655,9 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
- tok->type = TK_CODE_POINT;
- tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
}
break;
@@ -2669,7 +2748,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH(c);
if (c == MC_ESC(enc)) {
- if (PEND) return ONIGERR_END_PATTERN_AT_BACKSLASH;
+ if (PEND) return ONIGERR_END_PATTERN_AT_ESCAPE;
tok->backp = p;
PFETCH(c);
@@ -2907,9 +2986,9 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
- tok->type = TK_CODE_POINT;
- tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
+ tok->type = TK_CODE_POINT;
+ tok->base = 16;
+ tok->u.code = (OnigCodePoint )num;
}
break;
@@ -3057,7 +3136,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num < 0) return num;
/* set_raw: */
if (tok->u.c != num) {
- tok->type = TK_CODE_POINT;
+ tok->type = TK_CODE_POINT;
tok->u.code = (OnigCodePoint )num;
}
else { /* string */
@@ -3225,21 +3304,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
- int nsb, int nmb,
- OnigCodePointRange *sbr, OnigCodePointRange *mbr)
+ OnigCodePoint sbr[], OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
+ int nsb = ONIGENC_CODE_RANGE_NUM(sbr);
+ int nmb = ONIGENC_CODE_RANGE_NUM(mbr);
+
if (not == 0) {
for (i = 0; i < nsb; i++) {
- for (j = sbr[i].from; j <= sbr[i].to; j++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(sbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(sbr, i); j++) {
BITSET_SET_BIT(cc->bs, j);
}
}
for (i = 0; i < nmb; i++) {
- r = add_code_range_to_buf(&(cc->mbuf), mbr[i].from, mbr[i].to);
+ r = add_code_range_to_buf(&(cc->mbuf),
+ ONIGENC_CODE_RANGE_FROM(mbr, i),
+ ONIGENC_CODE_RANGE_TO(mbr, i));
if (r != 0) return r;
}
}
@@ -3248,10 +3332,11 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
if (ONIGENC_MBC_MINLEN(enc) == 1) {
for (i = 0; i < nsb; i++) {
- for (j = prev; j < sbr[i].from; j++) {
+ for (j = prev;
+ j < ONIGENC_CODE_RANGE_FROM(sbr, i); j++) {
BITSET_SET_BIT(cc->bs, j);
}
- prev = sbr[i].to + 1;
+ prev = ONIGENC_CODE_RANGE_TO(sbr, i) + 1;
}
if (prev < 0x7f) {
for (j = prev; j < 0x7f; j++) {
@@ -3263,11 +3348,12 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc,
}
for (i = 0; i < nmb; i++) {
- if (prev < mbr[i].from) {
- r = add_code_range_to_buf(&(cc->mbuf), prev, mbr[i].from - 1);
+ if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) {
+ r = add_code_range_to_buf(&(cc->mbuf), prev,
+ ONIGENC_CODE_RANGE_FROM(mbr, i) - 1);
if (r != 0) return r;
}
- prev = mbr[i].to + 1;
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
}
if (prev < 0x7fffffff) {
r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
@@ -3282,14 +3368,12 @@ static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
int c, r;
- int nsb, nmb;
- OnigCodePointRange *sbr, *mbr;
+ OnigCodePoint *sbr, *mbr;
OnigEncoding enc = env->enc;
- r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &nsb, &nmb, &sbr, &mbr);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sbr, &mbr);
if (r == 0) {
- return add_ctype_to_cc_by_range(cc, ctype, not, env->enc,
- nsb, nmb, sbr, mbr);
+ return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sbr, mbr);
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
@@ -3349,8 +3433,8 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
else {
for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) &&
- ! ONIGENC_IS_CODE_WORD(enc, c))
+ if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */
+ && ! ONIGENC_IS_CODE_WORD(enc, c))
BITSET_SET_BIT(cc->bs, c);
}
}
@@ -3839,7 +3923,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
break;
case TK_CODE_POINT:
- v = (OnigCodePoint )tok->u.code;
+ v = tok->u.code;
in_israw = 1;
val_entry:
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
@@ -4017,8 +4101,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
cc = prev_cc;
}
- cc->not = neg;
- if (cc->not != 0 &&
+ if (neg != 0)
+ CCLASS_SET_NOT(cc);
+ else
+ CCLASS_CLEAR_NOT(cc);
+ if (IS_CCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
int is_empty;
@@ -4388,7 +4475,7 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
for (j = 0; j < ccs[i].n; j++) {
ci = &(ccs[i].items[j]);
if (ci->len > 1) { /* compound only */
- if (cc->not) clear_not_flag_cclass(cc, enc);
+ if (IS_CCLASS_NOT(cc)) clear_not_flag_cclass(cc, enc);
clen = ci->len;
for (k = 0; k < clen; k++) {
@@ -4417,6 +4504,98 @@ make_compound_alt_node_from_cc(OnigAmbigType ambig_flag, OnigEncoding enc,
return n;
}
+
+#ifdef USE_SHARED_CCLASS_TABLE
+
+/* A ctype class is registered in the shared table only when its multi-byte
+ * range table holds at least this many ranges. */
+#define THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 8
+
+/* for ctype node hash table */
+
+/* Key of the shared ctype-class table: one entry per
+ * (encoding, negation flag, ctype) combination. */
+typedef struct {
+ OnigEncoding enc;
+ int not;
+ int type;
+} type_cclass_key;
+
+/* Key comparison for the shared ctype-class table.
+ * Returns 0 when type, encoding and negation flag all agree, 1 otherwise. */
+static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)
+{
+  int same = (x->type == y->type) && (x->enc == y->enc) && (x->not == y->not);
+
+  return same ? 0 : 1;
+}
+
+/*
+ * Hash function for the shared ctype-class table.
+ *
+ * Mixes the bytes of the encoding pointer and of the ctype value, then adds
+ * the negation flag. Only the bytes of the key's own fields are read: the
+ * previous version walked sizeof(OnigEncodingType) bytes starting at the
+ * address of the pointer member, which runs past the field (and past the
+ * key itself), giving out-of-bounds reads and hashes that depend on
+ * unrelated memory.
+ */
+static int type_cclass_hash(type_cclass_key* key)
+{
+  int i, val;
+  unsigned char *p;
+
+  val = 0;
+
+  /* the encoding is identified by its pointer value */
+  p = (unsigned char* )&(key->enc);
+  for (i = 0; i < (int )sizeof(key->enc); i++) {
+    val = val * 997 + (int )*p++;
+  }
+
+  p = (unsigned char* )(&key->type);
+  for (i = 0; i < (int )sizeof(key->type); i++) {
+    val = val * 997 + (int )*p++;
+  }
+
+  val += key->not;
+  return val + (val >> 5);
+}
+
+/* Key destructor for the shared ctype-class table: releases one heap key.
+ * Always returns 0. */
+static int type_cclass_key_free(st_data_t x)
+{
+  type_cclass_key* owned = (type_cclass_key* )x;
+
+  xfree(owned);
+  return 0;
+}
+
+/* Key copier for the shared ctype-class table: returns a newly allocated
+ * duplicate of the key passed in x. */
+static st_data_t type_cclass_key_clone(st_data_t x)
+{
+  type_cclass_key* copy = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+
+  xmemcpy(copy, (type_cclass_key* )x, sizeof(type_cclass_key));
+  return (st_data_t )copy;
+}
+
+/* Callback set handed to onig_st_init_table_with_size() for the shared
+ * ctype-class table: compare, hash, key free, key clone (in that order). */
+static struct st_hash_type type_type_cclass_hash = {
+ type_cclass_cmp,
+ type_cclass_hash,
+ type_cclass_key_free,
+ type_cclass_key_clone
+};
+
+/* The shared ctype-class table; NULL until the first shareable class is
+ * parsed. */
+static st_table* OnigTypeCClassTable;
+
+
+/* Table-iteration callback used when tearing down the shared ctype-class
+ * table. Releases the node stored as the entry's value together with its
+ * BBuf header (only the header: the code below never touches mbuf->p), then
+ * asks the iterator to delete the entry. */
+static int
+i_free_shared_class(type_cclass_key* key, Node* node, void* arg)
+{
+  CClassNode* cc;
+
+  if (IS_NULL(node)) return ST_DELETE;
+
+  cc = &(NCCLASS(node));
+  if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);
+  xfree(node);
+
+  return ST_DELETE;
+}
+
+/*
+ * Release the shared ctype-class table and everything stored in it.
+ *
+ * Every entry is freed through i_free_shared_class(), then the table itself
+ * is released and the global pointer is reset so that a later parse creates
+ * a fresh table. Previously only the entries were removed and the emptied
+ * table stayed allocated. The work is done inside the same atomic section
+ * that parse_exp uses when it reads or populates the table.
+ *
+ * Always returns 0.
+ */
+extern int
+onig_free_shared_cclass_table()
+{
+  THREAD_ATOMIC_START;
+
+  if (IS_NOT_NULL(OnigTypeCClassTable)) {
+    onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);
+    onig_st_free_table(OnigTypeCClassTable);
+    OnigTypeCClassTable = NULL;
+  }
+
+  THREAD_ATOMIC_END;
+
+  return 0;
+}
+
+#endif /* USE_SHARED_CCLASS_TABLE */
+
+
static int
parse_exp(Node** np, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env)
@@ -4561,13 +4740,63 @@ parse_exp(Node** np, OnigToken* tok, int term,
CClassNode* cc;
int ctype, not;
+#ifdef USE_SHARED_CCLASS_TABLE
+ OnigCodePoint *sbr, *mbr;
+
ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, ctype, &sbr, &mbr);
+ if (r == 0 &&
+ ONIGENC_CODE_RANGE_NUM(mbr)
+ >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) {
+ type_cclass_key key;
+ type_cclass_key* new_key;
+
+ key.enc = env->enc;
+ key.not = not;
+ key.type = ctype;
+
+ THREAD_ATOMIC_START;
+
+ if (IS_NULL(OnigTypeCClassTable)) {
+ OnigTypeCClassTable
+ = onig_st_init_table_with_size(&type_type_cclass_hash, 10);
+ if (IS_NULL(OnigTypeCClassTable)) {
+ THREAD_ATOMIC_END;
+ return ONIGERR_MEMORY;
+ }
+ }
+ else {
+ if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key,
+ (st_data_t* )np)) {
+ THREAD_ATOMIC_END;
+ break;
+ }
+ }
+
+	/* Prepare the heap copy of the lookup key first. The table stores
+	   this pointer and later hashes/compares through it, so it must
+	   carry the same (enc, not, type) triple as `key`; it used to be
+	   inserted uninitialised. Allocating it before the node also keeps
+	   the failure path trivial. */
+	new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key));
+	if (IS_NULL(new_key)) {
+	  THREAD_ATOMIC_END;
+	  return ONIGERR_MEMORY;
+	}
+	*new_key = key;
+
+	*np = node_new_cclass_by_codepoint_range(not, sbr, mbr);
+	if (IS_NULL(*np)) {
+	  xfree(new_key);
+	  THREAD_ATOMIC_END;
+	  return ONIGERR_MEMORY;
+	}
- *np = node_new_cclass();
- CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
- cc = &(NCCLASS(*np));
- add_ctype_to_cc(cc, ctype, 0, env);
- if (not != 0) CCLASS_SET_NOT(cc);
+	/* shared nodes are left alone by onig_node_free() */
+	CCLASS_SET_SHARE(&(NCCLASS(*np)));
+	onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key,
+			   (st_data_t )*np);
+
+ THREAD_ATOMIC_END;
+ }
+ else {
+#endif
+ ctype = parse_ctype_to_enc_ctype(tok->u.subtype, &not);
+ *np = node_new_cclass();
+ CHECK_NULL_RETURN_VAL(*np, ONIGERR_MEMORY);
+ cc = &(NCCLASS(*np));
+ add_ctype_to_cc(cc, ctype, 0, env);
+ if (not != 0) CCLASS_SET_NOT(cc);
+#ifdef USE_SHARED_CCLASS_TABLE
+ }
+#endif
}
break;
@@ -4605,7 +4834,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
for (i = 0; i < n; i++) {
in_cc = onig_is_code_in_cc(env->enc, ccs[i].from, cc);
- if ((in_cc != 0 && cc->not == 0) || (in_cc == 0 && cc->not != 0)) {
+ if ((in_cc != 0 && !IS_CCLASS_NOT(cc)) ||
+ (in_cc == 0 && IS_CCLASS_NOT(cc))) {
if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
ccs[i].from >= SINGLE_BYTE_SIZE) {
/* if (cc->not) clear_not_flag_cclass(cc, env->enc); */
diff --git a/regparse.h b/regparse.h
index 5982ec8081..6014b9290b 100644
--- a/regparse.h
+++ b/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,8 +95,6 @@
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
-#define CCLASS_SET_NOT(cc) (cc)->not = 1
-
#define NQ_TARGET_ISNOT_EMPTY 0
#define NQ_TARGET_IS_EMPTY 1
#define NQ_TARGET_IS_EMPTY_MEM 2
@@ -111,11 +109,14 @@ typedef struct {
UChar buf[NODE_STR_BUF_SIZE];
} StrNode;
+/* moved to regint.h */
+#if 0
typedef struct {
- int not;
+ int flags;
BitSet bs;
BBuf* mbuf; /* multi-byte info or NULL */
} CClassNode;
+#endif
typedef struct {
int state;
@@ -280,6 +281,15 @@ typedef struct {
#define IS_SYNTAX_OP2(syn, opm) (((syn)->op2 & (opm)) != 0)
#define IS_SYNTAX_BV(syn, bvm) (((syn)->behavior & (bvm)) != 0)
+
+#ifdef USE_NAMED_GROUP
+/* One slot of a group-number remapping table: the table is indexed by the
+ * old group number and new_val holds the number that replaces it. */
+typedef struct {
+ int new_val;
+} GroupNumRemap;
+
+/* Rewrites the group numbers stored in reg's name table through map. */
+extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
+#endif
+
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
extern int onig_strncmp P_((UChar* s1, UChar* s2, int n));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
diff --git a/st.c b/st.c
index e4036f1025..cad73288f6 100644
--- a/st.c
+++ b/st.c
@@ -6,12 +6,29 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include "st.h"
#ifdef _WIN32
#include <malloc.h>
#endif
+#ifdef NOT_RUBY
+#include "regint.h"
+#else
+#ifdef RUBY_PLATFORM
+#define xmalloc ruby_xmalloc
+#define xcalloc ruby_xcalloc
+#define xrealloc ruby_xrealloc
+#define xfree ruby_xfree
+
+void *xmalloc(long);
+void *xcalloc(long, long);
+void *xrealloc(void *, long);
+void xfree(void *);
+#endif
+#endif
+
+#include "st.h"
+
typedef struct st_table_entry st_table_entry;
struct st_table_entry {
@@ -33,11 +50,14 @@ struct st_table_entry {
* allocated initially
*
*/
+
static int numcmp(long, long);
static int numhash(long);
static struct st_hash_type type_numhash = {
numcmp,
numhash,
+ st_nothing_key_free,
+ st_nothing_key_clone
};
/* extern int strcmp(const char *, const char *); */
@@ -45,19 +65,21 @@ static int strhash(const char *);
static struct st_hash_type type_strhash = {
strcmp,
strhash,
+ st_nothing_key_free,
+ st_nothing_key_clone
};
-#ifdef RUBY_PLATFORM
-#define xmalloc ruby_xmalloc
-#define xcalloc ruby_xcalloc
-#define xrealloc ruby_xrealloc
-#define xfree ruby_xfree
+static int strend_cmp(st_strend_key*, st_strend_key*);
+static int strend_hash(st_strend_key*);
+static int strend_key_free(st_data_t key);
+static st_data_t strend_key_clone(st_data_t x);
-void *xmalloc(long);
-void *xcalloc(long, long);
-void *xrealloc(void *, long);
-void xfree(void *);
-#endif
+/* Callback set for tables keyed by st_strend_key (start/end byte range):
+ * compare, hash, key free, key clone (in that order). */
+static struct st_hash_type type_strend_hash = {
+ strend_cmp,
+ strend_hash,
+ strend_key_free,
+ strend_key_clone
+};
static void rehash(st_table *);
@@ -125,7 +147,7 @@ new_size(size)
int newsize;
for (i = 0, newsize = MINSIZE;
- i < sizeof(primes)/sizeof(primes[0]);
+ i < (int )(sizeof(primes)/sizeof(primes[0]));
i++, newsize <<= 1)
{
if (newsize > size) return primes[i];
@@ -206,6 +228,13 @@ st_init_strtable_with_size(size)
return st_init_table_with_size(&type_strhash, size);
}
+/* Create a table whose keys are st_strend_key byte ranges, sized for
+ * roughly `size` entries. Returns whatever st_init_table_with_size()
+ * returns. */
+st_table*
+st_init_strend_table_with_size(size)
+    int size;
+{
+    st_table *tbl;
+
+    tbl = st_init_table_with_size(&type_strend_hash, size);
+    return tbl;
+}
+
void
st_free_table(table)
st_table *table;
@@ -267,6 +296,21 @@ st_lookup(table, key, value)
}
}
+/* Look up the byte range [str_key, end_key) in a strend-keyed table.
+ * A temporary stack key is used, so nothing is allocated. The result and
+ * the handling of *value are exactly those of st_lookup(). */
+int
+st_lookup_strend(table, str_key, end_key, value)
+    st_table *table;
+    unsigned char* str_key;
+    unsigned char* end_key;
+    st_data_t *value;
+{
+    st_strend_key probe;
+
+    probe.end = end_key;
+    probe.s   = str_key;
+
+    return st_lookup(table, (st_data_t )&probe, value);
+}
+
#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
do {\
st_table_entry *entry;\
@@ -307,6 +351,22 @@ st_insert(table, key, value)
}
}
+/*
+ * Insert `value` under the byte range [str_key, end_key).
+ *
+ * A heap st_strend_key describing the range is created for the table to
+ * own; the bytes themselves are not copied and must stay valid while the
+ * entry exists.
+ *
+ * Returns the result of st_insert(). A non-zero result means an entry with
+ * an equal key already existed and only its value was replaced; in that
+ * case the table keeps its original key, so the key allocated here is
+ * released instead of being leaked.
+ */
+int
+st_insert_strend(table, str_key, end_key, value)
+    st_table *table;
+    unsigned char* str_key;
+    unsigned char* end_key;
+    st_data_t value;
+{
+    st_strend_key* key;
+    int existed;
+
+    key = alloc(st_strend_key);
+    key->s   = (unsigned char* )str_key;
+    key->end = (unsigned char* )end_key;
+
+    existed = st_insert(table, (st_data_t )key, value);
+    if (existed) {
+        /* the table did not take ownership of this key */
+        xfree(key);
+    }
+
+    return existed;
+}
+
void
st_add_direct(table, key, value)
st_table *table;
@@ -320,6 +380,21 @@ st_add_direct(table, key, value)
ADD_DIRECT(table, key, value, hash_val, bin_pos);
}
+/* Add `value` under the byte range [str_key, end_key) via st_add_direct().
+ * A heap st_strend_key is created for the table; the bytes of the range are
+ * referenced, not copied. */
+void
+st_add_direct_strend(table, str_key, end_key, value)
+    st_table *table;
+    unsigned char* str_key;
+    unsigned char* end_key;
+    st_data_t value;
+{
+    st_strend_key* owned = alloc(st_strend_key);
+
+    owned->end = end_key;
+    owned->s   = str_key;
+
+    st_add_direct(table, (st_data_t )owned, value);
+}
+
static void
rehash(table)
register st_table *table;
@@ -379,6 +454,7 @@ st_copy(old_table)
return 0;
}
*entry = *ptr;
+ entry->key = old_table->type->key_clone(ptr->key);
entry->next = new_table->bins[i];
new_table->bins[i] = entry;
ptr = ptr->next;
@@ -522,6 +598,7 @@ st_foreach(table, func, arg)
last->next = ptr->next;
}
ptr = ptr->next;
+ table->type->key_free(tmp->key);
free(tmp);
table->num_entries--;
}
@@ -581,3 +658,59 @@ numhash(n)
{
return n;
}
+
+/* key_free callback that leaves the key alone; always returns 0. */
+int
+st_nothing_key_free(st_data_t key)
+{
+    return 0;
+}
+
+/* key_clone callback that returns the key it was given, without copying. */
+st_data_t
+st_nothing_key_clone(st_data_t x)
+{
+    return x;
+}
+
+/*
+ * Compare two st_strend_key byte ranges.
+ * Returns 0 when both ranges have the same length and the same bytes,
+ * 1 when the lengths differ, and otherwise the difference between the
+ * first pair of bytes that do not match.
+ */
+static int strend_cmp(st_strend_key* x, st_strend_key* y)
+{
+    unsigned char *a = x->s;
+    unsigned char *b = y->s;
+    unsigned char *a_end = x->end;
+
+    if ((a_end - a) != (y->end - b))
+        return 1;
+
+    for (; a < a_end; a++, b++) {
+        int diff = (int )*a - (int )*b;
+
+        if (diff != 0) return diff;
+    }
+    return 0;
+}
+
+/*
+ * Hash the bytes from x->s up to (not including) x->end.
+ *
+ * The accumulator is unsigned: multiplying by 997 overflows after only a
+ * few bytes, and overflow of a signed int is undefined behaviour, whereas
+ * unsigned arithmetic wraps.  The final mix (val + (val >> 5)) is likewise
+ * done unsigned and converted to int only for the return value.
+ */
+static int strend_hash(st_strend_key* x)
+{
+    unsigned int val;
+    unsigned char *p;
+
+    val = 0;
+    p = x->s;
+    while (p < x->end) {
+        val = val * 997 + (unsigned int )*p++;
+    }
+
+    return (int )(val + (val >> 5));
+}
+
+/* key_free callback for strend tables: releases the key passed in x with
+ * xfree() and returns 0. */
+static int strend_key_free(st_data_t x)
+{
+    void* key_mem = (void* )x;
+
+    xfree(key_mem);
+    return 0;
+}
+
+/*
+ * key_clone callback for strend tables: allocates a new st_strend_key and
+ * copies the s and end pointers into it.  Only the key struct is duplicated;
+ * the new key points at the same bytes as the original.
+ */
+static st_data_t strend_key_clone(st_data_t x)
+{
+    st_strend_key* src = (st_strend_key* )x;
+    st_strend_key* copy = alloc(st_strend_key);
+
+    copy->s = src->s;
+    copy->end = src->end;
+    return (st_data_t )copy;
+}
diff --git a/st.h b/st.h
index bc12624b90..fffbb32376 100644
--- a/st.h
+++ b/st.h
@@ -14,6 +14,8 @@ typedef struct st_table st_table;
struct st_hash_type {
int (*compare)();
int (*hash)();
+ int (*key_free)(); /* called on an entry's key before the entry itself is freed (see st_foreach) */
+ st_data_t (*key_clone)(); /* called by st_copy() to obtain the key stored in the copied entry */
};
struct st_table {
@@ -23,6 +25,11 @@ struct st_table {
struct st_table_entry **bins;
};
+/* Key for strend tables: a byte string described by a start pointer and an
+ * end pointer.  The strend compare/hash callbacks read bytes from s while
+ * the pointer is below end, so the byte at end is not part of the key. */
+typedef struct {
+ unsigned char* s;
+ unsigned char* end;
+} st_strend_key;
+
#define st_is_member(table,key) st_lookup(table,key,(st_data_t *)0)
enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK};
@@ -44,22 +51,27 @@ st_table *st_init_numtable _((void));
st_table *st_init_numtable_with_size _((int));
st_table *st_init_strtable _((void));
st_table *st_init_strtable_with_size _((int));
+st_table *st_init_strend_table_with_size _((int));
int st_delete _((st_table *, st_data_t *, st_data_t *));
int st_delete_safe _((st_table *, st_data_t *, st_data_t *, st_data_t));
int st_insert _((st_table *, st_data_t, st_data_t));
+int st_insert_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
int st_lookup _((st_table *, st_data_t, st_data_t *));
+int st_lookup_strend _((st_table *, unsigned char*, unsigned char*, st_data_t*));
void st_foreach _((st_table *, int (*)(ANYARGS), st_data_t));
void st_add_direct _((st_table *, st_data_t, st_data_t));
+void st_add_direct_strend _((st_table *, unsigned char*, unsigned char*, st_data_t));
void st_free_table _((st_table *));
void st_cleanup_safe _((st_table *, st_data_t));
st_table *st_copy _((st_table *));
+extern st_data_t st_nothing_key_clone _((st_data_t key));
+extern int st_nothing_key_free _((st_data_t key));
+
#define ST_NUMCMP ((int (*)()) 0)
#define ST_NUMHASH ((int (*)()) -2)
#define st_numcmp ST_NUMCMP
#define st_numhash ST_NUMHASH
-int st_strhash();
-
#endif /* ST_INCLUDED */
diff --git a/utf8.c b/utf8.c
index 5a777e177c..e7095baa5c 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp>
+ * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -226,9 +226,9 @@ utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower)
if (*p == 195) { /* 195 == '\303' */
int c = *(p + 1);
if (c >= 128) {
- if (c <= (unsigned char)'\236' && /* upper */
+ if (c <= (UChar )'\236' && /* upper */
(flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
- if (c != (unsigned char)'\227') {
+ if (c != (UChar )'\227') {
*lower++ = *p;
*lower = (UChar )(c + 32);
(*pp) += 2;
@@ -236,7 +236,7 @@ utf8_mbc_to_normalize(OnigAmbigType flag, UChar** pp, UChar* end, UChar* lower)
}
}
#if 0
- else if (c == '\237' &&
+ else if (c == (UChar )'\237' &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
*lower++ = '\303';
*lower = '\237';
@@ -286,16 +286,16 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end)
int c = *(p + 1);
if (c >= 128) {
if ((flag & ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE) != 0) {
- if (c <= (unsigned char)'\236') { /* upper */
- if (c == '\227') return FALSE;
+ if (c <= (UChar )'\236') { /* upper */
+ if (c == (UChar )'\227') return FALSE;
return TRUE;
}
- else if (c >= (unsigned char)'\240' && c <= (unsigned char)'\276') { /* lower */
- if (c == '\267') return FALSE;
+ else if (c >= (UChar )'\240' && c <= (UChar )'\276') { /* lower */
+ if (c == (UChar )'\267') return FALSE;
return TRUE;
}
}
- else if (c == '\237' &&
+ else if (c == (UChar )'\237' &&
(flag & ONIGENC_AMBIGUOUS_MATCH_COMPOUND) != 0) {
return TRUE;
}
@@ -306,163 +306,3266 @@ utf8_is_mbc_ambiguous(OnigAmbigType flag, UChar** pp, UChar* end)
return FALSE;
}
-static int
-utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
- if (code < 256) {
- return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
- }
- if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
-#ifdef USE_INVALID_CODE_SCHEME
- if (code <= VALID_CODE_LIMIT)
+static OnigCodePoint EmptyRange[] = { 0 }; /* range table holding only a count of 0, i.e. no ranges */
+
+/* Alnum code point ranges below 0x80: '0'-'9', 'A'-'Z', 'a'-'z'.
+ * Layout: the first element is the number of ranges, followed by that many
+ * (first, last) code point pairs. */
+static OnigCodePoint SBAlnum[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBAlnum[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 411,
+#else
+ 6,
#endif
- return TRUE;
- }
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bef,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f29,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x1371,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlnum */
+
+/* Alpha code point ranges below 0x80: 'A'-'Z' and 'a'-'z'.
+ * Layout: range count first, then (first, last) pairs. */
+static OnigCodePoint SBAlpha[] = {
+ 2,
+ 0x0041, 0x005a,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBAlpha[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 394,
+#else
+ 6,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+ 0x00f8, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06ef,
+ 0x06fa, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09f0, 0x09f1,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a70, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x180b, 0x180d,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1950, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x3005, 0x3006,
+ 0x302a, 0x302f,
+ 0x3031, 0x3035,
+ 0x303b, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30fa,
+ 0x30fc, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff21, 0xff3a,
+ 0xff41, 0xff5a,
+ 0xff66, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10300, 0x1031e,
+ 0x10330, 0x10349,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBAlpha */
+
+/* Blank code points below 0x80: tab (0x09) and space (0x20).
+ * Layout: range count first, then (first, last) pairs; a single code point
+ * is written with first == last. */
+static OnigCodePoint SBBlank[] = {
+ 2,
+ 0x0009, 0x0009,
+ 0x0020, 0x0020
+};
+
+/* Blank code point ranges from 0x80 upward.
+ * Layout: range count first, then (first, last) pairs.  With
+ * USE_UNICODE_FULL_RANGE_CTYPE defined the table holds 7 ranges; without it
+ * only the first one (0x00a0) is compiled in, and the count is adjusted to
+ * match. */
+static OnigCodePoint MBBlank[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 7,
+#else
+ 1,
+#endif
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBBlank */
+
+/* Cntrl code point ranges below 0x80: 0x00-0x1f and 0x7f.
+ * Layout: range count first, then (first, last) pairs. */
+static OnigCodePoint SBCntrl[] = {
+ 2,
+ 0x0000, 0x001f,
+ 0x007f, 0x007f
+};
+
+/* Cntrl code point ranges from 0x80 upward.
+ * Layout: range count first, then (first, last) pairs.  With
+ * USE_UNICODE_FULL_RANGE_CTYPE defined the table holds 18 ranges; without it
+ * only the first 2 are compiled in, and the count is adjusted to match. */
+static OnigCodePoint MBCntrl[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 18,
+#else
+ 2,
+#endif
+ 0x0080, 0x009f,
+ 0x00ad, 0x00ad
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0600, 0x0603,
+ 0x06dd, 0x06dd,
+ 0x070f, 0x070f,
+ 0x17b4, 0x17b5,
+ 0x200b, 0x200f,
+ 0x202a, 0x202e,
+ 0x2060, 0x2063,
+ 0x206a, 0x206f,
+ 0xd800, 0xf8ff,
+ 0xfeff, 0xfeff,
+ 0xfff9, 0xfffb,
+ 0x1d173, 0x1d17a,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBCntrl */
+
+/* Digit code point range below 0x80: '0'-'9'.
+ * Layout: range count first, then (first, last) pairs. */
+static OnigCodePoint SBDigit[] = {
+ 1,
+ 0x0030, 0x0039
+};
+
+/* Digit code point ranges from 0x80 upward.
+ * Layout: range count first, then (first, last) pairs.  With
+ * USE_UNICODE_FULL_RANGE_CTYPE defined the table holds 22 ranges; without it
+ * the table is just the count 0 (note: no trailing comma after that 0, since
+ * nothing follows it). */
+static OnigCodePoint MBDigit[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 22,
+#else
+ 0
+#endif
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x0660, 0x0669,
+ 0x06f0, 0x06f9,
+ 0x0966, 0x096f,
+ 0x09e6, 0x09ef,
+ 0x0a66, 0x0a6f,
+ 0x0ae6, 0x0aef,
+ 0x0b66, 0x0b6f,
+ 0x0be7, 0x0bef,
+ 0x0c66, 0x0c6f,
+ 0x0ce6, 0x0cef,
+ 0x0d66, 0x0d6f,
+ 0x0e50, 0x0e59,
+ 0x0ed0, 0x0ed9,
+ 0x0f20, 0x0f29,
+ 0x1040, 0x1049,
+ 0x1369, 0x1371,
+ 0x17e0, 0x17e9,
+ 0x1810, 0x1819,
+ 0x1946, 0x194f,
+ 0xff10, 0xff19,
+ 0x104a0, 0x104a9,
+ 0x1d7ce, 0x1d7ff
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBDigit */
+
+/* Graph code point range below 0x80: 0x21-0x7e.
+ * Layout: range count first, then (first, last) pairs. */
+static OnigCodePoint SBGraph[] = {
+ 1,
+ 0x0021, 0x007e
+};
+
+static OnigCodePoint MBGraph[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 404,
+#else
+ 1,
+#endif
+ 0x00a1, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1681, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x200b, 0x2027,
+ 0x202a, 0x202e,
+ 0x2030, 0x2054,
+ 0x2057, 0x2057,
+ 0x2060, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3001, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBGraph */
+
+/* Lower code point range below 0x80: 'a'-'z'.
+ * Layout: range count first, then (first, last) pairs. */
+static OnigCodePoint SBLower[] = {
+ 1,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBLower[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 423,
+#else
+ 5,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b5, 0x00b5,
+ 0x00ba, 0x00ba,
+ 0x00df, 0x00f6,
+ 0x00f8, 0x00ff
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0101, 0x0101,
+ 0x0103, 0x0103,
+ 0x0105, 0x0105,
+ 0x0107, 0x0107,
+ 0x0109, 0x0109,
+ 0x010b, 0x010b,
+ 0x010d, 0x010d,
+ 0x010f, 0x010f,
+ 0x0111, 0x0111,
+ 0x0113, 0x0113,
+ 0x0115, 0x0115,
+ 0x0117, 0x0117,
+ 0x0119, 0x0119,
+ 0x011b, 0x011b,
+ 0x011d, 0x011d,
+ 0x011f, 0x011f,
+ 0x0121, 0x0121,
+ 0x0123, 0x0123,
+ 0x0125, 0x0125,
+ 0x0127, 0x0127,
+ 0x0129, 0x0129,
+ 0x012b, 0x012b,
+ 0x012d, 0x012d,
+ 0x012f, 0x012f,
+ 0x0131, 0x0131,
+ 0x0133, 0x0133,
+ 0x0135, 0x0135,
+ 0x0137, 0x0138,
+ 0x013a, 0x013a,
+ 0x013c, 0x013c,
+ 0x013e, 0x013e,
+ 0x0140, 0x0140,
+ 0x0142, 0x0142,
+ 0x0144, 0x0144,
+ 0x0146, 0x0146,
+ 0x0148, 0x0149,
+ 0x014b, 0x014b,
+ 0x014d, 0x014d,
+ 0x014f, 0x014f,
+ 0x0151, 0x0151,
+ 0x0153, 0x0153,
+ 0x0155, 0x0155,
+ 0x0157, 0x0157,
+ 0x0159, 0x0159,
+ 0x015b, 0x015b,
+ 0x015d, 0x015d,
+ 0x015f, 0x015f,
+ 0x0161, 0x0161,
+ 0x0163, 0x0163,
+ 0x0165, 0x0165,
+ 0x0167, 0x0167,
+ 0x0169, 0x0169,
+ 0x016b, 0x016b,
+ 0x016d, 0x016d,
+ 0x016f, 0x016f,
+ 0x0171, 0x0171,
+ 0x0173, 0x0173,
+ 0x0175, 0x0175,
+ 0x0177, 0x0177,
+ 0x017a, 0x017a,
+ 0x017c, 0x017c,
+ 0x017e, 0x0180,
+ 0x0183, 0x0183,
+ 0x0185, 0x0185,
+ 0x0188, 0x0188,
+ 0x018c, 0x018d,
+ 0x0192, 0x0192,
+ 0x0195, 0x0195,
+ 0x0199, 0x019b,
+ 0x019e, 0x019e,
+ 0x01a1, 0x01a1,
+ 0x01a3, 0x01a3,
+ 0x01a5, 0x01a5,
+ 0x01a8, 0x01a8,
+ 0x01aa, 0x01ab,
+ 0x01ad, 0x01ad,
+ 0x01b0, 0x01b0,
+ 0x01b4, 0x01b4,
+ 0x01b6, 0x01b6,
+ 0x01b9, 0x01ba,
+ 0x01bd, 0x01bf,
+ 0x01c6, 0x01c6,
+ 0x01c9, 0x01c9,
+ 0x01cc, 0x01cc,
+ 0x01ce, 0x01ce,
+ 0x01d0, 0x01d0,
+ 0x01d2, 0x01d2,
+ 0x01d4, 0x01d4,
+ 0x01d6, 0x01d6,
+ 0x01d8, 0x01d8,
+ 0x01da, 0x01da,
+ 0x01dc, 0x01dd,
+ 0x01df, 0x01df,
+ 0x01e1, 0x01e1,
+ 0x01e3, 0x01e3,
+ 0x01e5, 0x01e5,
+ 0x01e7, 0x01e7,
+ 0x01e9, 0x01e9,
+ 0x01eb, 0x01eb,
+ 0x01ed, 0x01ed,
+ 0x01ef, 0x01f0,
+ 0x01f3, 0x01f3,
+ 0x01f5, 0x01f5,
+ 0x01f9, 0x01f9,
+ 0x01fb, 0x01fb,
+ 0x01fd, 0x01fd,
+ 0x01ff, 0x01ff,
+ 0x0201, 0x0201,
+ 0x0203, 0x0203,
+ 0x0205, 0x0205,
+ 0x0207, 0x0207,
+ 0x0209, 0x0209,
+ 0x020b, 0x020b,
+ 0x020d, 0x020d,
+ 0x020f, 0x020f,
+ 0x0211, 0x0211,
+ 0x0213, 0x0213,
+ 0x0215, 0x0215,
+ 0x0217, 0x0217,
+ 0x0219, 0x0219,
+ 0x021b, 0x021b,
+ 0x021d, 0x021d,
+ 0x021f, 0x021f,
+ 0x0221, 0x0221,
+ 0x0223, 0x0223,
+ 0x0225, 0x0225,
+ 0x0227, 0x0227,
+ 0x0229, 0x0229,
+ 0x022b, 0x022b,
+ 0x022d, 0x022d,
+ 0x022f, 0x022f,
+ 0x0231, 0x0231,
+ 0x0233, 0x0236,
+ 0x0250, 0x02af,
+ 0x0390, 0x0390,
+ 0x03ac, 0x03ce,
+ 0x03d0, 0x03d1,
+ 0x03d5, 0x03d7,
+ 0x03d9, 0x03d9,
+ 0x03db, 0x03db,
+ 0x03dd, 0x03dd,
+ 0x03df, 0x03df,
+ 0x03e1, 0x03e1,
+ 0x03e3, 0x03e3,
+ 0x03e5, 0x03e5,
+ 0x03e7, 0x03e7,
+ 0x03e9, 0x03e9,
+ 0x03eb, 0x03eb,
+ 0x03ed, 0x03ed,
+ 0x03ef, 0x03f3,
+ 0x03f5, 0x03f5,
+ 0x03f8, 0x03f8,
+ 0x03fb, 0x03fb,
+ 0x0430, 0x045f,
+ 0x0461, 0x0461,
+ 0x0463, 0x0463,
+ 0x0465, 0x0465,
+ 0x0467, 0x0467,
+ 0x0469, 0x0469,
+ 0x046b, 0x046b,
+ 0x046d, 0x046d,
+ 0x046f, 0x046f,
+ 0x0471, 0x0471,
+ 0x0473, 0x0473,
+ 0x0475, 0x0475,
+ 0x0477, 0x0477,
+ 0x0479, 0x0479,
+ 0x047b, 0x047b,
+ 0x047d, 0x047d,
+ 0x047f, 0x047f,
+ 0x0481, 0x0481,
+ 0x048b, 0x048b,
+ 0x048d, 0x048d,
+ 0x048f, 0x048f,
+ 0x0491, 0x0491,
+ 0x0493, 0x0493,
+ 0x0495, 0x0495,
+ 0x0497, 0x0497,
+ 0x0499, 0x0499,
+ 0x049b, 0x049b,
+ 0x049d, 0x049d,
+ 0x049f, 0x049f,
+ 0x04a1, 0x04a1,
+ 0x04a3, 0x04a3,
+ 0x04a5, 0x04a5,
+ 0x04a7, 0x04a7,
+ 0x04a9, 0x04a9,
+ 0x04ab, 0x04ab,
+ 0x04ad, 0x04ad,
+ 0x04af, 0x04af,
+ 0x04b1, 0x04b1,
+ 0x04b3, 0x04b3,
+ 0x04b5, 0x04b5,
+ 0x04b7, 0x04b7,
+ 0x04b9, 0x04b9,
+ 0x04bb, 0x04bb,
+ 0x04bd, 0x04bd,
+ 0x04bf, 0x04bf,
+ 0x04c2, 0x04c2,
+ 0x04c4, 0x04c4,
+ 0x04c6, 0x04c6,
+ 0x04c8, 0x04c8,
+ 0x04ca, 0x04ca,
+ 0x04cc, 0x04cc,
+ 0x04ce, 0x04ce,
+ 0x04d1, 0x04d1,
+ 0x04d3, 0x04d3,
+ 0x04d5, 0x04d5,
+ 0x04d7, 0x04d7,
+ 0x04d9, 0x04d9,
+ 0x04db, 0x04db,
+ 0x04dd, 0x04dd,
+ 0x04df, 0x04df,
+ 0x04e1, 0x04e1,
+ 0x04e3, 0x04e3,
+ 0x04e5, 0x04e5,
+ 0x04e7, 0x04e7,
+ 0x04e9, 0x04e9,
+ 0x04eb, 0x04eb,
+ 0x04ed, 0x04ed,
+ 0x04ef, 0x04ef,
+ 0x04f1, 0x04f1,
+ 0x04f3, 0x04f3,
+ 0x04f5, 0x04f5,
+ 0x04f9, 0x04f9,
+ 0x0501, 0x0501,
+ 0x0503, 0x0503,
+ 0x0505, 0x0505,
+ 0x0507, 0x0507,
+ 0x0509, 0x0509,
+ 0x050b, 0x050b,
+ 0x050d, 0x050d,
+ 0x050f, 0x050f,
+ 0x0561, 0x0587,
+ 0x1d00, 0x1d2b,
+ 0x1d62, 0x1d6b,
+ 0x1e01, 0x1e01,
+ 0x1e03, 0x1e03,
+ 0x1e05, 0x1e05,
+ 0x1e07, 0x1e07,
+ 0x1e09, 0x1e09,
+ 0x1e0b, 0x1e0b,
+ 0x1e0d, 0x1e0d,
+ 0x1e0f, 0x1e0f,
+ 0x1e11, 0x1e11,
+ 0x1e13, 0x1e13,
+ 0x1e15, 0x1e15,
+ 0x1e17, 0x1e17,
+ 0x1e19, 0x1e19,
+ 0x1e1b, 0x1e1b,
+ 0x1e1d, 0x1e1d,
+ 0x1e1f, 0x1e1f,
+ 0x1e21, 0x1e21,
+ 0x1e23, 0x1e23,
+ 0x1e25, 0x1e25,
+ 0x1e27, 0x1e27,
+ 0x1e29, 0x1e29,
+ 0x1e2b, 0x1e2b,
+ 0x1e2d, 0x1e2d,
+ 0x1e2f, 0x1e2f,
+ 0x1e31, 0x1e31,
+ 0x1e33, 0x1e33,
+ 0x1e35, 0x1e35,
+ 0x1e37, 0x1e37,
+ 0x1e39, 0x1e39,
+ 0x1e3b, 0x1e3b,
+ 0x1e3d, 0x1e3d,
+ 0x1e3f, 0x1e3f,
+ 0x1e41, 0x1e41,
+ 0x1e43, 0x1e43,
+ 0x1e45, 0x1e45,
+ 0x1e47, 0x1e47,
+ 0x1e49, 0x1e49,
+ 0x1e4b, 0x1e4b,
+ 0x1e4d, 0x1e4d,
+ 0x1e4f, 0x1e4f,
+ 0x1e51, 0x1e51,
+ 0x1e53, 0x1e53,
+ 0x1e55, 0x1e55,
+ 0x1e57, 0x1e57,
+ 0x1e59, 0x1e59,
+ 0x1e5b, 0x1e5b,
+ 0x1e5d, 0x1e5d,
+ 0x1e5f, 0x1e5f,
+ 0x1e61, 0x1e61,
+ 0x1e63, 0x1e63,
+ 0x1e65, 0x1e65,
+ 0x1e67, 0x1e67,
+ 0x1e69, 0x1e69,
+ 0x1e6b, 0x1e6b,
+ 0x1e6d, 0x1e6d,
+ 0x1e6f, 0x1e6f,
+ 0x1e71, 0x1e71,
+ 0x1e73, 0x1e73,
+ 0x1e75, 0x1e75,
+ 0x1e77, 0x1e77,
+ 0x1e79, 0x1e79,
+ 0x1e7b, 0x1e7b,
+ 0x1e7d, 0x1e7d,
+ 0x1e7f, 0x1e7f,
+ 0x1e81, 0x1e81,
+ 0x1e83, 0x1e83,
+ 0x1e85, 0x1e85,
+ 0x1e87, 0x1e87,
+ 0x1e89, 0x1e89,
+ 0x1e8b, 0x1e8b,
+ 0x1e8d, 0x1e8d,
+ 0x1e8f, 0x1e8f,
+ 0x1e91, 0x1e91,
+ 0x1e93, 0x1e93,
+ 0x1e95, 0x1e9b,
+ 0x1ea1, 0x1ea1,
+ 0x1ea3, 0x1ea3,
+ 0x1ea5, 0x1ea5,
+ 0x1ea7, 0x1ea7,
+ 0x1ea9, 0x1ea9,
+ 0x1eab, 0x1eab,
+ 0x1ead, 0x1ead,
+ 0x1eaf, 0x1eaf,
+ 0x1eb1, 0x1eb1,
+ 0x1eb3, 0x1eb3,
+ 0x1eb5, 0x1eb5,
+ 0x1eb7, 0x1eb7,
+ 0x1eb9, 0x1eb9,
+ 0x1ebb, 0x1ebb,
+ 0x1ebd, 0x1ebd,
+ 0x1ebf, 0x1ebf,
+ 0x1ec1, 0x1ec1,
+ 0x1ec3, 0x1ec3,
+ 0x1ec5, 0x1ec5,
+ 0x1ec7, 0x1ec7,
+ 0x1ec9, 0x1ec9,
+ 0x1ecb, 0x1ecb,
+ 0x1ecd, 0x1ecd,
+ 0x1ecf, 0x1ecf,
+ 0x1ed1, 0x1ed1,
+ 0x1ed3, 0x1ed3,
+ 0x1ed5, 0x1ed5,
+ 0x1ed7, 0x1ed7,
+ 0x1ed9, 0x1ed9,
+ 0x1edb, 0x1edb,
+ 0x1edd, 0x1edd,
+ 0x1edf, 0x1edf,
+ 0x1ee1, 0x1ee1,
+ 0x1ee3, 0x1ee3,
+ 0x1ee5, 0x1ee5,
+ 0x1ee7, 0x1ee7,
+ 0x1ee9, 0x1ee9,
+ 0x1eeb, 0x1eeb,
+ 0x1eed, 0x1eed,
+ 0x1eef, 0x1eef,
+ 0x1ef1, 0x1ef1,
+ 0x1ef3, 0x1ef3,
+ 0x1ef5, 0x1ef5,
+ 0x1ef7, 0x1ef7,
+ 0x1ef9, 0x1ef9,
+ 0x1f00, 0x1f07,
+ 0x1f10, 0x1f15,
+ 0x1f20, 0x1f27,
+ 0x1f30, 0x1f37,
+ 0x1f40, 0x1f45,
+ 0x1f50, 0x1f57,
+ 0x1f60, 0x1f67,
+ 0x1f70, 0x1f7d,
+ 0x1f80, 0x1f87,
+ 0x1f90, 0x1f97,
+ 0x1fa0, 0x1fa7,
+ 0x1fb0, 0x1fb4,
+ 0x1fb6, 0x1fb7,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fc7,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fd7,
+ 0x1fe0, 0x1fe7,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ff7,
+ 0x2071, 0x2071,
+ 0x207f, 0x207f,
+ 0x210a, 0x210a,
+ 0x210e, 0x210f,
+ 0x2113, 0x2113,
+ 0x212f, 0x212f,
+ 0x2134, 0x2134,
+ 0x2139, 0x2139,
+ 0x213d, 0x213d,
+ 0x2146, 0x2149,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xff41, 0xff5a,
+ 0x10428, 0x1044f,
+ 0x1d41a, 0x1d433,
+ 0x1d44e, 0x1d454,
+ 0x1d456, 0x1d467,
+ 0x1d482, 0x1d49b,
+ 0x1d4b6, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d4cf,
+ 0x1d4ea, 0x1d503,
+ 0x1d51e, 0x1d537,
+ 0x1d552, 0x1d56b,
+ 0x1d586, 0x1d59f,
+ 0x1d5ba, 0x1d5d3,
+ 0x1d5ee, 0x1d607,
+ 0x1d622, 0x1d63b,
+ 0x1d656, 0x1d66f,
+ 0x1d68a, 0x1d6a3,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6e1,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d71b,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d755,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d78f,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBLower */
+
+static OnigCodePoint SBPrint[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x007e
+};
+
+static OnigCodePoint MBPrint[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 403,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x0236
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0250, 0x0357,
+ 0x035d, 0x036f,
+ 0x0374, 0x0375,
+ 0x037a, 0x037a,
+ 0x037e, 0x037e,
+ 0x0384, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03fb,
+ 0x0400, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x055f,
+ 0x0561, 0x0587,
+ 0x0589, 0x058a,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f4,
+ 0x0600, 0x0603,
+ 0x060c, 0x0615,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x070d,
+ 0x070f, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0970,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09fa,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0af1, 0x0af1,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bfa,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df4,
+ 0x0e01, 0x0e3a,
+ 0x0e3f, 0x0e5b,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fbe, 0x0fcc,
+ 0x0fcf, 0x0fcf,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x10fb, 0x10fb,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1361, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x1676,
+ 0x1680, 0x169c,
+ 0x16a0, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1736,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x1800, 0x180e,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1940, 0x1940,
+ 0x1944, 0x196d,
+ 0x1970, 0x1974,
+ 0x19e0, 0x19ff,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fc4,
+ 0x1fc6, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fdd, 0x1fef,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffe,
+ 0x2000, 0x2054,
+ 0x2057, 0x2057,
+ 0x205f, 0x2063,
+ 0x206a, 0x2071,
+ 0x2074, 0x208e,
+ 0x20a0, 0x20b1,
+ 0x20d0, 0x20ea,
+ 0x2100, 0x213b,
+ 0x213d, 0x214b,
+ 0x2153, 0x2183,
+ 0x2190, 0x23d0,
+ 0x2400, 0x2426,
+ 0x2440, 0x244a,
+ 0x2460, 0x2617,
+ 0x2619, 0x267d,
+ 0x2680, 0x2691,
+ 0x26a0, 0x26a1,
+ 0x2701, 0x2704,
+ 0x2706, 0x2709,
+ 0x270c, 0x2727,
+ 0x2729, 0x274b,
+ 0x274d, 0x274d,
+ 0x274f, 0x2752,
+ 0x2756, 0x2756,
+ 0x2758, 0x275e,
+ 0x2761, 0x2794,
+ 0x2798, 0x27af,
+ 0x27b1, 0x27be,
+ 0x27d0, 0x27eb,
+ 0x27f0, 0x2b0d,
+ 0x2e80, 0x2e99,
+ 0x2e9b, 0x2ef3,
+ 0x2f00, 0x2fd5,
+ 0x2ff0, 0x2ffb,
+ 0x3000, 0x303f,
+ 0x3041, 0x3096,
+ 0x3099, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3190, 0x31b7,
+ 0x31f0, 0x321e,
+ 0x3220, 0x3243,
+ 0x3250, 0x327d,
+ 0x327f, 0x32fe,
+ 0x3300, 0x4db5,
+ 0x4dc0, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xa490, 0xa4c6,
+ 0xac00, 0xd7a3,
+ 0xe000, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3f,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfd,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe66,
+ 0xfe68, 0xfe6b,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xfeff, 0xfeff,
+ 0xff01, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0xffe0, 0xffe6,
+ 0xffe8, 0xffee,
+ 0xfff9, 0xfffd,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10100, 0x10102,
+ 0x10107, 0x10133,
+ 0x10137, 0x1013f,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x1039f, 0x1039f,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d000, 0x1d0f5,
+ 0x1d100, 0x1d126,
+ 0x1d12a, 0x1d1dd,
+ 0x1d300, 0x1d356,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0001, 0xe0001,
+ 0xe0020, 0xe007f,
+ 0xe0100, 0xe01ef,
+ 0xf0000, 0xffffd,
+ 0x100000, 0x10fffd
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPrint */
+
+static OnigCodePoint SBPunct[] = {
+ 9,
+ 0x0021, 0x0023,
+ 0x0025, 0x002a,
+ 0x002c, 0x002f,
+ 0x003a, 0x003b,
+ 0x003f, 0x0040,
+ 0x005b, 0x005d,
+ 0x005f, 0x005f,
+ 0x007b, 0x007b,
+ 0x007d, 0x007d
+}; /* end of SBPunct */
+
+static OnigCodePoint MBPunct[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 77,
+#else
+ 5,
+#endif
+ 0x00a1, 0x00a1,
+ 0x00ab, 0x00ab,
+ 0x00b7, 0x00b7,
+ 0x00bb, 0x00bb,
+ 0x00bf, 0x00bf
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x037e, 0x037e,
+ 0x0387, 0x0387,
+ 0x055a, 0x055f,
+ 0x0589, 0x058a,
+ 0x05be, 0x05be,
+ 0x05c0, 0x05c0,
+ 0x05c3, 0x05c3,
+ 0x05f3, 0x05f4,
+ 0x060c, 0x060d,
+ 0x061b, 0x061b,
+ 0x061f, 0x061f,
+ 0x066a, 0x066d,
+ 0x06d4, 0x06d4,
+ 0x0700, 0x070d,
+ 0x0964, 0x0965,
+ 0x0970, 0x0970,
+ 0x0df4, 0x0df4,
+ 0x0e4f, 0x0e4f,
+ 0x0e5a, 0x0e5b,
+ 0x0f04, 0x0f12,
+ 0x0f3a, 0x0f3d,
+ 0x0f85, 0x0f85,
+ 0x104a, 0x104f,
+ 0x10fb, 0x10fb,
+ 0x1361, 0x1368,
+ 0x166d, 0x166e,
+ 0x169b, 0x169c,
+ 0x16eb, 0x16ed,
+ 0x1735, 0x1736,
+ 0x17d4, 0x17d6,
+ 0x17d8, 0x17da,
+ 0x1800, 0x180a,
+ 0x1944, 0x1945,
+ 0x2010, 0x2027,
+ 0x2030, 0x2043,
+ 0x2045, 0x2051,
+ 0x2053, 0x2054,
+ 0x2057, 0x2057,
+ 0x207d, 0x207e,
+ 0x208d, 0x208e,
+ 0x2329, 0x232a,
+ 0x23b4, 0x23b6,
+ 0x2768, 0x2775,
+ 0x27e6, 0x27eb,
+ 0x2983, 0x2998,
+ 0x29d8, 0x29db,
+ 0x29fc, 0x29fd,
+ 0x3001, 0x3003,
+ 0x3008, 0x3011,
+ 0x3014, 0x301f,
+ 0x3030, 0x3030,
+ 0x303d, 0x303d,
+ 0x30a0, 0x30a0,
+ 0x30fb, 0x30fb,
+ 0xfd3e, 0xfd3f,
+ 0xfe30, 0xfe52,
+ 0xfe54, 0xfe61,
+ 0xfe63, 0xfe63,
+ 0xfe68, 0xfe68,
+ 0xfe6a, 0xfe6b,
+ 0xff01, 0xff03,
+ 0xff05, 0xff0a,
+ 0xff0c, 0xff0f,
+ 0xff1a, 0xff1b,
+ 0xff1f, 0xff20,
+ 0xff3b, 0xff3d,
+ 0xff3f, 0xff3f,
+ 0xff5b, 0xff5b,
+ 0xff5d, 0xff5d,
+ 0xff5f, 0xff65,
+ 0x10100, 0x10101,
+ 0x1039f, 0x1039f
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBPunct */
+
+static OnigCodePoint SBSpace[] = {
+ 2,
+ 0x0009, 0x000d,
+ 0x0020, 0x0020
+};
+
+static OnigCodePoint MBSpace[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 9,
+#else
+ 2,
+#endif
+ 0x0085, 0x0085,
+ 0x00a0, 0x00a0
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x1680, 0x1680,
+ 0x180e, 0x180e,
+ 0x2000, 0x200a,
+ 0x2028, 0x2029,
+ 0x202f, 0x202f,
+ 0x205f, 0x205f,
+ 0x3000, 0x3000
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBSpace */
+
+static OnigCodePoint SBUpper[] = {
+ 1,
+ 0x0041, 0x005a
+};
+
+static OnigCodePoint MBUpper[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 420,
+#else
+ 2,
+#endif
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00de
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ ,
+ 0x0100, 0x0100,
+ 0x0102, 0x0102,
+ 0x0104, 0x0104,
+ 0x0106, 0x0106,
+ 0x0108, 0x0108,
+ 0x010a, 0x010a,
+ 0x010c, 0x010c,
+ 0x010e, 0x010e,
+ 0x0110, 0x0110,
+ 0x0112, 0x0112,
+ 0x0114, 0x0114,
+ 0x0116, 0x0116,
+ 0x0118, 0x0118,
+ 0x011a, 0x011a,
+ 0x011c, 0x011c,
+ 0x011e, 0x011e,
+ 0x0120, 0x0120,
+ 0x0122, 0x0122,
+ 0x0124, 0x0124,
+ 0x0126, 0x0126,
+ 0x0128, 0x0128,
+ 0x012a, 0x012a,
+ 0x012c, 0x012c,
+ 0x012e, 0x012e,
+ 0x0130, 0x0130,
+ 0x0132, 0x0132,
+ 0x0134, 0x0134,
+ 0x0136, 0x0136,
+ 0x0139, 0x0139,
+ 0x013b, 0x013b,
+ 0x013d, 0x013d,
+ 0x013f, 0x013f,
+ 0x0141, 0x0141,
+ 0x0143, 0x0143,
+ 0x0145, 0x0145,
+ 0x0147, 0x0147,
+ 0x014a, 0x014a,
+ 0x014c, 0x014c,
+ 0x014e, 0x014e,
+ 0x0150, 0x0150,
+ 0x0152, 0x0152,
+ 0x0154, 0x0154,
+ 0x0156, 0x0156,
+ 0x0158, 0x0158,
+ 0x015a, 0x015a,
+ 0x015c, 0x015c,
+ 0x015e, 0x015e,
+ 0x0160, 0x0160,
+ 0x0162, 0x0162,
+ 0x0164, 0x0164,
+ 0x0166, 0x0166,
+ 0x0168, 0x0168,
+ 0x016a, 0x016a,
+ 0x016c, 0x016c,
+ 0x016e, 0x016e,
+ 0x0170, 0x0170,
+ 0x0172, 0x0172,
+ 0x0174, 0x0174,
+ 0x0176, 0x0176,
+ 0x0178, 0x0179,
+ 0x017b, 0x017b,
+ 0x017d, 0x017d,
+ 0x0181, 0x0182,
+ 0x0184, 0x0184,
+ 0x0186, 0x0187,
+ 0x0189, 0x018b,
+ 0x018e, 0x0191,
+ 0x0193, 0x0194,
+ 0x0196, 0x0198,
+ 0x019c, 0x019d,
+ 0x019f, 0x01a0,
+ 0x01a2, 0x01a2,
+ 0x01a4, 0x01a4,
+ 0x01a6, 0x01a7,
+ 0x01a9, 0x01a9,
+ 0x01ac, 0x01ac,
+ 0x01ae, 0x01af,
+ 0x01b1, 0x01b3,
+ 0x01b5, 0x01b5,
+ 0x01b7, 0x01b8,
+ 0x01bc, 0x01bc,
+ 0x01c4, 0x01c4,
+ 0x01c7, 0x01c7,
+ 0x01ca, 0x01ca,
+ 0x01cd, 0x01cd,
+ 0x01cf, 0x01cf,
+ 0x01d1, 0x01d1,
+ 0x01d3, 0x01d3,
+ 0x01d5, 0x01d5,
+ 0x01d7, 0x01d7,
+ 0x01d9, 0x01d9,
+ 0x01db, 0x01db,
+ 0x01de, 0x01de,
+ 0x01e0, 0x01e0,
+ 0x01e2, 0x01e2,
+ 0x01e4, 0x01e4,
+ 0x01e6, 0x01e6,
+ 0x01e8, 0x01e8,
+ 0x01ea, 0x01ea,
+ 0x01ec, 0x01ec,
+ 0x01ee, 0x01ee,
+ 0x01f1, 0x01f1,
+ 0x01f4, 0x01f4,
+ 0x01f6, 0x01f8,
+ 0x01fa, 0x01fa,
+ 0x01fc, 0x01fc,
+ 0x01fe, 0x01fe,
+ 0x0200, 0x0200,
+ 0x0202, 0x0202,
+ 0x0204, 0x0204,
+ 0x0206, 0x0206,
+ 0x0208, 0x0208,
+ 0x020a, 0x020a,
+ 0x020c, 0x020c,
+ 0x020e, 0x020e,
+ 0x0210, 0x0210,
+ 0x0212, 0x0212,
+ 0x0214, 0x0214,
+ 0x0216, 0x0216,
+ 0x0218, 0x0218,
+ 0x021a, 0x021a,
+ 0x021c, 0x021c,
+ 0x021e, 0x021e,
+ 0x0220, 0x0220,
+ 0x0222, 0x0222,
+ 0x0224, 0x0224,
+ 0x0226, 0x0226,
+ 0x0228, 0x0228,
+ 0x022a, 0x022a,
+ 0x022c, 0x022c,
+ 0x022e, 0x022e,
+ 0x0230, 0x0230,
+ 0x0232, 0x0232,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x038f,
+ 0x0391, 0x03a1,
+ 0x03a3, 0x03ab,
+ 0x03d2, 0x03d4,
+ 0x03d8, 0x03d8,
+ 0x03da, 0x03da,
+ 0x03dc, 0x03dc,
+ 0x03de, 0x03de,
+ 0x03e0, 0x03e0,
+ 0x03e2, 0x03e2,
+ 0x03e4, 0x03e4,
+ 0x03e6, 0x03e6,
+ 0x03e8, 0x03e8,
+ 0x03ea, 0x03ea,
+ 0x03ec, 0x03ec,
+ 0x03ee, 0x03ee,
+ 0x03f4, 0x03f4,
+ 0x03f7, 0x03f7,
+ 0x03f9, 0x03fa,
+ 0x0400, 0x042f,
+ 0x0460, 0x0460,
+ 0x0462, 0x0462,
+ 0x0464, 0x0464,
+ 0x0466, 0x0466,
+ 0x0468, 0x0468,
+ 0x046a, 0x046a,
+ 0x046c, 0x046c,
+ 0x046e, 0x046e,
+ 0x0470, 0x0470,
+ 0x0472, 0x0472,
+ 0x0474, 0x0474,
+ 0x0476, 0x0476,
+ 0x0478, 0x0478,
+ 0x047a, 0x047a,
+ 0x047c, 0x047c,
+ 0x047e, 0x047e,
+ 0x0480, 0x0480,
+ 0x048a, 0x048a,
+ 0x048c, 0x048c,
+ 0x048e, 0x048e,
+ 0x0490, 0x0490,
+ 0x0492, 0x0492,
+ 0x0494, 0x0494,
+ 0x0496, 0x0496,
+ 0x0498, 0x0498,
+ 0x049a, 0x049a,
+ 0x049c, 0x049c,
+ 0x049e, 0x049e,
+ 0x04a0, 0x04a0,
+ 0x04a2, 0x04a2,
+ 0x04a4, 0x04a4,
+ 0x04a6, 0x04a6,
+ 0x04a8, 0x04a8,
+ 0x04aa, 0x04aa,
+ 0x04ac, 0x04ac,
+ 0x04ae, 0x04ae,
+ 0x04b0, 0x04b0,
+ 0x04b2, 0x04b2,
+ 0x04b4, 0x04b4,
+ 0x04b6, 0x04b6,
+ 0x04b8, 0x04b8,
+ 0x04ba, 0x04ba,
+ 0x04bc, 0x04bc,
+ 0x04be, 0x04be,
+ 0x04c0, 0x04c1,
+ 0x04c3, 0x04c3,
+ 0x04c5, 0x04c5,
+ 0x04c7, 0x04c7,
+ 0x04c9, 0x04c9,
+ 0x04cb, 0x04cb,
+ 0x04cd, 0x04cd,
+ 0x04d0, 0x04d0,
+ 0x04d2, 0x04d2,
+ 0x04d4, 0x04d4,
+ 0x04d6, 0x04d6,
+ 0x04d8, 0x04d8,
+ 0x04da, 0x04da,
+ 0x04dc, 0x04dc,
+ 0x04de, 0x04de,
+ 0x04e0, 0x04e0,
+ 0x04e2, 0x04e2,
+ 0x04e4, 0x04e4,
+ 0x04e6, 0x04e6,
+ 0x04e8, 0x04e8,
+ 0x04ea, 0x04ea,
+ 0x04ec, 0x04ec,
+ 0x04ee, 0x04ee,
+ 0x04f0, 0x04f0,
+ 0x04f2, 0x04f2,
+ 0x04f4, 0x04f4,
+ 0x04f8, 0x04f8,
+ 0x0500, 0x0500,
+ 0x0502, 0x0502,
+ 0x0504, 0x0504,
+ 0x0506, 0x0506,
+ 0x0508, 0x0508,
+ 0x050a, 0x050a,
+ 0x050c, 0x050c,
+ 0x050e, 0x050e,
+ 0x0531, 0x0556,
+ 0x10a0, 0x10c5,
+ 0x1e00, 0x1e00,
+ 0x1e02, 0x1e02,
+ 0x1e04, 0x1e04,
+ 0x1e06, 0x1e06,
+ 0x1e08, 0x1e08,
+ 0x1e0a, 0x1e0a,
+ 0x1e0c, 0x1e0c,
+ 0x1e0e, 0x1e0e,
+ 0x1e10, 0x1e10,
+ 0x1e12, 0x1e12,
+ 0x1e14, 0x1e14,
+ 0x1e16, 0x1e16,
+ 0x1e18, 0x1e18,
+ 0x1e1a, 0x1e1a,
+ 0x1e1c, 0x1e1c,
+ 0x1e1e, 0x1e1e,
+ 0x1e20, 0x1e20,
+ 0x1e22, 0x1e22,
+ 0x1e24, 0x1e24,
+ 0x1e26, 0x1e26,
+ 0x1e28, 0x1e28,
+ 0x1e2a, 0x1e2a,
+ 0x1e2c, 0x1e2c,
+ 0x1e2e, 0x1e2e,
+ 0x1e30, 0x1e30,
+ 0x1e32, 0x1e32,
+ 0x1e34, 0x1e34,
+ 0x1e36, 0x1e36,
+ 0x1e38, 0x1e38,
+ 0x1e3a, 0x1e3a,
+ 0x1e3c, 0x1e3c,
+ 0x1e3e, 0x1e3e,
+ 0x1e40, 0x1e40,
+ 0x1e42, 0x1e42,
+ 0x1e44, 0x1e44,
+ 0x1e46, 0x1e46,
+ 0x1e48, 0x1e48,
+ 0x1e4a, 0x1e4a,
+ 0x1e4c, 0x1e4c,
+ 0x1e4e, 0x1e4e,
+ 0x1e50, 0x1e50,
+ 0x1e52, 0x1e52,
+ 0x1e54, 0x1e54,
+ 0x1e56, 0x1e56,
+ 0x1e58, 0x1e58,
+ 0x1e5a, 0x1e5a,
+ 0x1e5c, 0x1e5c,
+ 0x1e5e, 0x1e5e,
+ 0x1e60, 0x1e60,
+ 0x1e62, 0x1e62,
+ 0x1e64, 0x1e64,
+ 0x1e66, 0x1e66,
+ 0x1e68, 0x1e68,
+ 0x1e6a, 0x1e6a,
+ 0x1e6c, 0x1e6c,
+ 0x1e6e, 0x1e6e,
+ 0x1e70, 0x1e70,
+ 0x1e72, 0x1e72,
+ 0x1e74, 0x1e74,
+ 0x1e76, 0x1e76,
+ 0x1e78, 0x1e78,
+ 0x1e7a, 0x1e7a,
+ 0x1e7c, 0x1e7c,
+ 0x1e7e, 0x1e7e,
+ 0x1e80, 0x1e80,
+ 0x1e82, 0x1e82,
+ 0x1e84, 0x1e84,
+ 0x1e86, 0x1e86,
+ 0x1e88, 0x1e88,
+ 0x1e8a, 0x1e8a,
+ 0x1e8c, 0x1e8c,
+ 0x1e8e, 0x1e8e,
+ 0x1e90, 0x1e90,
+ 0x1e92, 0x1e92,
+ 0x1e94, 0x1e94,
+ 0x1ea0, 0x1ea0,
+ 0x1ea2, 0x1ea2,
+ 0x1ea4, 0x1ea4,
+ 0x1ea6, 0x1ea6,
+ 0x1ea8, 0x1ea8,
+ 0x1eaa, 0x1eaa,
+ 0x1eac, 0x1eac,
+ 0x1eae, 0x1eae,
+ 0x1eb0, 0x1eb0,
+ 0x1eb2, 0x1eb2,
+ 0x1eb4, 0x1eb4,
+ 0x1eb6, 0x1eb6,
+ 0x1eb8, 0x1eb8,
+ 0x1eba, 0x1eba,
+ 0x1ebc, 0x1ebc,
+ 0x1ebe, 0x1ebe,
+ 0x1ec0, 0x1ec0,
+ 0x1ec2, 0x1ec2,
+ 0x1ec4, 0x1ec4,
+ 0x1ec6, 0x1ec6,
+ 0x1ec8, 0x1ec8,
+ 0x1eca, 0x1eca,
+ 0x1ecc, 0x1ecc,
+ 0x1ece, 0x1ece,
+ 0x1ed0, 0x1ed0,
+ 0x1ed2, 0x1ed2,
+ 0x1ed4, 0x1ed4,
+ 0x1ed6, 0x1ed6,
+ 0x1ed8, 0x1ed8,
+ 0x1eda, 0x1eda,
+ 0x1edc, 0x1edc,
+ 0x1ede, 0x1ede,
+ 0x1ee0, 0x1ee0,
+ 0x1ee2, 0x1ee2,
+ 0x1ee4, 0x1ee4,
+ 0x1ee6, 0x1ee6,
+ 0x1ee8, 0x1ee8,
+ 0x1eea, 0x1eea,
+ 0x1eec, 0x1eec,
+ 0x1eee, 0x1eee,
+ 0x1ef0, 0x1ef0,
+ 0x1ef2, 0x1ef2,
+ 0x1ef4, 0x1ef4,
+ 0x1ef6, 0x1ef6,
+ 0x1ef8, 0x1ef8,
+ 0x1f08, 0x1f0f,
+ 0x1f18, 0x1f1d,
+ 0x1f28, 0x1f2f,
+ 0x1f38, 0x1f3f,
+ 0x1f48, 0x1f4d,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f5f,
+ 0x1f68, 0x1f6f,
+ 0x1fb8, 0x1fbb,
+ 0x1fc8, 0x1fcb,
+ 0x1fd8, 0x1fdb,
+ 0x1fe8, 0x1fec,
+ 0x1ff8, 0x1ffb,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210b, 0x210d,
+ 0x2110, 0x2112,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x2130, 0x2131,
+ 0x2133, 0x2133,
+ 0x213e, 0x213f,
+ 0x2145, 0x2145,
+ 0xff21, 0xff3a,
+ 0x10400, 0x10427,
+ 0x1d400, 0x1d419,
+ 0x1d434, 0x1d44d,
+ 0x1d468, 0x1d481,
+ 0x1d49c, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b5,
+ 0x1d4d0, 0x1d4e9,
+ 0x1d504, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d538, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d56c, 0x1d585,
+ 0x1d5a0, 0x1d5b9,
+ 0x1d5d4, 0x1d5ed,
+ 0x1d608, 0x1d621,
+ 0x1d63c, 0x1d655,
+ 0x1d670, 0x1d689,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6e2, 0x1d6fa,
+ 0x1d71c, 0x1d734,
+ 0x1d756, 0x1d76e,
+ 0x1d790, 0x1d7a8
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBUpper */
+
+static OnigCodePoint SBXDigit[] = {
+ 3,
+ 0x0030, 0x0039,
+ 0x0041, 0x0046,
+ 0x0061, 0x0066
+};
+
+static OnigCodePoint SBASCII[] = {
+ 1,
+ 0x0000, 0x007f
+};
+
+static OnigCodePoint SBWord[] = {
+ 4,
+ 0x0030, 0x0039,
+ 0x0041, 0x005a,
+ 0x005f, 0x005f,
+ 0x0061, 0x007a
+};
+
+static OnigCodePoint MBWord[] = {
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ 432,
+#else
+ 8,
+#endif
+ 0x00aa, 0x00aa,
+ 0x00b2, 0x00b3,
+ 0x00b5, 0x00b5,
+ 0x00b9, 0x00ba,
+ 0x00bc, 0x00be,
+ 0x00c0, 0x00d6,
+ 0x00d8, 0x00f6,
+#ifndef USE_UNICODE_FULL_RANGE_CTYPE
+ 0x00f8, 0x7fffffff
+#else /* not USE_UNICODE_FULL_RANGE_CTYPE */
+ 0x00f8, 0x0236,
+ 0x0250, 0x02c1,
+ 0x02c6, 0x02d1,
+ 0x02e0, 0x02e4,
+ 0x02ee, 0x02ee,
+ 0x0300, 0x0357,
+ 0x035d, 0x036f,
+ 0x037a, 0x037a,
+ 0x0386, 0x0386,
+ 0x0388, 0x038a,
+ 0x038c, 0x038c,
+ 0x038e, 0x03a1,
+ 0x03a3, 0x03ce,
+ 0x03d0, 0x03f5,
+ 0x03f7, 0x03fb,
+ 0x0400, 0x0481,
+ 0x0483, 0x0486,
+ 0x0488, 0x04ce,
+ 0x04d0, 0x04f5,
+ 0x04f8, 0x04f9,
+ 0x0500, 0x050f,
+ 0x0531, 0x0556,
+ 0x0559, 0x0559,
+ 0x0561, 0x0587,
+ 0x0591, 0x05a1,
+ 0x05a3, 0x05b9,
+ 0x05bb, 0x05bd,
+ 0x05bf, 0x05bf,
+ 0x05c1, 0x05c2,
+ 0x05c4, 0x05c4,
+ 0x05d0, 0x05ea,
+ 0x05f0, 0x05f2,
+ 0x0610, 0x0615,
+ 0x0621, 0x063a,
+ 0x0640, 0x0658,
+ 0x0660, 0x0669,
+ 0x066e, 0x06d3,
+ 0x06d5, 0x06dc,
+ 0x06de, 0x06e8,
+ 0x06ea, 0x06fc,
+ 0x06ff, 0x06ff,
+ 0x0710, 0x074a,
+ 0x074d, 0x074f,
+ 0x0780, 0x07b1,
+ 0x0901, 0x0939,
+ 0x093c, 0x094d,
+ 0x0950, 0x0954,
+ 0x0958, 0x0963,
+ 0x0966, 0x096f,
+ 0x0981, 0x0983,
+ 0x0985, 0x098c,
+ 0x098f, 0x0990,
+ 0x0993, 0x09a8,
+ 0x09aa, 0x09b0,
+ 0x09b2, 0x09b2,
+ 0x09b6, 0x09b9,
+ 0x09bc, 0x09c4,
+ 0x09c7, 0x09c8,
+ 0x09cb, 0x09cd,
+ 0x09d7, 0x09d7,
+ 0x09dc, 0x09dd,
+ 0x09df, 0x09e3,
+ 0x09e6, 0x09f1,
+ 0x09f4, 0x09f9,
+ 0x0a01, 0x0a03,
+ 0x0a05, 0x0a0a,
+ 0x0a0f, 0x0a10,
+ 0x0a13, 0x0a28,
+ 0x0a2a, 0x0a30,
+ 0x0a32, 0x0a33,
+ 0x0a35, 0x0a36,
+ 0x0a38, 0x0a39,
+ 0x0a3c, 0x0a3c,
+ 0x0a3e, 0x0a42,
+ 0x0a47, 0x0a48,
+ 0x0a4b, 0x0a4d,
+ 0x0a59, 0x0a5c,
+ 0x0a5e, 0x0a5e,
+ 0x0a66, 0x0a74,
+ 0x0a81, 0x0a83,
+ 0x0a85, 0x0a8d,
+ 0x0a8f, 0x0a91,
+ 0x0a93, 0x0aa8,
+ 0x0aaa, 0x0ab0,
+ 0x0ab2, 0x0ab3,
+ 0x0ab5, 0x0ab9,
+ 0x0abc, 0x0ac5,
+ 0x0ac7, 0x0ac9,
+ 0x0acb, 0x0acd,
+ 0x0ad0, 0x0ad0,
+ 0x0ae0, 0x0ae3,
+ 0x0ae6, 0x0aef,
+ 0x0b01, 0x0b03,
+ 0x0b05, 0x0b0c,
+ 0x0b0f, 0x0b10,
+ 0x0b13, 0x0b28,
+ 0x0b2a, 0x0b30,
+ 0x0b32, 0x0b33,
+ 0x0b35, 0x0b39,
+ 0x0b3c, 0x0b43,
+ 0x0b47, 0x0b48,
+ 0x0b4b, 0x0b4d,
+ 0x0b56, 0x0b57,
+ 0x0b5c, 0x0b5d,
+ 0x0b5f, 0x0b61,
+ 0x0b66, 0x0b6f,
+ 0x0b71, 0x0b71,
+ 0x0b82, 0x0b83,
+ 0x0b85, 0x0b8a,
+ 0x0b8e, 0x0b90,
+ 0x0b92, 0x0b95,
+ 0x0b99, 0x0b9a,
+ 0x0b9c, 0x0b9c,
+ 0x0b9e, 0x0b9f,
+ 0x0ba3, 0x0ba4,
+ 0x0ba8, 0x0baa,
+ 0x0bae, 0x0bb5,
+ 0x0bb7, 0x0bb9,
+ 0x0bbe, 0x0bc2,
+ 0x0bc6, 0x0bc8,
+ 0x0bca, 0x0bcd,
+ 0x0bd7, 0x0bd7,
+ 0x0be7, 0x0bf2,
+ 0x0c01, 0x0c03,
+ 0x0c05, 0x0c0c,
+ 0x0c0e, 0x0c10,
+ 0x0c12, 0x0c28,
+ 0x0c2a, 0x0c33,
+ 0x0c35, 0x0c39,
+ 0x0c3e, 0x0c44,
+ 0x0c46, 0x0c48,
+ 0x0c4a, 0x0c4d,
+ 0x0c55, 0x0c56,
+ 0x0c60, 0x0c61,
+ 0x0c66, 0x0c6f,
+ 0x0c82, 0x0c83,
+ 0x0c85, 0x0c8c,
+ 0x0c8e, 0x0c90,
+ 0x0c92, 0x0ca8,
+ 0x0caa, 0x0cb3,
+ 0x0cb5, 0x0cb9,
+ 0x0cbc, 0x0cc4,
+ 0x0cc6, 0x0cc8,
+ 0x0cca, 0x0ccd,
+ 0x0cd5, 0x0cd6,
+ 0x0cde, 0x0cde,
+ 0x0ce0, 0x0ce1,
+ 0x0ce6, 0x0cef,
+ 0x0d02, 0x0d03,
+ 0x0d05, 0x0d0c,
+ 0x0d0e, 0x0d10,
+ 0x0d12, 0x0d28,
+ 0x0d2a, 0x0d39,
+ 0x0d3e, 0x0d43,
+ 0x0d46, 0x0d48,
+ 0x0d4a, 0x0d4d,
+ 0x0d57, 0x0d57,
+ 0x0d60, 0x0d61,
+ 0x0d66, 0x0d6f,
+ 0x0d82, 0x0d83,
+ 0x0d85, 0x0d96,
+ 0x0d9a, 0x0db1,
+ 0x0db3, 0x0dbb,
+ 0x0dbd, 0x0dbd,
+ 0x0dc0, 0x0dc6,
+ 0x0dca, 0x0dca,
+ 0x0dcf, 0x0dd4,
+ 0x0dd6, 0x0dd6,
+ 0x0dd8, 0x0ddf,
+ 0x0df2, 0x0df3,
+ 0x0e01, 0x0e3a,
+ 0x0e40, 0x0e4e,
+ 0x0e50, 0x0e59,
+ 0x0e81, 0x0e82,
+ 0x0e84, 0x0e84,
+ 0x0e87, 0x0e88,
+ 0x0e8a, 0x0e8a,
+ 0x0e8d, 0x0e8d,
+ 0x0e94, 0x0e97,
+ 0x0e99, 0x0e9f,
+ 0x0ea1, 0x0ea3,
+ 0x0ea5, 0x0ea5,
+ 0x0ea7, 0x0ea7,
+ 0x0eaa, 0x0eab,
+ 0x0ead, 0x0eb9,
+ 0x0ebb, 0x0ebd,
+ 0x0ec0, 0x0ec4,
+ 0x0ec6, 0x0ec6,
+ 0x0ec8, 0x0ecd,
+ 0x0ed0, 0x0ed9,
+ 0x0edc, 0x0edd,
+ 0x0f00, 0x0f00,
+ 0x0f18, 0x0f19,
+ 0x0f20, 0x0f33,
+ 0x0f35, 0x0f35,
+ 0x0f37, 0x0f37,
+ 0x0f39, 0x0f39,
+ 0x0f3e, 0x0f47,
+ 0x0f49, 0x0f6a,
+ 0x0f71, 0x0f84,
+ 0x0f86, 0x0f8b,
+ 0x0f90, 0x0f97,
+ 0x0f99, 0x0fbc,
+ 0x0fc6, 0x0fc6,
+ 0x1000, 0x1021,
+ 0x1023, 0x1027,
+ 0x1029, 0x102a,
+ 0x102c, 0x1032,
+ 0x1036, 0x1039,
+ 0x1040, 0x1049,
+ 0x1050, 0x1059,
+ 0x10a0, 0x10c5,
+ 0x10d0, 0x10f8,
+ 0x1100, 0x1159,
+ 0x115f, 0x11a2,
+ 0x11a8, 0x11f9,
+ 0x1200, 0x1206,
+ 0x1208, 0x1246,
+ 0x1248, 0x1248,
+ 0x124a, 0x124d,
+ 0x1250, 0x1256,
+ 0x1258, 0x1258,
+ 0x125a, 0x125d,
+ 0x1260, 0x1286,
+ 0x1288, 0x1288,
+ 0x128a, 0x128d,
+ 0x1290, 0x12ae,
+ 0x12b0, 0x12b0,
+ 0x12b2, 0x12b5,
+ 0x12b8, 0x12be,
+ 0x12c0, 0x12c0,
+ 0x12c2, 0x12c5,
+ 0x12c8, 0x12ce,
+ 0x12d0, 0x12d6,
+ 0x12d8, 0x12ee,
+ 0x12f0, 0x130e,
+ 0x1310, 0x1310,
+ 0x1312, 0x1315,
+ 0x1318, 0x131e,
+ 0x1320, 0x1346,
+ 0x1348, 0x135a,
+ 0x1369, 0x137c,
+ 0x13a0, 0x13f4,
+ 0x1401, 0x166c,
+ 0x166f, 0x1676,
+ 0x1681, 0x169a,
+ 0x16a0, 0x16ea,
+ 0x16ee, 0x16f0,
+ 0x1700, 0x170c,
+ 0x170e, 0x1714,
+ 0x1720, 0x1734,
+ 0x1740, 0x1753,
+ 0x1760, 0x176c,
+ 0x176e, 0x1770,
+ 0x1772, 0x1773,
+ 0x1780, 0x17b3,
+ 0x17b6, 0x17d3,
+ 0x17d7, 0x17d7,
+ 0x17dc, 0x17dd,
+ 0x17e0, 0x17e9,
+ 0x17f0, 0x17f9,
+ 0x180b, 0x180d,
+ 0x1810, 0x1819,
+ 0x1820, 0x1877,
+ 0x1880, 0x18a9,
+ 0x1900, 0x191c,
+ 0x1920, 0x192b,
+ 0x1930, 0x193b,
+ 0x1946, 0x196d,
+ 0x1970, 0x1974,
+ 0x1d00, 0x1d6b,
+ 0x1e00, 0x1e9b,
+ 0x1ea0, 0x1ef9,
+ 0x1f00, 0x1f15,
+ 0x1f18, 0x1f1d,
+ 0x1f20, 0x1f45,
+ 0x1f48, 0x1f4d,
+ 0x1f50, 0x1f57,
+ 0x1f59, 0x1f59,
+ 0x1f5b, 0x1f5b,
+ 0x1f5d, 0x1f5d,
+ 0x1f5f, 0x1f7d,
+ 0x1f80, 0x1fb4,
+ 0x1fb6, 0x1fbc,
+ 0x1fbe, 0x1fbe,
+ 0x1fc2, 0x1fc4,
+ 0x1fc6, 0x1fcc,
+ 0x1fd0, 0x1fd3,
+ 0x1fd6, 0x1fdb,
+ 0x1fe0, 0x1fec,
+ 0x1ff2, 0x1ff4,
+ 0x1ff6, 0x1ffc,
+ 0x203f, 0x2040,
+ 0x2054, 0x2054,
+ 0x2070, 0x2071,
+ 0x2074, 0x2079,
+ 0x207f, 0x2089,
+ 0x20d0, 0x20ea,
+ 0x2102, 0x2102,
+ 0x2107, 0x2107,
+ 0x210a, 0x2113,
+ 0x2115, 0x2115,
+ 0x2119, 0x211d,
+ 0x2124, 0x2124,
+ 0x2126, 0x2126,
+ 0x2128, 0x2128,
+ 0x212a, 0x212d,
+ 0x212f, 0x2131,
+ 0x2133, 0x2139,
+ 0x213d, 0x213f,
+ 0x2145, 0x2149,
+ 0x2153, 0x2183,
+ 0x2460, 0x249b,
+ 0x24ea, 0x24ff,
+ 0x2776, 0x2793,
+ 0x3005, 0x3007,
+ 0x3021, 0x302f,
+ 0x3031, 0x3035,
+ 0x3038, 0x303c,
+ 0x3041, 0x3096,
+ 0x3099, 0x309a,
+ 0x309d, 0x309f,
+ 0x30a1, 0x30ff,
+ 0x3105, 0x312c,
+ 0x3131, 0x318e,
+ 0x3192, 0x3195,
+ 0x31a0, 0x31b7,
+ 0x31f0, 0x31ff,
+ 0x3220, 0x3229,
+ 0x3251, 0x325f,
+ 0x3280, 0x3289,
+ 0x32b1, 0x32bf,
+ 0x3400, 0x4db5,
+ 0x4e00, 0x9fa5,
+ 0xa000, 0xa48c,
+ 0xac00, 0xd7a3,
+ 0xf900, 0xfa2d,
+ 0xfa30, 0xfa6a,
+ 0xfb00, 0xfb06,
+ 0xfb13, 0xfb17,
+ 0xfb1d, 0xfb28,
+ 0xfb2a, 0xfb36,
+ 0xfb38, 0xfb3c,
+ 0xfb3e, 0xfb3e,
+ 0xfb40, 0xfb41,
+ 0xfb43, 0xfb44,
+ 0xfb46, 0xfbb1,
+ 0xfbd3, 0xfd3d,
+ 0xfd50, 0xfd8f,
+ 0xfd92, 0xfdc7,
+ 0xfdf0, 0xfdfb,
+ 0xfe00, 0xfe0f,
+ 0xfe20, 0xfe23,
+ 0xfe33, 0xfe34,
+ 0xfe4d, 0xfe4f,
+ 0xfe70, 0xfe74,
+ 0xfe76, 0xfefc,
+ 0xff10, 0xff19,
+ 0xff21, 0xff3a,
+ 0xff3f, 0xff3f,
+ 0xff41, 0xff5a,
+ 0xff65, 0xffbe,
+ 0xffc2, 0xffc7,
+ 0xffca, 0xffcf,
+ 0xffd2, 0xffd7,
+ 0xffda, 0xffdc,
+ 0x10000, 0x1000b,
+ 0x1000d, 0x10026,
+ 0x10028, 0x1003a,
+ 0x1003c, 0x1003d,
+ 0x1003f, 0x1004d,
+ 0x10050, 0x1005d,
+ 0x10080, 0x100fa,
+ 0x10107, 0x10133,
+ 0x10300, 0x1031e,
+ 0x10320, 0x10323,
+ 0x10330, 0x1034a,
+ 0x10380, 0x1039d,
+ 0x10400, 0x1049d,
+ 0x104a0, 0x104a9,
+ 0x10800, 0x10805,
+ 0x10808, 0x10808,
+ 0x1080a, 0x10835,
+ 0x10837, 0x10838,
+ 0x1083c, 0x1083c,
+ 0x1083f, 0x1083f,
+ 0x1d165, 0x1d169,
+ 0x1d16d, 0x1d172,
+ 0x1d17b, 0x1d182,
+ 0x1d185, 0x1d18b,
+ 0x1d1aa, 0x1d1ad,
+ 0x1d400, 0x1d454,
+ 0x1d456, 0x1d49c,
+ 0x1d49e, 0x1d49f,
+ 0x1d4a2, 0x1d4a2,
+ 0x1d4a5, 0x1d4a6,
+ 0x1d4a9, 0x1d4ac,
+ 0x1d4ae, 0x1d4b9,
+ 0x1d4bb, 0x1d4bb,
+ 0x1d4bd, 0x1d4c3,
+ 0x1d4c5, 0x1d505,
+ 0x1d507, 0x1d50a,
+ 0x1d50d, 0x1d514,
+ 0x1d516, 0x1d51c,
+ 0x1d51e, 0x1d539,
+ 0x1d53b, 0x1d53e,
+ 0x1d540, 0x1d544,
+ 0x1d546, 0x1d546,
+ 0x1d54a, 0x1d550,
+ 0x1d552, 0x1d6a3,
+ 0x1d6a8, 0x1d6c0,
+ 0x1d6c2, 0x1d6da,
+ 0x1d6dc, 0x1d6fa,
+ 0x1d6fc, 0x1d714,
+ 0x1d716, 0x1d734,
+ 0x1d736, 0x1d74e,
+ 0x1d750, 0x1d76e,
+ 0x1d770, 0x1d788,
+ 0x1d78a, 0x1d7a8,
+ 0x1d7aa, 0x1d7c2,
+ 0x1d7c4, 0x1d7c9,
+ 0x1d7ce, 0x1d7ff,
+ 0x20000, 0x2a6d6,
+ 0x2f800, 0x2fa1d,
+ 0xe0100, 0xe01ef
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+}; /* end of MBWord */
- return FALSE;
-}
static int
-utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
- OnigCodePointRange* sbr[], OnigCodePointRange* mbr[])
+utf8_get_ctype_code_range(int ctype,
+ OnigCodePoint* sbr[], OnigCodePoint* mbr[])
{
#define CR_SET(sbl,mbl) do { \
- *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
- *nmb = sizeof(mbl) / sizeof(OnigCodePointRange); \
*sbr = sbl; \
*mbr = mbl; \
} while (0)
#define CR_SB_SET(sbl) do { \
- *nsb = sizeof(sbl) / sizeof(OnigCodePointRange); \
- *nmb = 0; \
*sbr = sbl; \
+ *mbr = EmptyRange; \
} while (0)
- static OnigCodePointRange SBAlpha[] = {
- { 0x41, 0x5a }, { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBAlpha[] = {
- { 0xaa, 0xaa }, { 0xb5, 0xb5 },
- { 0xba, 0xba }, { 0xc0, 0xd6 },
- { 0xd8, 0xf6 }, { 0xf8, 0x220 }
- };
-
- static OnigCodePointRange SBBlank[] = {
- { 0x09, 0x09 }, { 0x20, 0x20 }
- };
-
- static OnigCodePointRange MBBlank[] = {
- { 0xa0, 0xa0 }
- };
-
- static OnigCodePointRange SBCntrl[] = {
- { 0x00, 0x1f }, { 0x7f, 0x7f }
- };
-
- static OnigCodePointRange MBCntrl[] = {
- { 0x80, 0x9f }
- };
-
- static OnigCodePointRange SBDigit[] = {
- { 0x30, 0x39 }
- };
-
- static OnigCodePointRange SBGraph[] = {
- { 0x21, 0x7e }
- };
-
- static OnigCodePointRange MBGraph[] = {
- { 0xa1, 0x220 }
- };
-
- static OnigCodePointRange SBLower[] = {
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBLower[] = {
- { 0xaa, 0xaa }, { 0xb5, 0xb5 },
- { 0xba, 0xba }, { 0xdf, 0xf6 },
- { 0xf8, 0xff }
- };
-
- static OnigCodePointRange SBPrint[] = {
- { 0x20, 0x7e }
- };
-
- static OnigCodePointRange MBPrint[] = {
- { 0xa0, 0x220 }
- };
-
- static OnigCodePointRange SBPunct[] = {
- { 0x21, 0x23 }, { 0x25, 0x2a },
- { 0x2c, 0x2f }, { 0x3a, 0x3b },
- { 0x3f, 0x40 }, { 0x5b, 0x5d },
- { 0x5f, 0x5f }, { 0x7b, 0x7b },
- { 0x7d, 0x7d }
- };
-
- static OnigCodePointRange MBPunct[] = {
- { 0xa1, 0xa1 }, { 0xab, 0xab },
- { 0xad, 0xad }, { 0xb7, 0xb7 },
- { 0xbb, 0xbb }, { 0xbf, 0xbf }
- };
-
- static OnigCodePointRange SBSpace[] = {
- { 0x09, 0x0d }, { 0x20, 0x20 }
- };
-
- static OnigCodePointRange MBSpace[] = {
- { 0xa0, 0xa0 }
- };
-
- static OnigCodePointRange SBUpper[] = {
- { 0x41, 0x5a }
- };
-
- static OnigCodePointRange MBUpper[] = {
- { 0xc0, 0xd6 }, { 0xd8, 0xde }
- };
-
- static OnigCodePointRange SBXDigit[] = {
- { 0x30, 0x39 }, { 0x41, 0x46 },
- { 0x61, 0x66 }
- };
-
- static OnigCodePointRange SBWord[] = {
- { 0x30, 0x39 }, { 0x41, 0x5a },
- { 0x5f, 0x5f }, { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBWord[] = {
- { 0xaa, 0xaa }, { 0xb2, 0xb3 },
- { 0xb5, 0xb5 }, { 0xb9, 0xba },
- { 0xbc, 0xbe }, { 0xc0, 0xd6 },
- { 0xd8, 0xf6 },
-#if 0
- { 0xf8, 0x220 }
-#else
- { 0xf8, 0x7fffffff } /* all multibyte code as word */
-#endif
- };
-
- static OnigCodePointRange SBAscii[] = {
- { 0x00, 0x7f }
- };
-
- static OnigCodePointRange SBAlnum[] = {
- { 0x30, 0x39 }, { 0x41, 0x5a },
- { 0x61, 0x7a }
- };
-
- static OnigCodePointRange MBAlnum[] = {
- { 0xaa, 0xaa }, { 0xb5, 0xb5 },
- { 0xba, 0xba }, { 0xc0, 0xd6 },
- { 0xd8, 0xf6 }, { 0xf8, 0x220 }
- };
-
switch (ctype) {
case ONIGENC_CTYPE_ALPHA:
CR_SET(SBAlpha, MBAlpha);
@@ -474,7 +3577,7 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
CR_SET(SBCntrl, MBCntrl);
break;
case ONIGENC_CTYPE_DIGIT:
- CR_SB_SET(SBDigit);
+ CR_SET(SBDigit, MBDigit);
break;
case ONIGENC_CTYPE_GRAPH:
CR_SET(SBGraph, MBGraph);
@@ -501,7 +3604,7 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
CR_SET(SBWord, MBWord);
break;
case ONIGENC_CTYPE_ASCII:
- CR_SB_SET(SBAscii);
+ CR_SB_SET(SBASCII);
break;
case ONIGENC_CTYPE_ALNUM:
CR_SET(SBAlnum, MBAlnum);
@@ -515,6 +3618,83 @@ utf8_get_ctype_code_range(int ctype, int* nsb, int* nmb,
return 0;
}
+/*
+ * Reports whether code point `code` belongs to character class `ctype`.
+ *
+ * Code points below 256 are answered by
+ * ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(). For larger code points the
+ * result depends on the build:
+ *  - with USE_UNICODE_FULL_RANGE_CTYPE, the MB* range table selected by
+ *    ctype is searched with onig_is_in_code_range();
+ *  - otherwise TRUE is returned only when ctype overlaps
+ *    ONIGENC_CTYPE_WORD (and, under USE_INVALID_CODE_SCHEME, only when
+ *    code <= VALID_CODE_LIMIT).
+ *
+ * Returns the lookup result, FALSE when no rule applies, or
+ * ONIGENCERR_TYPE_BUG for an unrecognized ctype in the full-range build.
+ *
+ * NOTE(review): this function is used as a predicate; if
+ * ONIGENCERR_TYPE_BUG is non-zero, callers testing the result as a
+ * boolean would read an unknown ctype as a match -- confirm intended.
+ */
+static int
+utf8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
+{
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+ /* Range table chosen by the switch below; only needed in this build. */
+ OnigCodePoint *range;
+#endif
+
+ /* Code points 0..255 are classified by the ISO-8859-1 ctype macro. */
+ if (code < 256) {
+ return ONIGENC_IS_UNICODE_ISO_8859_1_CTYPE(code, ctype);
+ }
+
+#ifdef USE_UNICODE_FULL_RANGE_CTYPE
+
+ /* From here on code >= 256: pick the table that matches ctype. */
+ switch (ctype) {
+ case ONIGENC_CTYPE_ALPHA:
+ range = MBAlpha;
+ break;
+ case ONIGENC_CTYPE_BLANK:
+ range = MBBlank;
+ break;
+ case ONIGENC_CTYPE_CNTRL:
+ range = MBCntrl;
+ break;
+ case ONIGENC_CTYPE_DIGIT:
+ range = MBDigit;
+ break;
+ case ONIGENC_CTYPE_GRAPH:
+ range = MBGraph;
+ break;
+ case ONIGENC_CTYPE_LOWER:
+ range = MBLower;
+ break;
+ case ONIGENC_CTYPE_PRINT:
+ range = MBPrint;
+ break;
+ case ONIGENC_CTYPE_PUNCT:
+ range = MBPunct;
+ break;
+ case ONIGENC_CTYPE_SPACE:
+ range = MBSpace;
+ break;
+ case ONIGENC_CTYPE_UPPER:
+ range = MBUpper;
+ break;
+ case ONIGENC_CTYPE_XDIGIT:
+ /* No table is consulted: a code point >= 256 is never a hex digit here. */
+ return FALSE;
+ break; /* not reached */
+ case ONIGENC_CTYPE_WORD:
+ range = MBWord;
+ break;
+ case ONIGENC_CTYPE_ASCII:
+ /* No table is consulted: a code point >= 256 is never ASCII here. */
+ return FALSE;
+ break; /* not reached */
+ case ONIGENC_CTYPE_ALNUM:
+ range = MBAlnum;
+ break;
+
+ default:
+ /* Unrecognized ctype: an error code is returned instead of TRUE/FALSE. */
+ return ONIGENCERR_TYPE_BUG;
+ break; /* not reached */
+ }
+
+ /* Membership test of code against the selected table. */
+ return onig_is_in_code_range((UChar* )range, code);
+
+#else
+
+ /* Build without the range tables: only a ctype that overlaps */
+ /* ONIGENC_CTYPE_WORD can yield TRUE for code >= 256. */
+ if ((ctype & ONIGENC_CTYPE_WORD) != 0) {
+#ifdef USE_INVALID_CODE_SCHEME
+ /* When compiled in, this if guards the return TRUE that follows it. */
+ if (code <= VALID_CODE_LIMIT)
+#endif
+ return TRUE;
+ }
+#endif /* USE_UNICODE_FULL_RANGE_CTYPE */
+
+ /* Reached only in the build without range tables, when no rule matched. */
+ return FALSE;
+}
+
static UChar*
utf8_left_adjust_char_head(UChar* start, UChar* s)
{