diff options
author | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-11-04 14:43:08 +0000 |
---|---|---|
committer | ksaito <ksaito@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2004-11-04 14:43:08 +0000 |
commit | 82cb9eaa3bb49a77df4452cfdff18f817ecf63a6 (patch) | |
tree | 62fb3445ee466b5710d977707c048a0f26c5781d /oniguruma.h | |
parent | 5e853c811ce1d6d6edc187e580a14133667e1058 (diff) | |
download | ruby-82cb9eaa3bb49a77df4452cfdff18f817ecf63a6.tar.gz |
* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, utf8.c:
imported Oni Guruma 3.4.0.
* parse.y, re.c: Now mbclen() takes unsigned char as its argument.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7206 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'oniguruma.h')
-rw-r--r-- | oniguruma.h | 296 |
1 file changed, 213 insertions, 83 deletions
diff --git a/oniguruma.h b/oniguruma.h index 3fd9f4c395..c10f3b4d18 100644 --- a/oniguruma.h +++ b/oniguruma.h @@ -1,17 +1,38 @@ +#ifndef ONIGURUMA_H +#define ONIGURUMA_H /********************************************************************** - oniguruma.h - Oniguruma (regular expression library) - - Copyright (C) 2002-2004 K.Kosako (kosako@sofnec.co.jp) - **********************************************************************/ -#ifndef ONIGURUMA_H -#define ONIGURUMA_H +/*- + * Copyright (c) 2002-2004 K.Kosako <kosako AT sofnec DOT co DOT jp> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ #define ONIGURUMA -#define ONIGURUMA_VERSION_MAJOR 2 -#define ONIGURUMA_VERSION_MINOR 2 -#define ONIGURUMA_VERSION_TEENY 8 +#define ONIGURUMA_VERSION_MAJOR 3 +#define ONIGURUMA_VERSION_MINOR 4 +#define ONIGURUMA_VERSION_TEENY 0 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -56,12 +77,56 @@ typedef struct { OnigCodePoint to; } OnigCodePointRange; -#define ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE 16 + +/* ambiguous match flag */ +#define ONIGENC_AMBIGUOUS_MATCH_NONE 0 +#define ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE (1<<0) +#define ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE (1<<1) +/* #define ONIGENC_AMBIGUOUS_MATCH_ACCENT (1<<2) */ +/* #define ONIGENC_AMBIGUOUS_MATCH_HIRAGANA_KATAKANA (1<<3) */ +/* #define ONIGENC_AMBIGUOUS_MATCH_KATAKANA_WIDTH (1<<4) */ + +#define ONIGENC_AMBIGUOUS_MATCH_LIMIT (1<<1) +#define ONIGENC_AMBIGUOUS_MATCH_COMPOUND (1<<30) + +#define ONIGENC_AMBIGUOUS_MATCH_FULL \ + ( ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \ + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \ + ONIGENC_AMBIGUOUS_MATCH_COMPOUND ) +#define ONIGENC_AMBIGUOUS_MATCH_DEFAULT \ + (ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE | \ + ONIGENC_AMBIGUOUS_MATCH_NONASCII_CASE | \ + ONIGENC_AMBIGUOUS_MATCH_COMPOUND ) + +typedef unsigned int OnigAmbigType; + +#define ONIGENC_MAX_COMP_AMBIG_CODE_LEN 3 +#define ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM 4 + +typedef struct { + int len; + OnigCodePoint code[ONIGENC_MAX_COMP_AMBIG_CODE_LEN]; +} OnigCompAmbigCodeItem; + typedef struct { - int target_num; - int target_byte_len[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; - UChar* target_str[ONIGENC_FOLD_MATCH_MAX_TARGET_NUM_SIZE]; -} OnigEncFoldMatchInfo; + int n; + OnigCodePoint code; + OnigCompAmbigCodeItem items[ONIGENC_MAX_COMP_AMBIG_CODE_ITEM_NUM]; +} OnigCompAmbigCodes; + +typedef struct { + OnigCodePoint from; + OnigCodePoint to; +} OnigPairAmbigCodes; + +typedef struct { + OnigCodePoint esc; + OnigCodePoint anychar; + OnigCodePoint anytime; + OnigCodePoint zero_or_one_time; + OnigCodePoint one_or_more_time; + 
OnigCodePoint anychar_anytime; +} OnigMetaCharTableType; #if defined(RUBY_PLATFORM) && defined(M17N_H) @@ -72,23 +137,24 @@ typedef m17n_encoding* OnigEncoding; #else typedef struct { - const char len_table[256]; - const char* name; - int max_enc_len; - int is_fold_match; - int ctype_support_level; /* sb-only/full */ - int is_continuous_sb_mb; /* code point is continuous from sb to mb */ + int (*mbc_enc_len)(UChar* p); + const char* name; + int max_enc_len; + int min_enc_len; + OnigAmbigType support_ambig_flag; + OnigMetaCharTableType meta_char_table; + int (*is_mbc_newline)(UChar* p, UChar* end); OnigCodePoint (*mbc_to_code)(UChar* p, UChar* end); int (*code_to_mbclen)(OnigCodePoint code); int (*code_to_mbc)(OnigCodePoint code, UChar *buf); - int (*mbc_to_lower)(UChar* p, UChar* lower); - int (*mbc_is_case_ambig)(UChar* p); - int (*code_is_ctype)(OnigCodePoint code, unsigned int ctype); + int (*mbc_to_normalize)(OnigAmbigType flag, UChar** pp, UChar* end, UChar* to); + int (*is_mbc_ambiguous)(OnigAmbigType flag, UChar** pp, UChar* end); + int (*get_all_pair_ambig_codes)(OnigAmbigType flag, OnigPairAmbigCodes** acs); + int (*get_all_comp_ambig_codes)(OnigAmbigType flag, OnigCompAmbigCodes** acs); + int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype); int (*get_ctype_code_range)(int ctype, int* nsb, int* nmb, OnigCodePointRange* sbr[], OnigCodePointRange* mbr[]); - UChar* (*left_adjust_char_head)(UChar* start, UChar* s); - int (*is_allowed_reverse_match)(UChar* p, UChar* e); - int (*get_all_fold_match_code)(OnigCodePoint** codes); - int (*get_fold_match_info)(UChar* p, UChar* end, OnigEncFoldMatchInfo** info); + UChar* (*left_adjust_char_head)(UChar* start, UChar* p); + int (*is_allowed_reverse_match)(UChar* p, UChar* end); } OnigEncodingType; typedef OnigEncodingType* OnigEncoding; @@ -110,6 +176,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_14; ONIG_EXTERN OnigEncodingType OnigEncodingISO_8859_15; ONIG_EXTERN OnigEncodingType 
OnigEncodingISO_8859_16; ONIG_EXTERN OnigEncodingType OnigEncodingUTF8; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF16_LE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_BE; +ONIG_EXTERN OnigEncodingType OnigEncodingUTF32_LE; ONIG_EXTERN OnigEncodingType OnigEncodingEUC_JP; ONIG_EXTERN OnigEncodingType OnigEncodingEUC_TW; ONIG_EXTERN OnigEncodingType OnigEncodingEUC_KR; @@ -136,6 +206,10 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #define ONIG_ENCODING_ISO_8859_15 (&OnigEncodingISO_8859_15) #define ONIG_ENCODING_ISO_8859_16 (&OnigEncodingISO_8859_16) #define ONIG_ENCODING_UTF8 (&OnigEncodingUTF8) +#define ONIG_ENCODING_UTF16_BE (&OnigEncodingUTF16_BE) +#define ONIG_ENCODING_UTF16_LE (&OnigEncodingUTF16_LE) +#define ONIG_ENCODING_UTF32_BE (&OnigEncodingUTF32_BE) +#define ONIG_ENCODING_UTF32_LE (&OnigEncodingUTF32_LE) #define ONIG_ENCODING_EUC_JP (&OnigEncodingEUC_JP) #define ONIG_ENCODING_EUC_TW (&OnigEncodingEUC_TW) #define ONIG_ENCODING_EUC_KR (&OnigEncodingEUC_KR) @@ -151,35 +225,32 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; /* work size */ -#define ONIGENC_CODE_TO_MBC_MAXLEN 7 -#define ONIGENC_MBC_TO_LOWER_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN +#define ONIGENC_CODE_TO_MBC_MAXLEN 7 +#define ONIGENC_MBC_NORMALIZE_MAXLEN ONIGENC_CODE_TO_MBC_MAXLEN /* character types */ -#define ONIGENC_CTYPE_ALPHA (1<< 0) -#define ONIGENC_CTYPE_BLANK (1<< 1) -#define ONIGENC_CTYPE_CNTRL (1<< 2) -#define ONIGENC_CTYPE_DIGIT (1<< 3) -#define ONIGENC_CTYPE_GRAPH (1<< 4) -#define ONIGENC_CTYPE_LOWER (1<< 5) -#define ONIGENC_CTYPE_PRINT (1<< 6) -#define ONIGENC_CTYPE_PUNCT (1<< 7) -#define ONIGENC_CTYPE_SPACE (1<< 8) -#define ONIGENC_CTYPE_UPPER (1<< 9) -#define ONIGENC_CTYPE_XDIGIT (1<<10) -#define ONIGENC_CTYPE_WORD (1<<11) -#define ONIGENC_CTYPE_ASCII (1<<12) +#define ONIGENC_CTYPE_NEWLINE (1<< 0) +#define ONIGENC_CTYPE_ALPHA (1<< 1) +#define ONIGENC_CTYPE_BLANK (1<< 2) +#define ONIGENC_CTYPE_CNTRL (1<< 3) +#define 
ONIGENC_CTYPE_DIGIT (1<< 4) +#define ONIGENC_CTYPE_GRAPH (1<< 5) +#define ONIGENC_CTYPE_LOWER (1<< 6) +#define ONIGENC_CTYPE_PRINT (1<< 7) +#define ONIGENC_CTYPE_PUNCT (1<< 8) +#define ONIGENC_CTYPE_SPACE (1<< 9) +#define ONIGENC_CTYPE_UPPER (1<<10) +#define ONIGENC_CTYPE_XDIGIT (1<<11) +#define ONIGENC_CTYPE_WORD (1<<12) +#define ONIGENC_CTYPE_ASCII (1<<13) #define ONIGENC_CTYPE_ALNUM (ONIGENC_CTYPE_ALPHA | ONIGENC_CTYPE_DIGIT) -/* ctype support level */ -#define ONIGENC_CTYPE_SUPPORT_LEVEL_SB 0 -#define ONIGENC_CTYPE_SUPPORT_LEVEL_FULL 1 - -#define enc_len(enc,byte) ONIGENC_MBC_LEN_BY_HEAD(enc,byte) +#define enc_len(enc,p) ONIGENC_MBC_ENC_LEN(enc,p) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) -#define ONIGENC_IS_MBC_HEAD(enc,byte) (ONIGENC_MBC_LEN_BY_HEAD(enc,byte) != 1) +#define ONIGENC_IS_MBC_HEAD(enc,p) (ONIGENC_MBC_ENC_LEN(enc,p) != 1) #define ONIGENC_IS_MBC_ASCII(p) (*(p) < 128) #define ONIGENC_IS_CODE_ASCII(code) ((code) < 128) #define ONIGENC_IS_CODE_SB_WORD(enc,code) \ @@ -192,31 +263,33 @@ ONIG_EXTERN OnigEncodingType OnigEncodingBIG5; #include <ctype.h> /* for isblank(), isgraph() */ -#define ONIGENC_MBC_TO_LOWER(enc,p,buf) onigenc_mbc_to_lower(enc,p,buf) -#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) onigenc_mbc_is_case_ambig(enc,p) +#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ + onigenc_mbc_to_normalize(enc,flag,pp,end,buf) +#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ + onigenc_is_mbc_ambiguous(enc,flag,pp,end) -#define ONIGENC_IS_FOLD_MATCH(enc) FALSE -#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) FALSE -#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ONIGENC_CTYPE_SUPPORT_LEVEL_SB +#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ONIGENC_AMBIGUOUS_MATCH_ASCII_CASE #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ onigenc_is_allowed_reverse_match(enc, s, end) #define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ onigenc_get_left_adjust_char_head(enc, start, s) -#define 
ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) 0 -#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) ONIG_NO_SUPPORT_CONFIG +#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc, ambig_flag, acs) 0 +#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc, ambig_flag, acs) 0 #define ONIGENC_GET_CTYPE_CODE_RANGE(enc,ctype,nsb,nmb,sbr,mbr) \ ONIG_NO_SUPPORT_CONFIG -#define ONIGENC_MBC_LEN_BY_HEAD(enc,b) m17n_mbclen(enc,(int )b) +#define ONIGENC_MBC_ENC_LEN(enc,p) m17n_mbclen(enc,(int )(*p)) #define ONIGENC_MBC_MAXLEN(enc) m17n_mbmaxlen(enc) #define ONIGENC_MBC_MAXLEN_DIST(enc) \ (ONIGENC_MBC_MAXLEN(enc) > 0 ? ONIGENC_MBC_MAXLEN(enc) \ : ONIG_INFINITE_DISTANCE) +#define ONIGENC_MBC_MINLEN(enc) 1 #define ONIGENC_MBC_TO_CODE(enc,p,e) m17n_codepoint((enc),(p),(e)) #define ONIGENC_CODE_TO_MBCLEN(enc,code) m17n_codelen((enc),(code)) #define ONIGENC_CODE_TO_MBC(enc,code,buf) onigenc_code_to_mbc(enc, code, buf) -#if 0 -#define ONIGENC_STEP_BACK(enc,start,s,n) /* !! not supported !! */ +#if 0 /* !! not supported !! */ +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) +#define ONIGENC_STEP_BACK(enc,start,s,n) #endif #define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) \ @@ -251,9 +324,9 @@ int onigenc_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, int ctype)); ONIG_EXTERN int onigenc_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); ONIG_EXTERN -int onigenc_mbc_to_lower P_((OnigEncoding enc, UChar* p, UChar* buf)); +int onigenc_mbc_to_normalize P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end, UChar* buf)); ONIG_EXTERN -int onigenc_mbc_is_case_ambig P_((OnigEncoding enc, UChar* p)); +int onigenc_is_mbc_ambiguous P_((OnigEncoding enc, OnigAmbigType flag, UChar** pp, UChar* end)); ONIG_EXTERN int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end)); @@ -261,32 +334,35 @@ int onigenc_is_allowed_reverse_match P_((OnigEncoding enc, UChar* s, UChar* end) #define ONIGENC_NAME(enc) ((enc)->name) -#define ONIGENC_MBC_TO_LOWER(enc,p,buf) 
(enc)->mbc_to_lower(p,buf) -#define ONIGENC_IS_MBC_CASE_AMBIG(enc,p) (enc)->mbc_is_case_ambig(p) - -#define ONIGENC_IS_FOLD_MATCH(enc) ((enc)->is_fold_match) -#define ONIGENC_IS_CONTINUOUS_SB_MB(enc) ((enc)->is_continuous_sb_mb) -#define ONIGENC_CTYPE_SUPPORT_LEVEL(enc) ((enc)->ctype_support_level) +#define ONIGENC_MBC_TO_NORMALIZE(enc,flag,pp,end,buf) \ + (enc)->mbc_to_normalize(flag,pp,end,buf) +#define ONIGENC_IS_MBC_AMBIGUOUS(enc,flag,pp,end) \ + (enc)->is_mbc_ambiguous(flag,pp,end) +#define ONIGENC_SUPPORT_AMBIG_FLAG(enc) ((enc)->support_ambig_flag) #define ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc,s,end) \ (enc)->is_allowed_reverse_match(s,end) #define ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc,start,s) \ (enc)->left_adjust_char_head(start, s) -#define ONIGENC_GET_ALL_FOLD_MATCH_CODE(enc,codes) \ - (enc)->get_all_fold_match_code(codes) -#define ONIGENC_GET_FOLD_MATCH_INFO(enc,p,end,info) \ - (enc)->get_fold_match_info(p,end,info) +#define ONIGENC_GET_ALL_PAIR_AMBIG_CODES(enc,ambig_flag,acs) \ + (enc)->get_all_pair_ambig_codes(ambig_flag,acs) +#define ONIGENC_GET_ALL_COMP_AMBIG_CODES(enc,ambig_flag,acs) \ + (enc)->get_all_comp_ambig_codes(ambig_flag,acs) #define ONIGENC_STEP_BACK(enc,start,s,n) \ onigenc_step_back((enc),(start),(s),(n)) -#define ONIGENC_MBC_LEN_BY_HEAD(enc,byte) ((enc)->len_table[(int )(byte)]) +#define ONIGENC_MBC_ENC_LEN(enc,p) (enc)->mbc_enc_len(p) #define ONIGENC_MBC_MAXLEN(enc) ((enc)->max_enc_len) #define ONIGENC_MBC_MAXLEN_DIST(enc) ONIGENC_MBC_MAXLEN(enc) -#define ONIGENC_MBC_TO_CODE(enc,p,e) (enc)->mbc_to_code((p),(e)) +#define ONIGENC_MBC_MINLEN(enc) ((enc)->min_enc_len) +#define ONIGENC_IS_MBC_NEWLINE(enc,p,end) (enc)->is_mbc_newline((p),(end)) +#define ONIGENC_MBC_TO_CODE(enc,p,end) (enc)->mbc_to_code((p),(end)) #define ONIGENC_CODE_TO_MBCLEN(enc,code) (enc)->code_to_mbclen(code) #define ONIGENC_CODE_TO_MBC(enc,code,buf) (enc)->code_to_mbc(code,buf) -#define ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->code_is_ctype(code,ctype) +#define 
ONIGENC_IS_CODE_CTYPE(enc,code,ctype) (enc)->is_code_ctype(code,ctype) +#define ONIGENC_IS_CODE_NEWLINE(enc,code) \ + ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_NEWLINE) #define ONIGENC_IS_CODE_GRAPH(enc,code) \ ONIGENC_IS_CODE_CTYPE(enc,code,ONIGENC_CTYPE_GRAPH) #define ONIGENC_IS_CODE_PRINT(enc,code) \ @@ -340,6 +416,12 @@ ONIG_EXTERN UChar* onigenc_get_left_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); ONIG_EXTERN UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UChar* s)); +ONIG_EXTERN +int onigenc_strlen P_((OnigEncoding enc, UChar* p, UChar* end)); +ONIG_EXTERN +int onigenc_strlen_null P_((OnigEncoding enc, UChar* p)); +ONIG_EXTERN +int onigenc_str_bytelen_null P_((OnigEncoding enc, UChar* p)); @@ -353,13 +435,6 @@ UChar* onigenc_get_right_adjust_char_head P_((OnigEncoding enc, UChar* start, UC /* constants */ #define ONIG_MAX_ERROR_MESSAGE_LEN 90 -#if defined(RUBY_PLATFORM) && !defined(ONIG_RUBY_M17N) -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -#undef ismbchar -#define ismbchar(c) (mbclen((c)) != 1) -#define mbclen(c) (OnigEncDefaultCharEncoding->len_table[(unsigned char )(c)]) -#endif - typedef unsigned int OnigOptionType; #define ONIG_OPTION_DEFAULT ONIG_OPTION_NONE @@ -467,6 +542,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (1<<16) /* \p{...}, \P{...} */ #define ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (1<<17) /* \p{^..}, \P{^..} */ #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1<<18) /* \p{IsXDigit} */ +#define ONIG_SYN_OP2_ESC_H_XDIGIT (1<<19) /* \h, \H */ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1<<31) /* not implemented */ @@ -479,6 +555,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (1<<6) /* (?<=a|bc) */ #define ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (1<<7) /* see doc/RE */ #define ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (1<<8) /* (?<x>)(?<x>) */ +#define 
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (1<<9) /* a{n}?=(?:a{n})? */ /* syntax (behavior) in char class [...] */ #define ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (1<<20) /* [^...] */ @@ -565,6 +642,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 /* errors related to thread */ #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 @@ -575,6 +653,15 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) +typedef struct OnigCaptureTreeNodeStruct { + int group; /* group number */ + int beg; + int end; + int allocated; + int num_childs; + struct OnigCaptureTreeNodeStruct** childs; +} OnigCaptureTreeNode; + /* match result region type */ struct re_registers { int allocated; @@ -582,9 +669,16 @@ struct re_registers { int* beg; int* end; /* extended */ - struct re_registers** list; /* capture history. 
list[1]-list[31] */ + OnigCaptureTreeNode* history_root; /* capture history tree root */ }; +/* capture tree traverse */ +#define ONIG_TRAVERSE_CALLBACK_AT_FIRST 1 +#define ONIG_TRAVERSE_CALLBACK_AT_LAST 2 +#define ONIG_TRAVERSE_CALLBACK_AT_BOTH \ + ( ONIG_TRAVERSE_CALLBACK_AT_FIRST | ONIG_TRAVERSE_CALLBACK_AT_LAST ) + + #define ONIG_REGION_NOTPOS -1 typedef struct re_registers OnigRegion; @@ -635,6 +729,7 @@ typedef struct re_pattern_buffer { OnigEncoding enc; OnigOptionType options; OnigSyntaxType* syntax; + OnigAmbigType ambig_flag; void* name_table; /* optimization info (string search, char-map and anchors) */ @@ -646,7 +741,7 @@ typedef struct re_pattern_buffer { int sub_anchor; /* start-anchor for exact or map */ unsigned char *exact; unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ int *int_map; /* BM skip for exact_len > 255 */ int *int_map_backward; /* BM skip for backward search */ OnigDistance dmin; /* min-distance of exact or map */ @@ -657,6 +752,15 @@ typedef struct re_pattern_buffer { } regex_t; +typedef struct { + int num_of_elements; + OnigEncoding pattern_enc; + OnigEncoding target_enc; + OnigSyntaxType* syntax; + OnigOptionType option; + OnigAmbigType ambig_flag; +} OnigCompileInfo; + /* Oniguruma Native API */ ONIG_EXTERN int onig_init P_((void)); @@ -669,10 +773,14 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN int onig_new P_((regex_t**, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN +int onig_new_deluxe P_((regex_t** reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN void onig_free P_((regex_t*)); ONIG_EXTERN int onig_recompile P_((regex_t*, UChar* pattern, UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* 
einfo)); ONIG_EXTERN +int onig_recompile_deluxe P_((regex_t* reg, UChar* pattern, UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); +ONIG_EXTERN int onig_search P_((regex_t*, UChar* str, UChar* end, UChar* start, UChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN int onig_match P_((regex_t*, UChar* str, UChar* end, UChar* at, OnigRegion* region, OnigOptionType option)); @@ -696,16 +804,34 @@ int onig_foreach_name P_((regex_t* reg, int (*func)(UChar*,UChar*,int,int*,regex ONIG_EXTERN int onig_number_of_names P_((regex_t* reg)); ONIG_EXTERN +int onig_number_of_captures P_((regex_t* reg)); +ONIG_EXTERN +int onig_number_of_capture_histories P_((regex_t* reg)); +ONIG_EXTERN +OnigCaptureTreeNode* onig_get_capture_tree P_((OnigRegion* region)); +ONIG_EXTERN +int onig_capture_tree_traverse P_((OnigRegion* region, int at, int(*callback_func)(int,int,int,int,int,void*), void* arg)); +ONIG_EXTERN OnigEncoding onig_get_encoding P_((regex_t* reg)); ONIG_EXTERN OnigOptionType onig_get_options P_((regex_t* reg)); ONIG_EXTERN +OnigAmbigType onig_get_ambig_flag P_((regex_t* reg)); +ONIG_EXTERN OnigSyntaxType* onig_get_syntax P_((regex_t* reg)); ONIG_EXTERN int onig_set_default_syntax P_((OnigSyntaxType* syntax)); ONIG_EXTERN void onig_copy_syntax P_((OnigSyntaxType* to, OnigSyntaxType* from)); ONIG_EXTERN +unsigned int onig_get_syntax_op P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_op2 P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +unsigned int onig_get_syntax_behavior P_((OnigSyntaxType* syntax)); +ONIG_EXTERN +OnigOptionType onig_get_syntax_options P_((OnigSyntaxType* syntax)); +ONIG_EXTERN void onig_set_syntax_op P_((OnigSyntaxType* syntax, unsigned int op)); ONIG_EXTERN void onig_set_syntax_op2 P_((OnigSyntaxType* syntax, unsigned int op2)); @@ -714,7 +840,9 @@ void onig_set_syntax_behavior P_((OnigSyntaxType* syntax, unsigned int behavior) ONIG_EXTERN void onig_set_syntax_options P_((OnigSyntaxType* syntax, 
OnigOptionType options)); ONIG_EXTERN -int onig_set_meta_char P_((unsigned int what, OnigCodePoint code)); +int onig_set_meta_char P_((OnigEncoding enc, unsigned int what, OnigCodePoint code)); +ONIG_EXTERN +void onig_copy_encoding P_((OnigEncoding to, OnigEncoding from)); ONIG_EXTERN unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN @@ -723,5 +851,7 @@ ONIG_EXTERN int onig_end P_((void)); ONIG_EXTERN const char* onig_version P_((void)); +ONIG_EXTERN +const char* onig_copyright P_((void)); #endif /* ONIGURUMA_H */ |