From 52ed8c4edd880e9b9482748e9692b1e22917bf92 Mon Sep 17 00:00:00 2001 From: matz Date: Thu, 3 Jan 2008 15:55:04 +0000 Subject: * include/ruby/oniguruma.h: Oniguruma 1.9.1 merged. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14874 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 4 + enc/euc_jp.c | 21 +- enc/iso_8859_10.c | 97 +++++---- enc/iso_8859_11.c | 2 +- enc/iso_8859_13.c | 67 +++--- enc/iso_8859_14.c | 100 ++++----- enc/iso_8859_15.c | 80 +++---- enc/iso_8859_16.c | 93 ++++----- enc/iso_8859_8.c | 2 +- enc/iso_8859_9.c | 5 +- enc/sjis.c | 20 +- enc/unicode.c | 56 +++-- enc/utf8.c | 40 +--- include/ruby/oniguruma.h | 19 +- regcomp.c | 128 ++++++------ regenc.c | 44 ++-- regenc.h | 18 +- regerror.c | 38 +++- regexec.c | 121 +++++------ regint.h | 30 ++- regparse.c | 530 ++++++++++++++++++++++++++++++++--------------- regparse.h | 3 +- version.h | 6 +- 23 files changed, 872 insertions(+), 652 deletions(-) diff --git a/ChangeLog b/ChangeLog index 54ec3c6214..8a3216f2d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Fri Jan 4 00:54:43 2008 Yukihiro Matsumoto + + * include/ruby/oniguruma.h: Oniguruma 1.9.1 merged. + Fri Jan 4 00:20:47 2008 Tanaka Akira * io.c (io_ungetc): move data in buffer if it is required to store the diff --git a/enc/euc_jp.c b/enc/euc_jp.c index 35767412f1..4e397ed25a 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -137,7 +137,7 @@ eucjp_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_EUC_JP, p, end); + len = enclen(ONIG_ENCODING_EUC_JP, p, end); n = (OnigCodePoint )*p++; if (len == 1) return n; @@ -156,7 +156,8 @@ eucjp_code_to_mbclen(OnigCodePoint code, OnigEncoding enc) else if (code > 0xffffff) return 0; else if ((code & 0xff0000) >= 0x800000) return 3; else if ((code & 0xff00) >= 0x8000) return 2; - else return 0; + else + return ONIGERR_INVALID_CODE_POINT_VALUE; } #if 0 @@ -188,8 +189,8 @@ eucjp_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf)) - return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(ONIG_ENCODING_EUC_JP, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } @@ -210,7 +211,7 @@ eucjp_mbc_case_fold(OnigCaseFoldType flag, else { int i; - len = enc_len(ONIG_ENCODING_EUC_JP, p, end); + len = enclen(ONIG_ENCODING_EUC_JP, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; } @@ -232,7 +233,7 @@ eucjp_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc p = s; while (!eucjp_islead(*p) && p > start) p--; - len = enc_len(ONIG_ENCODING_EUC_JP, p, s); + len = enclen(ONIG_ENCODING_EUC_JP, p, s); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); @@ -311,7 +312,7 @@ eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= (unsigned int )PropertyListNum) - return ONIGENC_ERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); } @@ -320,7 +321,7 @@ eucjp_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) } static int -eucjp_get_ctype_code_range(int ctype, OnigCodePoint* sb_out, +eucjp_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc) { if (ctype <= ONIGENC_MAX_STD_CTYPE) { @@ -332,8 +333,8 @@ eucjp_get_ctype_code_range(int ctype, OnigCodePoint* sb_out, PROPERTY_LIST_INIT_CHECK; ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= PropertyListNum) - return ONIGENC_ERR_TYPE_BUG; + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; *ranges = PropertyList[ctype]; return 0; diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index 2b5affa4ee..9b8a035253 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,7 @@ iso_8859_10_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_10_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; @@ -155,52 +154,52 @@ iso_8859_10_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e } static const OnigPairCaseFoldCodes CaseFoldMap[] = { - { 0xa1, 0xb1 }, - { 0xa2, 0xb2 }, - { 0xa3, 0xb3 }, - { 0xa4, 0xb4 }, - { 0xa5, 0xb5 }, - { 0xa6, 0xb6 }, - { 0xa8, 0xb8 }, - { 0xa9, 0xb9 }, - { 0xaa, 0xba }, - { 0xab, 0xbb }, - { 0xac, 0xbc }, - { 0xae, 0xbe }, - { 0xaf, 0xbf }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe } + { 0xa1, 0xb1 }, + { 0xa2, 0xb2 }, + { 0xa3, 0xb3 }, + { 0xa4, 0xb4 }, + { 0xa5, 0xb5 }, + { 0xa6, 0xb6 }, + { 0xa8, 0xb8 }, + { 0xa9, 0xb9 }, + { 0xaa, 0xba }, + { 0xab, 0xbb }, + { 0xac, 0xbc }, + { 0xae, 0xbe }, + { 0xaf, 0xbf }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index 343993d01c..aa5d879e95 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -2,7 +2,7 @@ iso8859_11.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index 4b6db1adf3..799df9e5bd 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,7 @@ iso_8859_13_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_13_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; @@ -159,37 +158,37 @@ iso_8859_13_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e } static const OnigPairCaseFoldCodes CaseFoldMap[] = { - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe } + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c index c54d8fcbd2..225154a0a6 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,8 @@ iso_8859_14_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_14_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; @@ -155,53 +155,53 @@ iso_8859_14_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e } static const OnigPairCaseFoldCodes CaseFoldMap[] = { - { 0xa1, 0xa2 }, - { 0xa4, 0xa5 }, - { 0xa6, 0xab }, - { 0xa8, 0xb8 }, - { 0xaa, 0xba }, - { 0xac, 0xbc }, - { 0xaf, 0xff }, - - { 0xb0, 0xb1 }, - { 0xb2, 0xb3 }, - { 0xb4, 0xb5 }, - { 0xb7, 0xb9 }, - { 0xbb, 0xbf }, - { 0xbd, 0xbe }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe } + { 0xa1, 0xa2 }, + { 0xa4, 0xa5 }, + { 0xa6, 0xab }, + { 0xa8, 0xb8 }, + { 0xaa, 0xba }, + { 0xac, 0xbc }, + { 0xaf, 0xff }, + + { 0xb0, 0xb1 }, + { 0xb2, 0xb3 }, + { 0xb4, 0xb5 }, + { 0xb7, 0xb9 }, + { 0xbb, 0xbf }, + { 0xbd, 0xbe }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index de963706f4..339c7d9c32 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,8 @@ iso_8859_15_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_15_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, + const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; @@ -159,43 +159,43 @@ iso_8859_15_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e } static const OnigPairCaseFoldCodes CaseFoldMap[] = { - { 0xa6, 0xa8 }, - - { 0xb4, 0xb8 }, - { 0xbc, 0xbd }, - { 0xbe, 0xff }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe } + { 0xa6, 0xa8 }, + + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index 1cf4f65672..9bd42b4689 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,7 @@ iso_8859_16_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_16_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; @@ -155,50 +154,50 @@ iso_8859_16_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding e } static const OnigPairCaseFoldCodes CaseFoldMap[] = { - { 0xa1, 0xa2 }, - { 0xa3, 0xb3 }, - { 0xa6, 0xa8 }, - { 0xaa, 0xba }, - { 0xac, 0xae }, - { 0xaf, 0xbf }, - - { 0xb2, 0xb9 }, - { 0xb4, 0xb8 }, - { 0xbc, 0xbd }, - { 0xbe, 0xff }, - - { 0xc0, 0xe0 }, - { 0xc1, 0xe1 }, - { 0xc2, 0xe2 }, - { 0xc3, 0xe3 }, - { 0xc4, 0xe4 }, - { 0xc5, 0xe5 }, - { 0xc6, 0xe6 }, - { 0xc7, 0xe7 }, - { 0xc8, 0xe8 }, - { 0xc9, 0xe9 }, - { 0xca, 0xea }, - { 0xcb, 0xeb }, - { 0xcc, 0xec }, - { 0xcd, 0xed }, - { 0xce, 0xee }, - { 0xcf, 0xef }, - - { 0xd0, 0xf0 }, - { 0xd1, 0xf1 }, - { 0xd2, 0xf2 }, - { 0xd3, 0xf3 }, - { 0xd4, 0xf4 }, - { 0xd5, 0xf5 }, - { 0xd6, 0xf6 }, - { 0xd7, 0xf7 }, - { 0xd8, 0xf8 }, - { 0xd9, 0xf9 }, - { 0xda, 0xfa }, - { 0xdb, 0xfb }, - { 0xdc, 0xfc }, - { 0xdd, 0xfd }, - { 0xde, 0xfe } + { 0xa1, 0xa2 }, + { 0xa3, 0xb3 }, + { 0xa6, 0xa8 }, + { 0xaa, 0xba }, + { 0xac, 0xae }, + { 0xaf, 0xbf }, + + { 0xb2, 0xb9 }, + { 0xb4, 0xb8 }, + { 0xbc, 0xbd }, + { 0xbe, 0xff }, + + { 0xc0, 0xe0 }, + { 0xc1, 0xe1 }, + { 0xc2, 0xe2 }, + { 0xc3, 0xe3 }, + { 0xc4, 0xe4 }, + { 0xc5, 0xe5 }, + { 0xc6, 0xe6 }, + { 0xc7, 0xe7 }, + { 0xc8, 0xe8 }, + { 0xc9, 0xe9 }, + { 0xca, 0xea }, + { 0xcb, 0xeb }, + { 0xcc, 0xec }, + { 0xcd, 0xed }, + { 0xce, 0xee }, + { 0xcf, 0xef }, + + { 0xd0, 0xf0 }, + { 0xd1, 0xf1 }, + { 0xd2, 0xf2 }, + { 0xd3, 0xf3 }, + { 0xd4, 0xf4 }, + { 0xd5, 0xf5 }, + { 0xd6, 0xf6 }, + { 0xd7, 0xf7 }, + { 0xd8, 0xf8 }, + { 0xd9, 0xf9 }, + { 0xda, 0xfa }, + { 0xdb, 0xfb }, + { 0xdc, 0xfc }, + { 0xdd, 0xfd }, + { 0xde, 0xfe } }; static int diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index 3a075b0af3..e0a70483a1 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -2,7 +2,7 @@ iso8859_8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 1921c591ba..51ce4c8f66 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako + * Copyright (c) 2002-2007 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -124,8 +124,7 @@ iso_8859_9_mbc_case_fold(OnigCaseFoldType flag, #if 0 static int -iso_8859_9_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { int v; const UChar* p = *pp; diff --git a/enc/sjis.c b/enc/sjis.c index 6d64acd02b..d526f66865 100644 --- a/enc/sjis.c +++ b/enc/sjis.c @@ -141,7 +141,7 @@ sjis_code_to_mbclen(OnigCodePoint code, OnigEncoding enc) return 2; } else - return 0; + return ONIGERR_INVALID_CODE_POINT_VALUE; } static OnigCodePoint @@ -150,7 +150,7 @@ sjis_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) int c, i, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_SJIS, p, end); + len = enclen(ONIG_ENCODING_SJIS, p, end); c = *p++; n = c; if (len == 1) return n; @@ -172,8 +172,8 @@ sjis_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) *p++ = (UChar )(code & 0xff); #if 0 - if (enc_len(ONIG_ENCODING_SJIS, buf) != (p - buf)) - return REGERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf)) + return REGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } @@ -192,7 +192,7 @@ sjis_mbc_case_fold(OnigCaseFoldType flag, } else { int i; - int len = enc_len(ONIG_ENCODING_SJIS, p, end); + int len = enclen(ONIG_ENCODING_SJIS, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; @@ -245,7 +245,7 @@ sjis_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc) } } } - len = enc_len(ONIG_ENCODING_SJIS, p, s); + len = enclen(ONIG_ENCODING_SJIS, p, s); if (p + len > s) return (UChar* )p; p += len; return (UChar* )(p + ((s - p) & ~1)); @@ -322,7 +322,7 @@ sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) ctype -= (ONIGENC_MAX_STD_CTYPE + 1); if (ctype >= (unsigned int )PropertyListNum) - return ONIGENC_ERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; return onig_is_in_code_range((UChar* )PropertyList[ctype], code); } @@ -331,7 +331,7 @@ sjis_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc) } static int -sjis_get_ctype_code_range(int ctype, OnigCodePoint* sb_out, +sjis_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc) { if (ctype <= ONIGENC_MAX_STD_CTYPE) { @@ -343,8 +343,8 @@ sjis_get_ctype_code_range(int ctype, OnigCodePoint* sb_out, PROPERTY_LIST_INIT_CHECK; ctype -= (ONIGENC_MAX_STD_CTYPE + 1); - if (ctype >= PropertyListNum) - return ONIGENC_ERR_TYPE_BUG; + if (ctype >= (OnigCtype )PropertyListNum) + return ONIGERR_TYPE_BUG; *ranges = PropertyList[ctype]; return 0; diff --git a/enc/unicode.c b/enc/unicode.c index 8b1a1308dc..4642bbe5c1 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -10618,7 +10618,7 @@ static PosixBracketEntryType HashEntryData[] = { static const OnigCodePoint* CodeRanges[CODE_RANGES_NUM]; static int CodeRangeTableInited = 0; -static void init_code_range_array() { +static void init_code_range_array(void) { THREAD_ATOMIC_START; CodeRanges[0] = CR_NEWLINE; @@ -10756,7 +10756,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncodi } if (ctype >= CODE_RANGES_NUM) { - return ONIGENC_ERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; } if (CodeRangeTableInited == 0) init_code_range_array(); @@ -10769,7 +10769,7 @@ extern int onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) { if (ctype >= CODE_RANGES_NUM) { - return ONIGENC_ERR_TYPE_BUG; + return ONIGERR_TYPE_BUG; } if (CodeRangeTableInited == 0) init_code_range_array(); @@ -10780,7 +10780,7 @@ onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) } extern int -onigenc_utf16_32_get_ctype_code_range(int ctype, OnigCodePoint* sb_out, +onigenc_utf16_32_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[]) { *sb_out = 0x00; @@ -10832,7 +10832,7 @@ onigenc_unicode_property_name_to_ctype(OnigEncoding enc, UChar* name, UChar* end if (len >= PROPERTY_NAME_MAX_SIZE) return ONIGERR_INVALID_CHAR_PROPERTY_NAME; - p += enc_len(enc, p, end); + p += enclen(enc, p, end); } buf[len] = 0; @@ -10903,11 +10903,12 @@ static int init_case_fold_table(void) FoldTable = st_init_numtable_with_size(1200); if (ONIG_IS_NULL(FoldTable)) return ONIGERR_MEMORY; - for (i = 0; i < sizeof(CaseFold)/sizeof(CaseFold_11_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseFold)/sizeof(CaseFold_11_Type)); i++) { p = &CaseFold[i]; st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); } - for (i = 0; i < sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseFold_Locale)/sizeof(CaseFold_11_Type)); + i++) { p = &CaseFold_Locale[i]; st_add_direct(FoldTable, (st_data_t )p->from, (st_data_t )&(p->to)); } @@ -10915,11 +10916,13 @@ static int init_case_fold_table(void) Unfold1Table = st_init_numtable_with_size(1000); if (ONIG_IS_NULL(Unfold1Table)) return ONIGERR_MEMORY; - for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { p1 = &CaseUnfold_11[i]; st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); } - for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type); + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); i++) { p1 = &CaseUnfold_11_Locale[i]; st_add_direct(Unfold1Table, (st_data_t )p1->from, (st_data_t )&(p1->to)); @@ -10928,11 +10931,13 @@ static int init_case_fold_table(void) Unfold2Table = st_init_table_with_size(&type_code2_hash, 200); if (ONIG_IS_NULL(Unfold2Table)) return ONIGERR_MEMORY; - for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { p2 = &CaseUnfold_12[i]; st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); } - for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type); + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); i++) { p2 = &CaseUnfold_12_Locale[i]; st_add_direct(Unfold2Table, (st_data_t )p2->from, (st_data_t )(&p2->to)); @@ -10941,7 +10946,8 @@ static int init_case_fold_table(void) Unfold3Table = st_init_table_with_size(&type_code3_hash, 30); if (ONIG_IS_NULL(Unfold3Table)) return ONIGERR_MEMORY; - for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { p3 = &CaseUnfold_13[i]; st_add_direct(Unfold3Table, (st_data_t )p3->from, (st_data_t )(&p3->to)); } @@ -10953,7 +10959,8 @@ static int init_case_fold_table(void) extern int onigenc_unicode_mbc_case_fold(OnigEncoding enc, - OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold) + OnigCaseFoldType flag ARG_UNUSED, const UChar** pp, const UChar* end, + UChar* fold) { CodePointList3 *to; OnigCodePoint code; @@ -10963,7 +10970,7 @@ onigenc_unicode_mbc_case_fold(OnigEncoding enc, if (CaseFoldInited == 0) init_case_fold_table(); code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enc_len(enc, p, end); + len = enclen(enc, p, end); *pp += len; #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI @@ -11014,7 +11021,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, /* if (CaseFoldInited == 0) init_case_fold_table(); */ - for (i = 0; i < sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_11)/sizeof(CaseUnfold_11_Type)); + i++) { p11 = &CaseUnfold_11[i]; for (j = 0; j < p11->to.n; j++) { code = p11->from; @@ -11053,7 +11061,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, } else { #endif - for (i = 0; i < sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type); + for (i = 0; + i < (int )(sizeof(CaseUnfold_11_Locale)/sizeof(CaseUnfold_11_Type)); i++) { p11 = &CaseUnfold_11_Locale[i]; for (j = 0; j < p11->to.n; j++) { @@ -11081,7 +11090,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, #endif if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - for (i = 0; i < sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_12)/sizeof(CaseUnfold_12_Type)); + i++) { for (j = 0; j < CaseUnfold_12[i].to.n; j++) { r = (*f)(CaseUnfold_12[i].to.code[j], (OnigCodePoint* )CaseUnfold_12[i].from, 2, arg); @@ -11100,7 +11110,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) == 0) { #endif - for (i = 0; i < sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type); + for (i = 0; + i < (int )(sizeof(CaseUnfold_12_Locale)/sizeof(CaseUnfold_12_Type)); i++) { for (j = 0; j < CaseUnfold_12_Locale[i].to.n; j++) { r = (*f)(CaseUnfold_12_Locale[i].to.code[j], @@ -11121,7 +11132,8 @@ onigenc_unicode_apply_all_case_fold(OnigCaseFoldType flag, } #endif - for (i = 0; i < sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type); i++) { + for (i = 0; i < (int )(sizeof(CaseUnfold_13)/sizeof(CaseUnfold_13_Type)); + i++) { for (j = 0; j < CaseUnfold_13[i].to.n; j++) { r = (*f)(CaseUnfold_13[i].to.code[j], (OnigCodePoint* )CaseUnfold_13[i].from, 3, arg); @@ -11156,7 +11168,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, n = 0; code = ONIGENC_MBC_TO_CODE(enc, p, end); - len = enc_len(enc, p, end); + len = enclen(enc, p, end); #ifdef USE_UNICODE_CASE_FOLD_TURKISH_AZERI if ((flag & ONIGENC_CASE_FOLD_TURKISH_AZERI) != 0) { @@ -11305,7 +11317,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, else codes[1] = code; - clen = enc_len(enc, p, end); + clen = enclen(enc, p, end); len += clen; if (onig_st_lookup(Unfold2Table, (st_data_t )codes, (void* )&z2) != 0) { for (i = 0; i < z2->n; i++) { @@ -11326,7 +11338,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, else codes[2] = code; - clen = enc_len(enc, p, end); + clen = enclen(enc, p, end); len += clen; if (onig_st_lookup(Unfold3Table, (st_data_t )codes, (void* )&z2) != 0) { diff --git a/enc/utf8.c b/enc/utf8.c index adfb2742a5..33cb0eed53 100644 --- a/enc/utf8.c +++ b/enc/utf8.c @@ -272,7 +272,7 @@ utf8_mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc) int c, len; OnigCodePoint n; - len = enc_len(ONIG_ENCODING_UTF8, p, end); + len = enclen(ONIG_ENCODING_UTF8, p, end); c = *p++; if (len > 1) { len--; @@ -307,33 +307,9 @@ utf8_code_to_mbclen(OnigCodePoint code, OnigEncoding enc) else if (code == INVALID_CODE_FF) return 1; #endif else - return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; } -#if 0 -static int -utf8_code_to_mbc_first(OnigCodePoint code) -{ - if ((code & 0xffffff80) == 0) - return code; - else { - if ((code & 0xfffff800) == 0) - return ((code>>6)& 0x1f) | 0xc0; - else if ((code & 0xffff0000) == 0) - return ((code>>12) & 0x0f) | 0xe0; - else if ((code & 0xffe00000) == 0) - return ((code>>18) & 0x07) | 0xf0; - else if ((code & 0xfc000000) == 0) - return ((code>>24) & 0x03) | 0xf8; - else if ((code & 0x80000000) == 0) - return ((code>>30) & 0x01) | 0xfc; - else { - return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; - } - } -} -#endif - static int utf8_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) { @@ -383,7 +359,7 @@ utf8_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc) } #endif else { - return ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE; + return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; } *p++ = UTF8_TRAIL0(code); @@ -421,7 +397,7 @@ utf8_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, #if 0 static int -utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) +is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) { const UChar* p = *pp; @@ -430,7 +406,7 @@ utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } else { - (*pp) += enc_len(ONIG_ENCODING_UTF8, p); + (*pp) += enclen(ONIG_ENCODING_UTF8, p); if (*p == 0xc3) { int c = *(p + 1); @@ -457,7 +433,7 @@ utf8_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) static int -utf8_get_ctype_code_range(int ctype, OnigCodePoint *sb_out, +utf8_get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[], OnigEncoding enc) { *sb_out = 0x80; @@ -478,7 +454,7 @@ utf8_left_adjust_char_head(const UChar* start, const UChar* s, OnigEncoding enc) } static int -utf8_get_case_fold_codes_by_str(OnigCaseFoldType flag, +get_case_fold_codes_by_str(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc) { @@ -497,7 +473,7 @@ OnigEncodingDefine(utf8, UTF8) = { utf8_code_to_mbc, utf8_mbc_case_fold, onigenc_unicode_apply_all_case_fold, - utf8_get_case_fold_codes_by_str, + get_case_fold_codes_by_str, onigenc_unicode_property_name_to_ctype, onigenc_unicode_is_code_ctype, utf8_get_ctype_code_range, diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 0cf2e0c6e4..37305bc17c 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -39,7 +39,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 5 #define ONIGURUMA_VERSION_MINOR 9 -#define ONIGURUMA_VERSION_TEENY 0 +#define ONIGURUMA_VERSION_TEENY 1 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -57,6 +57,12 @@ extern "C" { # endif #endif +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + #ifndef P_ #if defined(__STDC__) || defined(_WIN32) # define P_(args) args @@ -99,12 +105,12 @@ extern "C" { typedef unsigned char OnigUChar; typedef unsigned long OnigCodePoint; +typedef unsigned int OnigCtype; typedef unsigned int OnigDistance; #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) -/* case fold flag */ -typedef unsigned int OnigCaseFoldType; +typedef unsigned int OnigCaseFoldType; /* case fold flag */ ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; @@ -156,8 +162,8 @@ typedef struct OnigEncodingTypeST { int (*apply_all_case_fold)(OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg, struct OnigEncodingTypeST* enc); int (*get_case_fold_codes_by_str)(OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem acs[], struct OnigEncodingTypeST* enc); int (*property_name_to_ctype)(struct OnigEncodingTypeST* enc, OnigUChar* p, OnigUChar* end); - int (*is_code_ctype)(OnigCodePoint code, unsigned int ctype, struct OnigEncodingTypeST* enc); - int (*get_ctype_code_range)(int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); + int (*is_code_ctype)(OnigCodePoint code, OnigCtype ctype, struct OnigEncodingTypeST* enc); + int (*get_ctype_code_range)(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], struct OnigEncodingTypeST* enc); OnigUChar* (*left_adjust_char_head)(const OnigUChar* start, const OnigUChar* p, struct OnigEncodingTypeST* enc); int (*is_allowed_reverse_match)(const OnigUChar* p, const OnigUChar* end, struct OnigEncodingTypeST* enc); void *auxiliary_data; @@ -256,7 +262,7 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII -#define enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) +#define onig_enc_len(enc,p,e) ONIGENC_MBC_ENC_LEN(enc, p, e) #define ONIGENC_IS_UNDEF(enc) ((enc) == ONIG_ENCODING_UNDEF) #define ONIGENC_IS_SINGLEBYTE(enc) (ONIGENC_MBC_MAXLEN(enc) == 1) @@ -604,6 +610,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_NEVER_ENDING_RECURSION -221 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_CODE_POINT_VALUE -400 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 #define ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION -402 diff --git a/regcomp.c b/regcomp.c index f3d604a21d..19cd91fc03 100644 --- a/regcomp.c +++ b/regcomp.c @@ -115,7 +115,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { + for (i = 0; i < (int )BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } return 1; @@ -416,8 +416,8 @@ compile_tree_n_times(Node* node, int n, regex_t* reg) } static int -add_compile_string_length(UChar* s, int mb_len, int str_len, - regex_t* reg, int ignore_case) +add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len, + regex_t* reg ARG_UNUSED, int ignore_case) { int len; int op = select_str_opcode(mb_len, str_len, ignore_case); @@ -469,13 +469,13 @@ compile_length_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p, sn->end); + prev_len = enclen(enc, p, sn->end); p += prev_len; slen = 1; rlen = 0; for (; p < sn->end; ) { - len = enc_len(enc, p, sn->end); + len = enclen(enc, p, sn->end); if (len == prev_len) { slen++; } @@ -518,12 +518,12 @@ compile_string_node(Node* node, regex_t* reg) ambig = NSTRING_IS_AMBIG(node); p = prev = sn->s; - prev_len = enc_len(enc, p, end); + prev_len = enclen(enc, p, end); p += prev_len; slen = 1; for (; p < end; ) { - len = enc_len(enc, p, end); + len = enclen(enc, p, end); if (len == prev_len) { slen++; } @@ -1535,7 +1535,7 @@ compile_length_tree(Node* node, regex_t* reg) { BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); @@ -1659,9 +1659,9 @@ compile_tree(Node* node, regex_t* reg) { BRefNode* br = NBREF(node); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { - r = add_opcode(reg, OP_BACKREF_AT_LEVEL); + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); if (r) return r; r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); if (r) return r; @@ -1703,7 +1703,7 @@ compile_tree(Node* node, regex_t* reg) } if (r) return r; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL add_bacref_mems: #endif r = add_length(reg, br->back_num); @@ -1951,7 +1951,7 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) } #endif -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT static int quantifiers_memory_node_info(Node* node) { @@ -2018,7 +2018,7 @@ quantifiers_memory_node_info(Node* node) return r; } -#endif /* USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK */ +#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ static int get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env) @@ -2312,7 +2312,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) StrNode* sn = NSTR(node); UChar *s = sn->s; while (s < sn->end) { - s += enc_len(reg->enc, s, sn->end); + s += enclen(reg->enc, s, sn->end); (*len)++; } } @@ -3003,25 +3003,12 @@ setup_subexp_call(Node* node, ScanEnv* env) case NT_CALL: { - int n, num, *refs; - UChar *p; CallNode* cn = NCALL(node); Node** nodes = SCANENV_MEM_NODES(env); -#ifdef USE_NAMED_GROUP - n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); -#else - n = -1; -#endif - if (n <= 0) { - /* name not found, check group number. (?*ddd) */ - p = cn->name; - num = onig_scan_unsigned_number(&p, cn->name_end, env->enc); - if (num <= 0 || p != cn->name_end) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } + if (cn->group_num != 0) { + int gnum = cn->group_num; + #ifdef USE_NAMED_GROUP if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && @@ -3029,32 +3016,47 @@ setup_subexp_call(Node* node, ScanEnv* env) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } #endif - if (num > env->num_mem) { + if (gnum > env->num_mem) { onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_GROUP_REFERENCE; } - cn->ref_num = num; - goto set_call_attr; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->ref_num = refs[0]; + +#ifdef USE_NAMED_GROUP set_call_attr: - cn->target = nodes[cn->ref_num]; +#endif + cn->target = nodes[cn->group_num]; if (IS_NULL(cn->target)) { onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); return ONIGERR_UNDEFINED_NAME_REFERENCE; } SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->ref_num); + BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); cn->unset_addr_list = env->unset_addr_list; } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, + &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, + ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif } break; @@ -3389,7 +3391,7 @@ expand_case_fold_string(Node* node, regex_t* reg) goto err; } - len = enc_len(reg->enc, p, end); + len = enclen(reg->enc, p, end); if (n == 0) { if (IS_NULL(snode)) { @@ -3706,7 +3708,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (IS_BACKREF_NEST_LEVEL(br)) { BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); } @@ -3731,7 +3733,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) if (r) break; if (d == 0) { qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT r = quantifiers_memory_node_info(target); if (r < 0) break; if (r > 0) { @@ -3898,7 +3900,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moor search */ static int -set_bm_skip(UChar* s, UChar* end, OnigEncoding enc, +set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, UChar skip[], int** int_skip) { int i, len; @@ -3987,7 +3989,7 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < sizeof(ByteValTable)/sizeof(ByteValTable[0])) { + if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else @@ -4019,7 +4021,7 @@ distance_value(MinMaxLen* mm) if (mm->max == ONIG_INFINITE_DISTANCE) return 0; d = mm->max - mm->min; - if (d < sizeof(dist_vals)/sizeof(dist_vals[0])) + if (d < (int )(sizeof(dist_vals)/sizeof(dist_vals[0]))) /* return dist_vals[d] * 16 / (mm->min + 12); */ return (int )dist_vals[d]; else @@ -4212,7 +4214,7 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) p = add->s; end = p + add->len; for (i = to->len; p < end; ) { - len = enc_len(enc, p, end); + len = enclen(enc, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4227,14 +4229,14 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) } static void -concat_opt_exact_info_str(OptExactInfo* to, - UChar* s, UChar* end, int raw, OnigEncoding enc) +concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) { - len = enc_len(enc, p, end); + len = enclen(enc, p, end); if (i + len > OPT_EXACT_MAXLEN) break; for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; @@ -4260,7 +4262,7 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) for (i = 0; i < to->len && i < add->len; ) { if (to->s[i] != add->s[i]) break; - len = enc_len(env->enc, to->s + i, to->s + to->len); + len = enclen(env->enc, to->s + i, to->s + to->len); for (j = 1; j < len; j++) { if (to->s[i+j] != add->s[i+j]) break; @@ -5044,7 +5046,7 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, fputc((int )code, fp); } - p += enc_len(enc, p); + p += enclen(enc, p); } } else { @@ -5634,8 +5636,6 @@ onig_init(void) extern int onig_end(void) { - extern int onig_free_shared_cclass_table(void); - THREAD_ATOMIC_START; #ifdef ONIG_DEBUG_STATISTICS @@ -5679,11 +5679,11 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code) } extern int -onig_is_code_in_cc_len(int enclen, OnigCodePoint code, CClassNode* cc) +onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) { int found; - if (enclen > 1 || (code >= SINGLE_BYTE_SIZE)) { + if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { if (IS_NULL(cc->mbuf)) { found = 0; } @@ -5776,7 +5776,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_AT_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -5968,7 +5968,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, break; case OP_EXACT1_IC: - len = enc_len(enc, bp); + len = enclen(enc, bp); p_string(f, len, bp); bp += len; break; @@ -6043,7 +6043,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, } break; - case OP_BACKREF_AT_LEVEL: + case OP_BACKREF_WITH_LEVEL: { OnigOptionType option; LengthType level; @@ -6182,7 +6182,7 @@ print_indent_tree(FILE* f, Node* node, int indent) case NT_CCLASS: fprintf(f, "", (int )node); - if (IS_CCLASS_NOT(NCCLASS(node)) fputs(" not", f); + if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); if (NCCLASS(node)->mbuf) { BBuf* bbuf = NCCLASS(node)->mbuf; for (i = 0; i < bbuf->used; i++) { diff --git a/regenc.c b/regenc.c index ebeb086810..c6262b0909 100644 --- a/regenc.c +++ b/regenc.c @@ -55,7 +55,7 @@ onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const U { UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s); if (p < s) { - p += enc_len(enc, p, s); + p += enclen(enc, p, s); } return p; } @@ -68,7 +68,7 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, if (p < s) { if (prev) *prev = (const UChar* )p; - p += enc_len(enc, p, s); + p += enclen(enc, p, s); } else { if (prev) *prev = (const UChar* )NULL; /* Sorry */ @@ -351,7 +351,7 @@ const UChar OnigEncISO_8859_1_ToUpperCaseTable[256] = { #endif extern void -onigenc_set_default_caseconv_table(const UChar* table) +onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) { /* nothing */ /* obsoleted. */ @@ -393,15 +393,16 @@ const OnigPairCaseFoldCodes OnigAsciiLowerMap[] = { }; extern int -onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag, +onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void* arg, OnigEncoding enc) { OnigCodePoint code; int i, r; - for (i = 0; i < sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes) - ; i++) { + for (i = 0; + i < (int )(sizeof(OnigAsciiLowerMap)/sizeof(OnigPairCaseFoldCodes)); + i++) { code = OnigAsciiLowerMap[i].to; r = (*f)(OnigAsciiLowerMap[i].from, &code, 1, arg); if (r != 0) return r; @@ -435,8 +436,8 @@ onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag, return 0; } -extern int -ss_apply_all_case_fold(OnigCaseFoldType flag, +static int +ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void* arg) { static OnigCodePoint ss[] = { 0x73, 0x73 }; @@ -475,7 +476,7 @@ onigenc_apply_all_case_fold_with_map(int map_size, extern int onigenc_get_case_fold_codes_by_str_with_map(int map_size, const OnigPairCaseFoldCodes map[], - int ess_tsett_flag, OnigCaseFoldType flag, + int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { if (0x41 <= *p && *p <= 0x5a) { @@ -555,7 +556,7 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, extern int -onigenc_not_support_get_ctype_code_range(int ctype, +onigenc_not_support_get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc) { @@ -656,7 +657,7 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) int c, i, len; OnigCodePoint n; - len = enc_len(enc, p, end); + len = enclen(enc, p, end); n = (OnigCodePoint )(*p++); if (len == 1) return n; @@ -669,8 +670,9 @@ onigenc_mbn_mbc_to_code(OnigEncoding enc, const UChar* p, const UChar* end) } extern int -onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, - const UChar** pp, const UChar* end, UChar* lower) +onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED, + const UChar** pp, const UChar* end ARG_UNUSED, + UChar* lower) { int len; const UChar *p = *pp; @@ -683,7 +685,7 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag, else { int i; - len = enc_len(enc, p, end); + len = enclen(enc, p, end); for (i = 0; i < len; i++) { *lower++ = *p++; } @@ -704,7 +706,7 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag, return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); } - (*pp) += enc_len(enc, p); + (*pp) += enclen(enc, p); return FALSE; } #endif @@ -736,8 +738,8 @@ onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf, p) != (p - buf)) - return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } @@ -759,8 +761,8 @@ onigenc_mb4_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf) *p++ = (UChar )(code & 0xff); #if 1 - if (enc_len(enc, buf, p) != (p - buf)) - return ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE; + if (enclen(enc, buf, p) != (p - buf)) + return ONIGERR_INVALID_CODE_POINT_VALUE; #endif return p - buf; } @@ -843,7 +845,7 @@ onigenc_with_ascii_strncmp(OnigEncoding enc, const UChar* p, const UChar* end, if (x) return x; sascii++; - p += enc_len(enc, p, end); + p += enclen(enc, p, end); } return 0; } @@ -900,7 +902,7 @@ onigenc_property_list_add_property(UChar* name, const OnigCodePoint* prop, } extern int -onigenc_property_list_init(int (*f)()) +onigenc_property_list_init(int (*f)(void)) { int r; diff --git a/regenc.h b/regenc.h index 0bd0abeeb2..e34ce40b47 100644 --- a/regenc.h +++ b/regenc.h @@ -57,17 +57,21 @@ typedef struct { #define FALSE 0 #endif -/* error codes */ -#define ONIGENC_ERR_MEMORY -5 -#define ONIGENC_ERR_TYPE_BUG -6 -#define ONIGENC_ERR_INVALID_WIDE_CHAR_VALUE -400 -#define ONIGENC_ERR_TOO_BIG_WIDE_CHAR_VALUE -401 +#ifndef ARG_UNUSED +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif +#endif #define ONIG_IS_NULL(p) (((void*)(p)) == (void*)0) #define ONIG_IS_NOT_NULL(p) (((void*)(p)) != (void*)0) #define ONIG_CHECK_NULL_RETURN(p) if (ONIG_IS_NULL(p)) return NULL #define ONIG_CHECK_NULL_RETURN_VAL(p,val) if (ONIG_IS_NULL(p)) return (val) +#define enclen(enc,p,e) ONIGENC_MBC_ENC_LEN(enc,p,e) + /* character types bit flag */ #define BIT_CTYPE_NEWLINE (1<< ONIGENC_CTYPE_NEWLINE) #define BIT_CTYPE_ALPHA (1<< ONIGENC_CTYPE_ALPHA) @@ -111,7 +115,7 @@ ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, Oni ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[], OnigEncoding enc)); ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((int ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)); +ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[], OnigEncoding enc)); ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end, OnigEncoding enc)); @@ -141,7 +145,7 @@ ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint co /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype, OnigEncoding enc)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((int ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); diff --git a/regerror.c b/regerror.c index cf80bbfc99..2bc2da4c71 100644 --- a/regerror.c +++ b/regerror.c @@ -142,8 +142,8 @@ onig_error_code_to_format(int code) p = "too big wide-char value"; break; case ONIGERR_TOO_LONG_WIDE_CHAR_VALUE: p = "too long wide-char value"; break; - case ONIGERR_INVALID_WIDE_CHAR_VALUE: - p = "invalid wide-char value"; break; + case ONIGERR_INVALID_CODE_POINT_VALUE: + p = "invalid code point value"; break; case ONIGERR_EMPTY_GROUP_NAME: p = "group name is empty"; break; case ONIGERR_INVALID_GROUP_NAME: @@ -182,6 +182,15 @@ onig_error_code_to_format(int code) return (UChar* )p; } +static void sprint_byte(char* s, unsigned int v) +{ + sprintf(s, "%02x", (v & 0377)); +} + +static void sprint_byte_with_x(char* s, unsigned int v) +{ + sprintf(s, "\\x%02x", (v & 0377)); +} static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, UChar buf[], int buf_size, int *is_over) @@ -196,10 +205,17 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, while (p < end) { code = ONIGENC_MBC_TO_CODE(enc, p, end); if (code >= 0x80) { - if (len + 5 <= buf_size) { - sprintf((char* )(&(buf[len])), "\\x%02X", - (unsigned int )(code & 0377)); - len += 5; + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; } else { break; @@ -209,7 +225,7 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, buf[len++] = (UChar )code; } - p += enc_len(enc, p, end); + p += enclen(enc, p, end); if (len >= buf_size) break; } @@ -330,7 +346,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) while (p < pat_end) { if (*p == '\\') { *s++ = *p++; - len = enc_len(enc, p, pat_end); + len = enclen(enc, p, pat_end); while (len-- > 0) *s++ = *p++; } else if (*p == '/') { @@ -338,7 +354,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) *s++ = *p++; } else if (ONIGENC_IS_MBC_HEAD(enc, p, pat_end)) { - len = enc_len(enc, p, pat_end); + len = enclen(enc, p, pat_end); if (ONIGENC_MBC_MINLEN(enc) == 1) { while (len-- > 0) *s++ = *p++; } @@ -346,7 +362,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) int blen; while (len-- > 0) { - sprintf((char* )bs, "\\x%02X", *p++ & 0377); + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (blen-- > 0) *s++ = *bp++; @@ -355,7 +371,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprintf((char* )bs, "\\x%02X", *p++ & 0377); + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (len-- > 0) *s++ = *bp++; diff --git a/regexec.c b/regexec.c index 84d69659c2..be3398961e 100644 --- a/regexec.c +++ b/regexec.c @@ -29,10 +29,12 @@ #include "regint.h" +#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE + #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ - ONIGENC_IS_MBC_NEWLINE(enc,(p+enc_len(enc,p)),end)) + ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif #ifdef USE_CAPTURE_HISTORY @@ -196,7 +198,7 @@ onig_region_resize(OnigRegion* region, int n) return 0; } -extern int +static int onig_region_resize_clear(OnigRegion* region, int n) { int r; @@ -1019,7 +1021,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define IS_EMPTY_STR (str == end) #define ON_STR_BEGIN(s) ((s) == str) #define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #define DATA_ENSURE_CHECK1 (s < right_range) #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail @@ -1027,7 +1029,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define DATA_ENSURE_CHECK1 (s < end) #define DATA_ENSURE_CHECK(n) (s + (n) <= end) #define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ #ifdef USE_CAPTURE_HISTORY @@ -1072,7 +1074,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, } #endif -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL static int mem_is_in_memp(int mem, int num, UChar* memp) { int i; @@ -1140,7 +1142,7 @@ static int backref_match_at_nested_level(regex_t* reg return 0; } -#endif /* USE_BACKREF_AT_LEVEL */ +#endif /* USE_BACKREF_WITH_LEVEL */ #ifdef ONIG_DEBUG_STATISTICS @@ -1234,7 +1236,7 @@ typedef struct { /* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar* right_range, #endif const UChar* sstart, UChar* sprev, OnigMatchArg* msa) @@ -1296,7 +1298,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fprintf(stderr, "%4d> \"", (int )(s - str)); bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { - len = enc_len(encode, q); + len = enclen(encode, q); while (len-- > 0) *bp++ = *q++; } if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } @@ -1328,7 +1330,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = n; region = msa->region; if (region) { -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; @@ -1351,7 +1353,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } else { -#endif /* USE_POSIX_REGION_OPTION */ +#endif /* USE_POSIX_API_REGION_OPTION */ region->beg[0] = sstart - str; region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { @@ -1397,7 +1399,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION } /* else IS_POSIX_REGION() */ #endif } /* if (region) */ @@ -1642,7 +1644,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ + s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */ MOP_OUT; break; @@ -1657,7 +1659,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int mb_len; DATA_ENSURE(1); - mb_len = enc_len(encode, s, end); + mb_len = enclen(encode, s, end); DATA_ENSURE(mb_len); ss = s; s += mb_len; @@ -1697,7 +1699,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; - s += enc_len(encode, s, end); + s += enclen(encode, s, end); MOP_OUT; break; @@ -1715,7 +1717,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, { OnigCodePoint code; UChar *ss; - int mb_len = enc_len(encode, s, end); + int mb_len = enclen(encode, s, end); if (! DATA_ENSURE_CHECK(mb_len)) { DATA_ENSURE(1); @@ -1769,7 +1771,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(1); GET_POINTER_INC(node, p); - mb_len = enc_len(encode, s, end); + mb_len = enclen(encode, s, end); ss = s; s += mb_len; DATA_ENSURE(0); @@ -1781,7 +1783,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); - n = enc_len(encode, s, end); + n = enclen(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; @@ -1790,7 +1792,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); - n = enc_len(encode, s, end); + n = enclen(encode, s, end); DATA_ENSURE(n); s += n; MOP_OUT; @@ -1799,7 +1801,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s, end); + n = enclen(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1811,7 +1813,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); - n = enc_len(encode, s, end); + n = enclen(encode, s, end); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1830,7 +1832,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s, end); + n = enclen(encode, s, end); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1845,7 +1847,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); } - n = enc_len(encode, s, end); + n = enclen(encode, s, end); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1868,7 +1870,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; sprev = s; @@ -1886,7 +1888,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (scv) goto fail; STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enc_len(encode, s); + n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); sprev = s; @@ -1906,7 +1908,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s, end); + s += enclen(encode, s, end); MOP_OUT; break; @@ -1915,7 +1917,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; - s += enc_len(encode, s, end); + s += enclen(encode, s, end); MOP_OUT; break; @@ -2043,14 +2045,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && - ON_STR_END(s + enc_len(encode, s, end))) { + ON_STR_END(s + enclen(encode, s, end))) { MOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - UChar* ss = s + enc_len(encode, s); - ss += enc_len(encode, ss); + UChar* ss = s + enclen(encode, s); + ss += enclen(encode, ss); if (ON_STR_END(ss)) { MOP_OUT; continue; @@ -2157,7 +2159,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); sprev = s; STRING_CMP(pstart, s, n); - while (sprev + (len = enc_len(encode, sprev, end)) < s) + while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; MOP_OUT; @@ -2189,7 +2191,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, DATA_ENSURE(n); sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enc_len(encode, sprev, end)) < s) + while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; MOP_OUT; @@ -2224,7 +2226,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_VALUE(pstart, swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev, end)) < s) + while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -2263,7 +2265,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enc_len(encode, sprev, end)) < s) + while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * (tlen - i - 1)); @@ -2275,8 +2277,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } break; -#ifdef USE_BACKREF_AT_LEVEL - case OP_BACKREF_AT_LEVEL: +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_WITH_LEVEL: { int len; OnigOptionType ic; @@ -2289,7 +2291,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; if (backref_match_at_nested_level(reg, stk, stk_base, ic , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enc_len(encode, sprev, end)) < s) + while (sprev + (len = enclen(encode, sprev, end)) < s) sprev += len; p += (SIZE_MEMNUM * tlen); @@ -2361,7 +2363,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); { int isnull; @@ -2389,7 +2391,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int isnull; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK +#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); #else STACK_NULL_CHECK_REC(isnull, mem, s); @@ -2760,7 +2762,7 @@ slow_search(OnigEncoding enc, UChar* target, UChar* target_end, if (t == target_end) return s; } - s += enc_len(enc, s, end); + s += enclen(enc, s, end); } return (UChar* )NULL; @@ -2805,7 +2807,7 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag, s, text_end)) return s; - s += enc_len(enc, s, text_end); + s += enclen(enc, s, text_end); } return (UChar* )NULL; @@ -2903,7 +2905,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->map[*se]; t = s; do { - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -2919,7 +2921,7 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, skip = reg->int_map[*se]; t = s; do { - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while ((s - t) < skip && s < end); } } @@ -2966,7 +2968,8 @@ bm_search(regex_t* reg, const UChar* target, const UChar* target_end, } static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc, int** skip) +set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, + int** skip) { int i, len; @@ -3024,7 +3027,7 @@ map_search(OnigEncoding enc, UChar map[], while (s < text_range) { if (map[*s]) return (UChar* )s; - s += enc_len(enc, s, text_range); + s += enclen(enc, s, text_range); } return (UChar* )NULL; } @@ -3086,7 +3089,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3098,7 +3101,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On if (r == 0) { prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE end, #endif at, prev, &msa); @@ -3127,7 +3130,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } else { UChar *q = p + reg->dmin; - while (p < q) p += enc_len(reg->enc, p, end); + while (p < q) p += enclen(reg->enc, p, end); } } @@ -3158,7 +3161,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, if (p - reg->dmin < s) { retry_gate: pprev = p; - p += enc_len(reg->enc, p, end); + p += enclen(reg->enc, p, end); goto retry; } @@ -3353,7 +3356,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, UChar *s, *prev; OnigMatchArg msa; const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar *orig_range = range; #endif @@ -3389,7 +3392,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif if (region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { @@ -3400,7 +3403,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (start > end || start < str) goto mismatch_no_msa; -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3444,7 +3447,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_IS_COMPLETE_RANGE */ +#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ /* anchor optimize: resume search range */ @@ -3604,7 +3607,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, while (s <= high) { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); } } while (s < range); goto mismatch; @@ -3617,11 +3620,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); } } while (s < range); goto mismatch; @@ -3632,7 +3635,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; - s += enc_len(reg->enc, s, end); + s += enclen(reg->enc, s, end); } while (s < range); if (s == range) { /* because empty match with /$/. */ @@ -3640,9 +3643,9 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ -#ifdef USE_MATCH_RANGE_IS_COMPLETE_RANGE +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE if (orig_start < end) - orig_start += enc_len(reg->enc, orig_start); /* is upper range */ + orig_start += enclen(reg->enc, orig_start, end); /* is upper range */ #endif if (reg->optimize != ONIG_OPTIMIZE_NONE) { @@ -3718,7 +3721,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not setted in match_at(). */ if (IS_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_REGION_OPTION +#ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) #endif ) { diff --git a/regint.h b/regint.h index 182eed2670..58b3b4ab9a 100644 --- a/regint.h +++ b/regint.h @@ -51,19 +51,19 @@ (defined(__ppc__) && defined(__APPLE__)) || \ defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD86) || \ defined(__mc68020__) -/* #define PLATFORM_UNALIGNED_WORD_ACCESS */ +#define PLATFORM_UNALIGNED_WORD_ACCESS #endif /* config */ /* spec. config */ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL -#define USE_BACKREF_AT_LEVEL /* \k, \k */ -#define USE_INFINITE_REPEAT_MONOMANIAC_MEM_STATUS_CHECK /* /(?:()|())*\2/ */ +#define USE_BACKREF_WITH_LEVEL /* \k, \k */ +#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR /* #define USE_RECOMPILE_API */ -/* #define USE_CRNL_AS_LINE_TERMINATOR */ /* moved to regenc.h. */ +/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ /* internal config */ #define USE_PARSE_TREE_NODE_RECYCLE @@ -75,6 +75,12 @@ #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#if defined(__GNUC__) +# define ARG_UNUSED __attribute__ ((unused)) +#else +# define ARG_UNUSED +#endif + /* */ /* escape other system UChar definition */ #ifndef RUBY_DEFINES_H @@ -236,7 +242,6 @@ #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) - #ifdef PLATFORM_UNALIGNED_WORD_ACCESS #define PLATFORM_GET_INC(val,p,type) do{\ @@ -287,17 +292,17 @@ typedef unsigned int BitStatusType; #define BIT_STATUS_CLEAR(stats) (stats) = 0 #define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) #define BIT_STATUS_AT(stats,n) \ - ((n) < BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) + ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) #define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < BIT_STATUS_BITS_NUM)\ + if ((n) < (int )BIT_STATUS_BITS_NUM) \ (stats) |= (1 << (n));\ else\ (stats) |= 1;\ } while (0) #define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < BIT_STATUS_BITS_NUM)\ + if ((n) < (int )BIT_STATUS_BITS_NUM)\ (stats) |= (1 << (n));\ } while (0) @@ -353,7 +358,7 @@ typedef Bits* BitSetRef; #define BITSET_CLEAR(bs) do {\ int i;\ - for (i = 0; i < BITSET_SIZE; i++) { (bs)[i] = 0; }\ + for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \ } while (0) #define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM] @@ -520,7 +525,7 @@ enum OpCode { OP_BACKREFN_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, - OP_BACKREF_AT_LEVEL, /* \k, \k */ + OP_BACKREF_WITH_LEVEL, /* \k, \k */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -831,6 +836,9 @@ extern int onig_st_insert_strend P_((hash_table_type* table, const UChar* str_ke } extern int onigenc_property_list_add_property P_((UChar* name, const OnigCodePoint* prop, hash_table_type **table, const OnigCodePoint*** plist, int *pnum, int *psize)); -extern int onigenc_property_list_init P_((int (*f)())); + +typedef int (*ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)(void); + +extern int onigenc_property_list_init P_((ONIGENC_INIT_PROPERTY_LIST_FUNC_TYPE)); #endif /* REGINT_H */ diff --git a/regparse.c b/regparse.c index f86abc80ab..e5a732053b 100644 --- a/regparse.c +++ b/regparse.c @@ -31,6 +31,9 @@ #define WARN_BUFSIZE 256 +#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + + OnigSyntaxType OnigSyntaxRuby = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | @@ -69,7 +72,7 @@ OnigSyntaxType OnigSyntaxRuby = { OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; -extern void onig_null_warn(const char* s) { } +extern void onig_null_warn(const char* s ARG_UNUSED) { } #ifdef DEFAULT_WARN_FUNCTION static OnigWarnFunc onig_warn = (OnigWarnFunc )DEFAULT_WARN_FUNCTION; @@ -117,6 +120,9 @@ bbuf_clone(BBuf** rto, BBuf* from) return 0; } +#define BACKREF_REL_TO_ABS(rel_no, env) \ + ((env)->num_mem + 1 + (rel_no)) + #define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) #define MBCODE_START_POS(enc) \ @@ -136,7 +142,7 @@ bbuf_clone(BBuf** rto, BBuf* from) #define BITSET_IS_EMPTY(bs,empty) do {\ int i;\ empty = 1;\ - for (i = 0; i < BITSET_SIZE; i++) {\ + for (i = 0; i < (int )BITSET_SIZE; i++) {\ if ((bs)[i] != 0) {\ empty = 0; break;\ }\ @@ -165,35 +171,35 @@ static void bitset_invert(BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } + for (i = 0; i < (int )BITSET_SIZE; i++) { bs[i] = ~(bs[i]); } } static void bitset_invert_to(BitSetRef from, BitSetRef to) { int i; - for (i = 0; i < BITSET_SIZE; i++) { to[i] = ~(from[i]); } + for (i = 0; i < (int )BITSET_SIZE; i++) { to[i] = ~(from[i]); } } static void bitset_and(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { dest[i] &= bs[i]; } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] &= bs[i]; } } static void bitset_or(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { dest[i] |= bs[i]; } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] |= bs[i]; } } static void bitset_copy(BitSetRef dest, BitSetRef bs) { int i; - for (i = 0; i < BITSET_SIZE; i++) { dest[i] = bs[i]; } + for (i = 0; i < (int )BITSET_SIZE; i++) { dest[i] = bs[i]; } } extern int @@ -218,6 +224,7 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end) } } +#ifdef USE_NAMED_GROUP static UChar* strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) { @@ -236,7 +243,7 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) return r; } - +#endif /* scan pattern methods */ #define PEND_VALUE 0 @@ -287,32 +294,20 @@ strcat_capa_from_static(UChar* dest, UChar* dest_end, return r; } -#ifdef USE_NAMED_GROUP - -#define INIT_NAME_BACKREFS_ALLOC_NUM 8 - -typedef struct { - UChar* name; - int name_len; /* byte length */ - int back_num; /* number of backrefs */ - int back_alloc; - int back_ref1; - int* back_refs; -} NameEntry; #ifdef USE_ST_LIBRARY #include "ruby/st.h" typedef struct { - unsigned char* s; - unsigned char* end; -} st_strend_key; + UChar* s; + UChar* end; +} st_str_end_key; static int -str_end_cmp(st_strend_key* x, st_strend_key* y) +str_end_cmp(st_str_end_key* x, st_str_end_key* y) { - unsigned char *p, *q; + UChar *p, *q; int c; if ((x->end - x->s) != (y->end - y->s)) @@ -331,7 +326,7 @@ str_end_cmp(st_strend_key* x, st_strend_key* y) } static int -str_end_hash(st_strend_key* x) +str_end_hash(st_str_end_key* x) { UChar *p; int val = 0; @@ -347,7 +342,7 @@ str_end_hash(st_strend_key* x) extern hash_table_type* onig_st_init_strend_table_with_size(int size) { - static const struct st_hash_type hashType = { + static struct st_hash_type hashType = { str_end_cmp, str_end_hash, }; @@ -360,7 +355,7 @@ extern int onig_st_lookup_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type *value) { - st_strend_key key; + st_str_end_key key; key.s = (UChar* )str_key; key.end = (UChar* )end_key; @@ -372,10 +367,10 @@ extern int onig_st_insert_strend(hash_table_type* table, const UChar* str_key, const UChar* end_key, hash_data_type value) { - st_strend_key* key; + st_str_end_key* key; int result; - key = (st_strend_key* )xmalloc(sizeof(st_strend_key)); + key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); key->s = (UChar* )str_key; key->end = (UChar* )end_key; result = onig_st_insert(table, (st_data_t )key, value); @@ -385,6 +380,23 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, return result; } +#endif /* USE_ST_LIBRARY */ + + +#ifdef USE_NAMED_GROUP + +#define INIT_NAME_BACKREFS_ALLOC_NUM 8 + +typedef struct { + UChar* name; + int name_len; /* byte length */ + int back_num; /* number of backrefs */ + int back_alloc; + int back_ref1; + int* back_refs; +} NameEntry; + +#ifdef USE_ST_LIBRARY typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -426,10 +438,10 @@ onig_print_names(FILE* fp, regex_t* reg) } return 0; } -#endif +#endif /* ONIG_DEBUG */ static int -i_free_name_entry(UChar* key, NameEntry* e, void* arg) +i_free_name_entry(UChar* key, NameEntry* e, void* arg ARG_UNUSED) { xfree(e->name); if (IS_NOT_NULL(e->back_refs)) xfree(e->back_refs); @@ -486,7 +498,7 @@ typedef struct { } INamesArg; static int -i_names(UChar* key, NameEntry* e, INamesArg* arg) +i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg) { int r = (*(arg->func))(e->name, e->name + e->name_len, @@ -519,7 +531,7 @@ onig_foreach_name(regex_t* reg, } static int -i_renumber_name(UChar* key, NameEntry* e, GroupNumRemap* map) +i_renumber_name(UChar* key ARG_UNUSED, NameEntry* e, GroupNumRemap* map) { int i; @@ -1140,7 +1152,7 @@ node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out, n = ONIGENC_CODE_RANGE_NUM(ranges); for (i = 0; i < n; i++) { for (j = ONIGENC_CODE_RANGE_FROM(ranges, i); - j <= (int )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { + j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) { if (j >= sb_out) goto sb_end; BITSET_SET_BIT(cc->bs, j); @@ -1256,7 +1268,7 @@ onig_node_new_anchor(int type) static Node* node_new_backref(int back_num, int* backrefs, int by_name, -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL int exist_level, int nest_level, #endif ScanEnv* env) @@ -1273,7 +1285,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, if (by_name != 0) NBREF(node)->state |= NST_NAME_REF; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { NBREF(node)->state |= NST_NEST_LEVEL; NBREF(node)->nest_level = nest_level; @@ -1307,17 +1319,17 @@ node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_SUBEXP_CALL static Node* -node_new_call(UChar* name, UChar* name_end) +node_new_call(UChar* name, UChar* name_end, int gnum) { Node* node = node_new(); CHECK_NULL_RETURN(node); SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->ref_num = CALLNODE_REFNUM_UNDEF; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; + NCALL(node)->state = 0; + NCALL(node)->target = NULL_NODE; + NCALL(node)->name = name; + NCALL(node)->name_end = name_end; + NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ return node; } #endif @@ -1539,7 +1551,7 @@ static int str_node_can_be_split(StrNode* sn, OnigEncoding enc) { if (sn->end > sn->s) { - return ((enc_len(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0); + return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0); } return 0; } @@ -1955,29 +1967,6 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) return 0; } -static int -clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) -{ - BBuf *tbuf; - int r; - - if (IS_NCCLASS_NOT(cc)) { - bitset_invert(cc->bs); - - if (! ONIGENC_IS_SINGLEBYTE(enc)) { - r = not_code_range_buf(enc, cc->mbuf, &tbuf); - if (r != 0) return r; - - bbuf_free(cc->mbuf); - cc->mbuf = tbuf; - } - - NCCLASS_CLEAR_NOT(cc); - } - - return 0; -} - static int and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) { @@ -2297,7 +2286,7 @@ typedef struct { int ref1; int* refs; int by_name; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL int exist_level; int level; /* \k */ #endif @@ -2305,6 +2294,7 @@ typedef struct { struct { UChar* name; UChar* name_end; + int gnum; } call; struct { int ctype; @@ -2494,22 +2484,31 @@ get_name_end_code_point(OnigCodePoint start) } #ifdef USE_NAMED_GROUP -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL /* \k, \k + \k, \k + \k<-num+n>, \k<-num-n> */ static int fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* level) + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel) { - int r, exist_level = 0; + int r, sign, is_num, exist_level; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; UChar *name_end; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + is_num = exist_level = 0; + sign = 1; + pnum_head = *src; + end_code = get_name_end_code_point(start_code); name_end = end; @@ -2522,7 +2521,15 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME; - if (!ONIGENC_IS_CODE_WORD(enc, c)) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } @@ -2530,24 +2537,36 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, while (!PEND) { name_end = p; PFETCH(c); - if (c == end_code || c == ')' || c == '+' || c == '-') break; + if (c == end_code || c == ')' || c == '+' || c == '-') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } - if (!ONIGENC_IS_CODE_WORD(enc, c)) { + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } - if (c != end_code) { + if (r == 0 && c != end_code) { if (c == '+' || c == '-') { - int num; + int level; int flag = (c == '-' ? -1 : 1); PFETCH(c); if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; PUNFETCH; - num = onig_scan_unsigned_number(&p, end, enc); - if (num < 0) return ONIGERR_TOO_BIG_NUMBER; - *level = (num * flag); + level = onig_scan_unsigned_number(&p, end, enc); + if (level < 0) return ONIGERR_TOO_BIG_NUMBER; + *rlevel = (level * flag); exist_level = 1; PFETCH(c); @@ -2562,6 +2581,14 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, end: if (r == 0) { + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) goto err; + + *rback_num *= sign; + } + *rname_end = name_end; *src = p; return (exist_level ? 1 : 0); @@ -2571,7 +2598,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, return r; } } -#endif /* USE_BACKREF_AT_LEVEL */ +#endif /* USE_BACKREF_WITH_LEVEL */ /* def: 0 -> define name (don't allow number name) @@ -2579,21 +2606,26 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, */ static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { - int r, is_num; + int r, is_num, sign; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; UChar *name_end; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + end_code = get_name_end_code_point(start_code); name_end = end; + pnum_head = *src; r = 0; is_num = 0; + sign = 1; if (PEND) { return ONIGERR_EMPTY_GROUP_NAME; } @@ -2607,6 +2639,18 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, is_num = 1; else { r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; + } + } + else if (c == '-') { + if (ref == 1) { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + is_num = 0; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2614,37 +2658,66 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, } } - while (!PEND) { - name_end = p; - PFETCH(c); - if (c == end_code || c == ')') break; + if (r == 0) { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') { + if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + break; + } - if (is_num == 1) { - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) { - if (!ONIGENC_IS_CODE_WORD(enc, c)) + if (is_num != 0) { + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + else + r = ONIGERR_INVALID_GROUP_NAME; + + is_num = 0; + } + } + else { + if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - else - r = ONIGERR_INVALID_GROUP_NAME; + } } } - else { - if (!ONIGENC_IS_CODE_WORD(enc, c)) { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } + + if (c != end_code) { + r = ONIGERR_INVALID_GROUP_NAME; + name_end = end; } - } - if (c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } + if (is_num != 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + + *rback_num *= sign; + } - if (r == 0) { *rname_end = name_end; *src = p; return 0; } else { + while (!PEND) { + name_end = p; + PFETCH(c); + if (c == end_code || c == ')') + break; + } + if (PEND) + name_end = end; + + err: onig_scan_env_set_error_string(env, r, *src, name_end); return r; } @@ -2652,35 +2725,70 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, #else static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, int ref) { - int r, len; + int r, is_num, sign; OnigCodePoint end_code; OnigCodePoint c = 0; UChar *name_end; OnigEncoding enc = env->enc; + UChar *pnum_head; UChar *p = *src; PFETCH_READY; + *rback_num = 0; + end_code = get_name_end_code_point(start_code); + *rname_end = name_end = end; r = 0; + pnum_head = *src; + is_num = 0; + sign = 1; + + if (PEND) { + return ONIGERR_EMPTY_GROUP_NAME; + } + else { + PFETCH(c); + if (c == end_code) + return ONIGERR_EMPTY_GROUP_NAME; + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + is_num = 1; + } + else if (c == '-') { + is_num = 2; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; + } + } + while (!PEND) { name_end = p; - if (enc_len(enc, p) > 1) - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; PFETCH(c); if (c == end_code || c == ')') break; if (! ONIGENC_IS_CODE_DIGIT(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } - if (c != end_code) { + if (r == 0 && c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } if (r == 0) { + *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); + if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; + else if (*rback_num == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } + *rback_num *= sign; + *rname_end = name_end; *src = p; return 0; @@ -2691,7 +2799,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, return r; } } -#endif +#endif /* USE_NAMED_GROUP */ static void CC_ESC_WARN(ScanEnv* env, UChar *c) @@ -2733,12 +2841,12 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p, to); + q = p + enclen(enc, p, to); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q, to); + q += enclen(enc, q, to); } if (i >= n) { if (IS_NOT_NULL(next)) @@ -2764,19 +2872,19 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, while (p < to) { if (in_esc) { in_esc = 0; - p += enc_len(enc, p, to); + p += enclen(enc, p, to); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); - q = p + enc_len(enc, p, to); + q = p + enclen(enc, p, to); if (x == s[0]) { for (i = 1; i < n && q < to; i++) { x = ONIGENC_MBC_TO_CODE(enc, q, to); if (x != s[i]) break; - q += enc_len(enc, q, to); + q += enclen(enc, q, to); } if (i >= n) return 1; - p += enc_len(enc, p, to); + p += enclen(enc, p, to); } else { x = ONIGENC_MBC_TO_CODE(enc, p, to); @@ -2904,7 +3012,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if (p > prev + enc_len(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { + if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { PINC; tok->type = TK_CODE_POINT; tok->base = 16; @@ -3244,7 +3352,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } - if ((p > prev + enc_len(enc, prev, end)) && !PEND && PPEEK_IS('}')) { + if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { PINC; tok->type = TK_CODE_POINT; tok->u.code = (OnigCodePoint )num; @@ -3302,7 +3410,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.backref.num = 1; tok->u.backref.ref1 = num; tok->u.backref.by_name = 0; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL tok->u.backref.exist_level = 0; #endif break; @@ -3341,44 +3449,64 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == '<' || c == '\'') { UChar* name_end; int* backs; + int back_num; prev = p; -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL name_end = NULL_UCHARP; /* no need. escape gcc warning. */ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &tok->u.backref.level); + env, &back_num, &tok->u.backref.level); if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; #else - r = fetch_name(&p, end, &name_end, env, 1); + r = fetch_name(&p, end, &name_end, env, &back_num, 1); #endif if (r < 0) return r; - num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); - if (num <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - int i; - for (i = 0; i < num; i++) { - if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + if (back_num != 0) { + if (back_num < 0) { + back_num = BACKREF_REL_TO_ABS(back_num, env); + if (back_num <= 0) return ONIGERR_INVALID_BACKREF; } - } - tok->type = TK_BACKREF; - tok->u.backref.by_name = 1; - if (num == 1) { + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + return ONIGERR_INVALID_BACKREF; + } + tok->type = TK_BACKREF; + tok->u.backref.by_name = 0; tok->u.backref.num = 1; - tok->u.backref.ref1 = backs[0]; + tok->u.backref.ref1 = back_num; } else { - tok->u.backref.num = num; - tok->u.backref.refs = backs; + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + return ONIGERR_INVALID_BACKREF; + } + } + + tok->type = TK_BACKREF; + tok->u.backref.by_name = 1; + if (num == 1) { + tok->u.backref.num = 1; + tok->u.backref.ref1 = backs[0]; + } + else { + tok->u.backref.num = num; + tok->u.backref.refs = backs; + } } } else @@ -3392,15 +3520,17 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { PFETCH(c); if (c == '<' || c == '\'') { + int gnum; UChar* name_end; prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, 1); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); if (r < 0) return r; tok->type = TK_CALL; tok->u.call.name = prev; tok->u.call.name_end = name_end; + tok->u.call.gnum = gnum; } else PUNFETCH; @@ -3443,7 +3573,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.code = (OnigCodePoint )num; } else { /* string */ - p = tok->backp + enc_len(enc, tok->backp, end); + p = tok->backp + enclen(enc, tok->backp, end); } break; } @@ -3616,7 +3746,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } static int -add_ctype_to_cc_by_range(CClassNode* cc, int ctype, int not, OnigEncoding enc, +add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, + OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out, const OnigCodePoint mbr[]) { int i, r; @@ -3758,7 +3889,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) } else { for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* 0: invalid code point */ + if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ && ! ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); } @@ -3959,7 +4090,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v, if (intype == *type) { if (intype == CCV_SB) { if (*vs > 0xff || v > 0xff) - return ONIGERR_INVALID_WIDE_CHAR_VALUE; + return ONIGERR_INVALID_CODE_POINT_VALUE; if (*vs > v) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) @@ -4087,6 +4218,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (len > 1) { in_type = CCV_CODE_POINT; } + else if (len < 0) { + r = len; + goto err; + } else { sb_char: in_type = CCV_SB; @@ -4120,7 +4255,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, goto err; } - len = enc_len(env->enc, buf, buf+i); + len = enclen(env->enc, buf, buf+i); if (i < len) { r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; goto err; @@ -4367,11 +4502,15 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { int r, num; - int list_capture; Node *target; OnigOptionType option; - OnigEncoding enc = env->enc; OnigCodePoint c; + OnigEncoding enc = env->enc; + +#ifdef USE_NAMED_GROUP + int list_capture; +#endif + UChar* p = *src; PFETCH_READY; @@ -4406,6 +4545,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); break; +#ifdef USE_NAMED_GROUP case '\'': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { goto named_group1; @@ -4413,6 +4553,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, else return ONIGERR_UNDEFINED_GROUP_OPTION; break; +#endif case '<': /* look behind (?<=...), (?= BIT_STATUS_BITS_NUM) + if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; r = name_add(env->reg, name, name_end, num, env); @@ -4481,7 +4622,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, onig_node_free(*np); return num; } - else if (num >= BIT_STATUS_BITS_NUM) { + else if (num >= (int )BIT_STATUS_BITS_NUM) { onig_node_free(*np); return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } @@ -4727,12 +4868,12 @@ static int type_cclass_hash(type_cclass_key* key) val = 0; p = (UChar* )&(key->enc); - for (i = 0; i < sizeof(key->enc); i++) { + for (i = 0; i < (int )sizeof(key->enc); i++) { val = val * 997 + (int )*p++; } p = (UChar* )(&key->type); - for (i = 0; i < sizeof(key->type); i++) { + for (i = 0; i < (int )sizeof(key->type); i++) { val = val * 997 + (int )*p++; } @@ -4740,7 +4881,7 @@ static int type_cclass_hash(type_cclass_key* key) return val + (val >> 5); } -static const struct st_hash_type type_type_cclass_hash = { +static struct st_hash_type type_type_cclass_hash = { type_cclass_cmp, type_cclass_hash, }; @@ -4749,7 +4890,7 @@ static st_table* OnigTypeCClassTable; static int -i_free_shared_class(type_cclass_key* key, Node* node, void* arg) +i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED) { if (IS_NOT_NULL(node)) { CClassNode* cc = NCCLASS(node); @@ -4776,6 +4917,31 @@ onig_free_shared_cclass_table(void) #endif /* USE_SHARED_CCLASS_TABLE */ +#ifndef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +static int +clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc) +{ + BBuf *tbuf; + int r; + + if (IS_NCCLASS_NOT(cc)) { + bitset_invert(cc->bs); + + if (! ONIGENC_IS_SINGLEBYTE(enc)) { + r = not_code_range_buf(enc, cc->mbuf, &tbuf); + if (r != 0) return r; + + bbuf_free(cc->mbuf); + cc->mbuf = tbuf; + } + + NCCLASS_CLEAR_NOT(cc); + } + + return 0; +} +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ + typedef struct { ScanEnv* env; CClassNode* cc; @@ -4798,28 +4964,43 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], bs = cc->bs; if (to_len == 1) { - int in_cc; - in_cc = onig_is_code_in_cc(env->enc, from, cc); - if ((in_cc != 0 && !IS_NCCLASS_NOT(cc)) || - (in_cc == 0 && IS_NCCLASS_NOT(cc))) { + int is_in = onig_is_code_in_cc(env->enc, from, cc); +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) || + (is_in == 0 && IS_NCCLASS_NOT(cc))) { if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); add_code_range(&(cc->mbuf), env, *to, *to); } else { - /* /(?i:[^A-C])/.match("a") ==> fail. */ BITSET_SET_BIT(bs, *to); } } +#else + if (is_in != 0) { + if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { + if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); + add_code_range(&(cc->mbuf), env, *to, *to); + } + else { + if (IS_NCCLASS_NOT(cc)) { + BITSET_CLEAR_BIT(bs, *to); + } + else + BITSET_SET_BIT(bs, *to); + } + } +#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */ } else { int r, i, len; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; Node *snode = NULL_NODE; - if (onig_is_code_in_cc(env->enc, from, cc)) { - if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); - + if (onig_is_code_in_cc(env->enc, from, cc) +#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS + && !IS_NCCLASS_NOT(cc) +#endif + ) { for (i = 0; i < to_len; i++) { len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf); if (i == 0) { @@ -4857,7 +5038,7 @@ parse_exp(Node** np, OnigToken* tok, int term, Node** targetp; *np = NULL; - if (tok->type == term) + if (tok->type == (enum TokenSyms )term) goto end_of_token; switch (tok->type) { @@ -4924,7 +5105,7 @@ parse_exp(Node** np, OnigToken* tok, int term, len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enc_len(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { + if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { r = fetch_token(tok, src, end, env); NSTRING_CLEAR_RAW(*np); goto string_end; @@ -4940,7 +5121,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if (len < ONIGENC_MBC_MINLEN(env->enc)) { rem = ONIGENC_MBC_MINLEN(env->enc) - len; (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enc_len(env->enc, NSTR(*np)->s)) { + if (len + rem == enclen(env->enc, NSTR(*np)->s)) { NSTRING_CLEAR_RAW(*np); goto string_end; } @@ -5131,7 +5312,7 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_backref(len, (len > 1 ? tok->u.backref.refs : &(tok->u.backref.ref1)), tok->u.backref.by_name, -#ifdef USE_BACKREF_AT_LEVEL +#ifdef USE_BACKREF_WITH_LEVEL tok->u.backref.exist_level, tok->u.backref.level, #endif @@ -5141,9 +5322,18 @@ parse_exp(Node** np, OnigToken* tok, int term, #ifdef USE_SUBEXP_CALL case TK_CALL: - *np = node_new_call(tok->u.call.name, tok->u.call.name_end); - CHECK_NULL_RETURN_MEMERR(*np); - env->num_call++; + { + int gnum = tok->u.call.gnum; + + if (gnum < 0) { + gnum = BACKREF_REL_TO_ABS(gnum, env); + if (gnum <= 0) + return ONIGERR_INVALID_BACKREF; + } + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + CHECK_NULL_RETURN_MEMERR(*np); + env->num_call++; + } break; #endif @@ -5282,7 +5472,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, headp = &(NCDR(*headp)); } - if (tok->type != term) + if (tok->type != (enum TokenSyms )term) goto err; } else { @@ -5337,7 +5527,7 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, } extern void -onig_scan_env_set_error_string(ScanEnv* env, int ecode, +onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, UChar* arg, UChar* arg_end) { env->error = arg; diff --git a/regparse.h b/regparse.h index 7a6314098a..0c5c2c936c 100644 --- a/regparse.h +++ b/regparse.h @@ -213,7 +213,7 @@ typedef struct { typedef struct { NodeBase base; int state; - int ref_num; + int group_num; UChar* name; UChar* name_end; struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ @@ -340,6 +340,7 @@ extern void onig_node_str_clear P_((Node* node)); extern int onig_free_node_list P_((void)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); +extern int onig_free_shared_cclass_table P_((void)); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP diff --git a/version.h b/version.h index b68337c30d..1c21a2c5e4 100644 --- a/version.h +++ b/version.h @@ -1,7 +1,7 @@ #define RUBY_VERSION "1.9.0" -#define RUBY_RELEASE_DATE "2008-01-03" +#define RUBY_RELEASE_DATE "2008-01-04" #define RUBY_VERSION_CODE 190 -#define RUBY_RELEASE_CODE 20080103 +#define RUBY_RELEASE_CODE 20080104 #define RUBY_PATCHLEVEL 0 #define RUBY_VERSION_MAJOR 1 @@ -9,7 +9,7 @@ #define RUBY_VERSION_TEENY 0 #define RUBY_RELEASE_YEAR 2008 #define RUBY_RELEASE_MONTH 1 -#define RUBY_RELEASE_DAY 3 +#define RUBY_RELEASE_DAY 4 #ifdef RUBY_EXTERN RUBY_EXTERN const char ruby_version[]; -- cgit v1.2.3