diff options
author | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-12-10 17:47:04 +0000 |
---|---|---|
committer | naruse <naruse@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-12-10 17:47:04 +0000 |
commit | 2873edeafb6f6df1fc99bb9b1167591b99dd378c (patch) | |
tree | deb58ca3dc6d6cd71c1740e62aa7e47bea5ed37e /enc | |
parent | 42a677c895f82bcd611db2773fbe68b0558b142d (diff) | |
download | ruby-2873edeafb6f6df1fc99bb9b1167591b99dd378c.tar.gz |
Merge Onigmo 6.0.0
* https://github.com/k-takata/Onigmo/blob/Onigmo-6.0.0/HISTORY
* fix for ruby 2.4: https://github.com/k-takata/Onigmo/pull/78
* suppress warning: https://github.com/k-takata/Onigmo/pull/79
* include/ruby/oniguruma.h: include onigmo.h.
* template/encdb.h.tmpl: ignore the duplicated definition of EUC-CN in
enc/euc_kr.c. It is defined in enc/gb2312.c with a CRuby macro.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57045 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'enc')
42 files changed, 593 insertions, 527 deletions
diff --git a/enc/ascii.c b/enc/ascii.c index d34cc20582..8b32c414fe 100644 --- a/enc/ascii.c +++ b/enc/ascii.c @@ -29,9 +29,12 @@ */ #include "regenc.h" -#include "encindex.h" +#ifdef RUBY +# include "encindex.h" +#endif + #ifndef ENCINDEX_ASCII -#define ENCINDEX_ASCII 0 +# define ENCINDEX_ASCII 0 #endif OnigEncodingDefine(ascii, ASCII) = { @@ -51,9 +54,9 @@ OnigEncodingDefine(ascii, ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, ENCINDEX_ASCII, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("BINARY", "ASCII-8BIT") ENC_REPLICATE("IBM437", "ASCII-8BIT") diff --git a/enc/big5.c b/enc/big5.c index fc2bcadcc1..ab4fb69819 100644 --- a/enc/big5.c +++ b/enc/big5.c @@ -300,9 +300,9 @@ OnigEncodingDefine(big5, BIG5) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* @@ -335,9 +335,9 @@ OnigEncodingDefine(big5_hkscs, BIG5_HKSCS) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; ENC_ALIAS("Big5-HKSCS:2008", "Big5-HKSCS") @@ -370,7 +370,7 @@ OnigEncodingDefine(big5_uao, BIG5_UAO) = { onigenc_not_support_get_ctype_code_range, big5_left_adjust_char_head, big5_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; diff --git a/enc/cp949.c b/enc/cp949.c index f832cd5758..bd2c8d21a4 100644 --- a/enc/cp949.c +++ b/enc/cp949.c @@ -211,9 +211,9 @@ OnigEncodingDefine(cp949, CP949) = { onigenc_not_support_get_ctype_code_range, cp949_left_adjust_char_head, cp949_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* * Name: 
CP949 diff --git a/enc/emacs_mule.c b/enc/emacs_mule.c index a53f243dfe..f92eb183cf 100644 --- a/enc/emacs_mule.c +++ b/enc/emacs_mule.c @@ -27,7 +27,7 @@ * SUCH DAMAGE. */ -#include "regint.h" +#include "regenc.h" #define emacsmule_islead(c) ((UChar )(c) < 0x9e) @@ -334,9 +334,9 @@ OnigEncodingDefine(emacs_mule, Emacs_Mule) = { onigenc_not_support_get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule") diff --git a/enc/euc_jp.c b/enc/euc_jp.c index f9604b8d6e..ded051af69 100644 --- a/enc/euc_jp.c +++ b/enc/euc_jp.c @@ -28,7 +28,7 @@ * SUCH DAMAGE. */ -#include "regint.h" +#include "regenc.h" #define eucjp_islead(c) ((UChar )((c) - 0xa1) > 0xfe - 0xa1) @@ -576,9 +576,9 @@ OnigEncodingDefine(euc_jp, EUC_JP) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* * Name: EUC-JP diff --git a/enc/euc_kr.c b/enc/euc_kr.c index eb17f476e9..21d6ab4e1c 100644 --- a/enc/euc_kr.c +++ b/enc/euc_kr.c @@ -188,8 +188,33 @@ OnigEncodingDefine(euc_kr, EUC_KR) = { onigenc_not_support_get_ctype_code_range, euckr_left_adjust_char_head, euckr_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; ENC_ALIAS("eucKR", "EUC-KR") + +#ifndef RUBY +/* Same with OnigEncodingEUC_KR except the name */ +OnigEncodingDefine(euc_cn, EUC_CN) = { + euckr_mbc_enc_len, + "EUC-CN", /* name */ + 2, /* max enc length */ + 1, /* min enc length */ + onigenc_is_mbc_newline_0x0a, + euckr_mbc_to_code, + onigenc_mb2_code_to_mbclen, + euckr_code_to_mbc, + euckr_mbc_case_fold, + onigenc_ascii_apply_all_case_fold, + onigenc_ascii_get_case_fold_codes_by_str, + onigenc_minimum_property_name_to_ctype, + euckr_is_code_ctype, + onigenc_not_support_get_ctype_code_range, + 
euckr_left_adjust_char_head, + euckr_is_allowed_reverse_match, + onigenc_ascii_only_case_map, + 0, + ONIGENC_FLAG_NONE, +}; +#endif /* RUBY */ diff --git a/enc/euc_tw.c b/enc/euc_tw.c index e7d5187c4a..1c5659cb1d 100644 --- a/enc/euc_tw.c +++ b/enc/euc_tw.c @@ -221,8 +221,8 @@ OnigEncodingDefine(euc_tw, EUC_TW) = { onigenc_not_support_get_ctype_code_range, euctw_left_adjust_char_head, euctw_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; ENC_ALIAS("eucTW", "EUC-TW") diff --git a/enc/gb18030.c b/enc/gb18030.c index 8a00332991..63d2e633ec 100644 --- a/enc/gb18030.c +++ b/enc/gb18030.c @@ -597,8 +597,7 @@ OnigEncodingDefine(gb18030, GB18030) = { onigenc_not_support_get_ctype_code_range, gb18030_left_adjust_char_head, gb18030_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; - @@ -211,9 +211,9 @@ OnigEncodingDefine(gbk, GBK) = { onigenc_not_support_get_ctype_code_range, gbk_left_adjust_char_head, gbk_is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* * Name: GBK diff --git a/enc/iso_8859_1.c b/enc/iso_8859_1.c index 2440c9f5a1..7af0888c3e 100644 --- a/enc/iso_8859_1.c +++ b/enc/iso_8859_1.c @@ -256,45 +256,46 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & 
ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code += 0x20; } - else if (code==0xAA || code==0xBA || code==0xB5 || code==0xFF) ; - else if ((EncISO_8859_1_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if (code == 0xAA || code == 0xBA || code == 0xB5 || code == 0xFF) + ; + else if ((EncISO_8859_1_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { @@ -314,8 +315,8 @@ OnigEncodingDefine(iso_8859_1, ISO_8859_1) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-1", "ISO-8859-1") diff --git a/enc/iso_8859_10.c b/enc/iso_8859_10.c index e06f15f9d0..cae4be2db0 100644 --- a/enc/iso_8859_10.c +++ b/enc/iso_8859_10.c @@ -215,9 +215,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const 
OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -225,48 +225,49 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - else if (code==0xBD || code==0xFF) ; + else if (code == 0xBD || code == 0xFF) + ; else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_10_TO_LOWER_CASE(code); } - else if ((EncISO_8859_10_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if ((EncISO_8859_10_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code>=0xA0 && code<=0xBF) + if (code >= 0xA0 && code <= 0xBF) code -= 0x10; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { @@ -286,8 +287,8 @@ OnigEncodingDefine(iso_8859_10, ISO_8859_10) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-10", "ISO-8859-10") diff --git a/enc/iso_8859_11.c b/enc/iso_8859_11.c index a5522da2e3..85e8f2cdb4 100644 --- a/enc/iso_8859_11.c +++ b/enc/iso_8859_11.c @@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_11, ISO_8859_11) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, 0, 
ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("ISO8859-11", "ISO-8859-11") diff --git a/enc/iso_8859_13.c b/enc/iso_8859_13.c index 6e49e16dfb..fe1ddd7065 100644 --- a/enc/iso_8859_13.c +++ b/enc/iso_8859_13.c @@ -208,9 +208,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -218,38 +218,39 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_13_TO_LOWER_CASE(code); } - else if (code==0xB5) ; - else if ((EncISO_8859_13_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if (code == 0xB5) + ; + else if ((EncISO_8859_13_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xB8 || code==0xBA || code==0xBF) { + if (code == 0xB8 || code == 0xBA || code == 0xBF) { code -= 0x10; } else { @@ -257,11 +258,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, } } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { @@ -281,8 +282,8 @@ OnigEncodingDefine(iso_8859_13, ISO_8859_13) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-13", "ISO-8859-13") diff --git a/enc/iso_8859_14.c b/enc/iso_8859_14.c index 22df367dd9..647514a016 100644 --- a/enc/iso_8859_14.c +++ b/enc/iso_8859_14.c @@ -217,9 +217,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - 
OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -227,58 +227,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - /* else if (code==0xAA || code==0xBA) ; */ + /* else if (code == 0xAA || code == 0xBA) ; */ else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_14_TO_LOWER_CASE(code); } - else if ((EncISO_8859_14_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if ((EncISO_8859_14_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if(code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE) + if (code == 0xA2 || code == 0xA5 || code == 0xB1 || code == 0xB3 || code == 0xB5 || code == 0xBE) code -= 0x1; - else if(code == 0xAB) + else if (code == 0xAB) code -= 0x5; - else if(code == 0xFF) + else if (code == 0xFF) code -= 0x50; - else if(code == 0xB9) + else if (code == 0xB9) code -= 0x2; - else if(code == 0xBF) + else if (code == 0xBF) code -= 0x4; - else if(code == 0xB8 || code == 0xBA || code == 0xBC) + else if (code == 0xB8 || code == 0xBA || code == 0xBC) code -= 0x10; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { @@ -298,8 +298,8 @@ OnigEncodingDefine(iso_8859_14, ISO_8859_14) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, 
onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-14", "ISO-8859-14") diff --git a/enc/iso_8859_15.c b/enc/iso_8859_15.c index 06b00b90b3..377a3afc7b 100644 --- a/enc/iso_8859_15.c +++ b/enc/iso_8859_15.c @@ -211,9 +211,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -221,54 +221,55 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - else if (code==0xAA || code==0xBA || code==0xB5) ; + else if (code == 0xAA || code == 0xBA || code == 0xB5) + ; else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_15_TO_LOWER_CASE(code); } - else if ((EncISO_8859_15_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if ((EncISO_8859_15_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xA8) + if (code == 0xA8) code -= 2; - else if (code==0xB8) + else if (code == 0xB8) code -= 4; - else if (code==0xBD) + else if (code == 0xBD) code -= 1; - else if (code==0xFF) + else if (code == 0xFF) code -= 0x41; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { @@ -288,8 +289,8 @@ OnigEncodingDefine(iso_8859_15, ISO_8859_15) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-15", "ISO-8859-15") diff --git a/enc/iso_8859_16.c b/enc/iso_8859_16.c index c8695e65f7..135630eb73 100644 --- a/enc/iso_8859_16.c +++ b/enc/iso_8859_16.c @@ -213,9 +213,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int 
get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -223,57 +223,57 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_16_TO_LOWER_CASE(code); } - else if ((EncISO_8859_16_CtypeTable[code]&BIT_CTYPE_LOWER) + else if ((EncISO_8859_16_CtypeTable[code] & BIT_CTYPE_LOWER) && (flags&ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xA2 || code==0xBD) + if (code == 0xA2 || code == 0xBD) code--; - else if (code==0xB3 || code==0xBA || code==0xBF) + else if (code == 0xB3 || code == 0xBA || code == 0xBF) code -= 0x10; - else if (code==0xA8 || code==0xAE) + else if (code == 0xA8 || code == 0xAE) code -= 0x02; - else if (code==0xB9) + else if (code == 0xB9) code -= 0x07; - else if (code==0xB8) + else if (code == 0xB8) code -= 0x04; - else if (code==0xFF) + else if (code == 0xFF) code -= 0x41; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { @@ -293,8 +293,8 @@ OnigEncodingDefine(iso_8859_16, ISO_8859_16) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-16", "ISO-8859-16") diff --git a/enc/iso_8859_2.c b/enc/iso_8859_2.c index 00de8ec757..3a05c6320d 100644 --- a/enc/iso_8859_2.c +++ 
b/enc/iso_8859_2.c @@ -221,50 +221,50 @@ is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSE } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_2_TO_LOWER_CASE(code); } - else if ((EncISO_8859_2_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { - if (code>=0xB1 && code<=0xBF){ + else if ((EncISO_8859_2_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { + if (code >= 0xB1 && code <= 0xBF) { flags |= ONIGENC_CASE_MODIFIED; code -= 0x10; } - else{ + else { flags |= ONIGENC_CASE_MODIFIED; code -= 0x20; } } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | 
ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { @@ -284,8 +284,8 @@ OnigEncodingDefine(iso_8859_2, ISO_8859_2) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-2", "ISO-8859-2") diff --git a/enc/iso_8859_3.c b/enc/iso_8859_3.c index 365d9a77de..2a343eac63 100644 --- a/enc/iso_8859_3.c +++ b/enc/iso_8859_3.c @@ -223,45 +223,46 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - else if (code==0xB5) ; + else if (code == 0xB5) + ; else if ((EncISO_8859_3_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='I') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? 
DOTLESS_i : 'i'; + if (code == 'I') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; else code = ENC_ISO_8859_3_TO_LOWER_CASE(code); } else if ((EncISO_8859_3_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='i') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I'; - else if (code==DOTLESS_i) + if (code == 'i') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I'; + else if (code == DOTLESS_i) code = 'I'; - else if (code>=0xB0 && code<=0xBF ) { + else if (code >= 0xB0 && code <= 0xBF) { code -= 0x10; } else { @@ -269,11 +270,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, } } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { @@ -293,8 +294,8 @@ OnigEncodingDefine(iso_8859_3, ISO_8859_3) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-3", "ISO-8859-3") diff --git a/enc/iso_8859_4.c b/enc/iso_8859_4.c index 6d27300e22..e2134e8c0b 100644 --- a/enc/iso_8859_4.c +++ b/enc/iso_8859_4.c @@ -232,31 +232,32 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & 
ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncISO_8859_4_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_4_TO_LOWER_CASE(code); } - else if (code==0xA2) ; + else if (code == 0xA2) + ; else if ((EncISO_8859_4_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code>=0xA0&&code<=0xBF) { - if (code==0xBF) + if (code >= 0xA0 && code <= 0xBF) { + if (code == 0xBF) code -= 0x02; else code -= 0x10; @@ -265,11 +266,11 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { @@ -289,8 +290,8 @@ OnigEncodingDefine(iso_8859_4, ISO_8859_4) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-4", "ISO-8859-4") diff --git a/enc/iso_8859_5.c b/enc/iso_8859_5.c index 5d67639f5e..6fafc35823 100644 --- a/enc/iso_8859_5.c +++ b/enc/iso_8859_5.c @@ -210,35 +210,35 @@ get_case_fold_codes_by_str(OnigCaseFoldType 
flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; if ((EncISO_8859_5_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_5_TO_LOWER_CASE(code); } else if ((EncISO_8859_5_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (0xF1<=code && code<=0xFF) + if (0xF1 <= code && code <= 0xFF) code -= 0x50; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { @@ -258,8 +258,8 @@ OnigEncodingDefine(iso_8859_5, ISO_8859_5) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-5", "ISO-8859-5") diff --git a/enc/iso_8859_6.c b/enc/iso_8859_6.c index 64dc5aceac..6d852ac8c0 100644 --- a/enc/iso_8859_6.c +++ b/enc/iso_8859_6.c @@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_6, ISO_8859_6) = { 
onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("ISO8859-6", "ISO-8859-6") diff --git a/enc/iso_8859_7.c b/enc/iso_8859_7.c index 475fecc19c..ac973f74ba 100644 --- a/enc/iso_8859_7.c +++ b/enc/iso_8859_7.c @@ -206,58 +206,58 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==0xF2) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == 0xF2) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; code = 0xD3; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; code = 0xF3; } } else if ((EncISO_8859_7_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_ISO_8859_7_TO_LOWER_CASE(code); } - else if (code==0xC0 || code==0xE0) - ; + else if (code == 0xC0 || code == 0xE0) + ; else if ((EncISO_8859_7_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xDC) { - code-=0x26; + if (code == 0xDC) { + code -= 0x26; } - else if (code>=0xDD && code<=0xDF) { - code-=0x25; + else if (code >= 0xDD && code <= 0xDF) { + code -= 0x25; } - else if (code==0xFC) { - code-=0x40; + 
else if (code == 0xFC) { + code -= 0x40; } - else if (code==0xFD || code==0xFE) { - code-=0x3F; + else if (code == 0xFD || code == 0xFE) { + code -= 0x3F; } else { - code-=0x20; + code -= 0x20; } } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { @@ -277,8 +277,8 @@ OnigEncodingDefine(iso_8859_7, ISO_8859_7) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-7", "ISO-8859-7") diff --git a/enc/iso_8859_8.c b/enc/iso_8859_8.c index 4777762849..0a7a29e82e 100644 --- a/enc/iso_8859_8.c +++ b/enc/iso_8859_8.c @@ -93,9 +93,9 @@ OnigEncodingDefine(iso_8859_8, ISO_8859_8) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("ISO8859-8", "ISO-8859-8") diff --git a/enc/iso_8859_9.c b/enc/iso_8859_9.c index 064a04d480..004eec310f 100644 --- a/enc/iso_8859_9.c +++ b/enc/iso_8859_9.c @@ -204,9 +204,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), 
CaseFoldMap, 1, @@ -216,53 +216,54 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, #define DOTLESS_i (0xFD) #define I_WITH_DOT_ABOVE (0xDD) static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - else if (code==0xAA || code==0xB5 || code==0xBA || code==0xFF) ; + else if (code == 0xAA || code == 0xB5 || code == 0xBA || code == 0xFF) + ; else if ((EncISO_8859_9_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='I') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; + if (code == 'I') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; else code = ENC_ISO_8859_9_TO_LOWER_CASE(code); } else if ((EncISO_8859_9_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='i') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I'; - else if (code==DOTLESS_i) + if (code == 'i') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? 
I_WITH_DOT_ABOVE : 'I'; + else if (code == DOTLESS_i) code = 'I'; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { @@ -282,8 +283,8 @@ OnigEncodingDefine(iso_8859_9, ISO_8859_9) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("ISO8859-9", "ISO-8859-9") diff --git a/enc/koi8_r.c b/enc/koi8_r.c index a3c05cd27b..a520975774 100644 --- a/enc/koi8_r.c +++ b/enc/koi8_r.c @@ -214,9 +214,8 @@ OnigEncodingDefine(koi8_r, KOI8_R) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("CP878", "KOI8-R") - diff --git a/enc/koi8_u.c b/enc/koi8_u.c index f97d74d3f0..50bb78bd04 100644 --- a/enc/koi8_u.c +++ b/enc/koi8_u.c @@ -218,7 +218,7 @@ OnigEncodingDefine(koi8_u, KOI8_U) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; diff --git a/enc/mktable.c b/enc/mktable.c index 49acf628d0..4edd5a0ff7 100644 --- a/enc/mktable.c +++ b/enc/mktable.c @@ -2,7 +2,7 @@ mktable.c **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb 
DOT ne DOT jp> + * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,10 @@ #include <stdio.h> #include <locale.h> +#ifndef __USE_ISOC99 #define __USE_ISOC99 +#endif + #include <ctype.h> #include "regenc.h" @@ -1108,11 +1111,13 @@ static int exec(FILE* fp, ENC_INFO* einfo) #define NCOL 8 int c, val, enc; + int r; enc = einfo->num; - fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", - einfo->name); + r = fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", + einfo->name); + if (r < 0) return -1; for (c = 0; c < 256; c++) { val = 0; @@ -1131,20 +1136,33 @@ static int exec(FILE* fp, ENC_INFO* einfo) if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; - if (c % NCOL == 0) fputs(" ", fp); - fprintf(fp, "0x%04x", val); - if (c != 255) fputs(",", fp); + if (c % NCOL == 0) { + r = fputs(" ", fp); + if (r < 0) return -1; + } + r = fprintf(fp, "0x%04x", val); + if (r < 0) return -1; + + if (c != 255) { + r = fputs(",", fp); + if (r < 0) return -1; + } if (c != 0 && c % NCOL == (NCOL-1)) - fputs("\n", fp); + r = fputs("\n", fp); else - fputs(" ", fp); + r = fputs(" ", fp); + + if (r < 0) return -1; } - fprintf(fp, "};\n"); + r = fprintf(fp, "};\n"); + if (r < 0) return -1; + return 0; } extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) { + int r; int i; FILE* fp = stdout; @@ -1155,7 +1173,11 @@ extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) /* setlocale(LC_ALL, "fr_FR.iso88591"); */ for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) { - exec(fp, &Info[i]); + r = exec(fp, &Info[i]); + if (r < 0) { + fprintf(stderr, "FAIL exec(): %d\n", r); + return -1; + } } return 0; diff --git a/enc/shift_jis.c b/enc/shift_jis.c index c1552bfd13..eacca9a5db 100644 --- a/enc/shift_jis.c +++ b/enc/shift_jis.c @@ -28,7 +28,7 @@ * SUCH DAMAGE. 
*/ -#include "regint.h" +#include "regenc.h" static const int EncLen_SJIS[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -563,9 +563,9 @@ OnigEncodingDefine(shift_jis, Shift_JIS) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* * Name: Shift_JIS diff --git a/enc/unicode.c b/enc/unicode.c index 39fb24408f..72ff5a96e7 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -139,17 +139,17 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) /* macros related to ONIGENC_CASE flags */ /* defined here because not used in other files */ -#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE|ONIGENC_CASE_IS_TITLECASE|ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL) +#define ONIGENC_CASE_SPECIALS (ONIGENC_CASE_TITLECASE | ONIGENC_CASE_IS_TITLECASE | ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL) /* macros for length in CaseMappingSpecials array in enc/unicode/casefold.h */ #define SpecialsLengthOffset 25 /* needs to be higher than the 22 bits used for Unicode codepoints */ -#define SpecialsLengthExtract(n) ((n)>>SpecialsLengthOffset) -#define SpecialsCodepointExtract(n) ((n)&((1<<SpecialsLengthOffset)-1)) -#define SpecialsLengthEncode(n) ((n)<<SpecialsLengthOffset) +#define SpecialsLengthExtract(n) ((n) >> SpecialsLengthOffset) +#define SpecialsCodepointExtract(n) ((n) & ((1 << SpecialsLengthOffset) - 1)) +#define SpecialsLengthEncode(n) ((n) << SpecialsLengthOffset) -#define OnigSpecialIndexMask (((1<<OnigSpecialIndexWidth)-1)<<OnigSpecialIndexShift) -#define OnigSpecialIndexEncode(n) ((n)<<OnigSpecialIndexShift) -#define OnigSpecialIndexDecode(n) (((n)&OnigSpecialIndexMask)>>OnigSpecialIndexShift) +#define OnigSpecialIndexMask (((1 << OnigSpecialIndexWidth) - 1) << OnigSpecialIndexShift) +#define OnigSpecialIndexEncode(n) ((n) << OnigSpecialIndexShift) +#define OnigSpecialIndexDecode(n) (((n) & OnigSpecialIndexMask) >> 
OnigSpecialIndexShift) /* macros to shorten "enc/unicode/casefold.h", undefined immediately after including the file */ #define U ONIGENC_CASE_UPCASE @@ -660,128 +660,130 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) { - OnigCodePoint code; - OnigUChar *to_start = to; - OnigCaseFoldType flags = *flagP; - int codepoint_length; - - to_end -= CASE_MAPPING_SLACK; - /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to - * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */ - flags |= (flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE))<<ONIGENC_CASE_SPECIAL_OFFSET; - - while (*pp<end && to<=to_end) { - codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); - if (codepoint_length < 0) - return codepoint_length; /* encoding invalid */ - code = ONIGENC_MBC_TO_CODE(enc, *pp, end); - *pp += codepoint_length; - - if (code<='z') { /* ASCII comes first */ - if (code>='a' && code<='z') { - if (flags&ONIGENC_CASE_UPCASE) { - MODIFIED; - if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='i') - code = I_WITH_DOT_ABOVE; - else - code += 'A'-'a'; - } - } - else if (code>='A' && code<='Z') { - if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) { - MODIFIED; - if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code=='I') - code = DOTLESS_i; - else - code += 'a'-'A'; - } - } + OnigCodePoint code; + OnigUChar *to_start = to; + OnigCaseFoldType flags = *flagP; + int codepoint_length; + + to_end -= CASE_MAPPING_SLACK; + /* copy flags ONIGENC_CASE_UPCASE and ONIGENC_CASE_DOWNCASE over to + * ONIGENC_CASE_UP_SPECIAL and ONIGENC_CASE_DOWN_SPECIAL */ + flags |= (flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) << ONIGENC_CASE_SPECIAL_OFFSET; + + while (*pp < end && to <= to_end) { + codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); + if (codepoint_length < 0) + return codepoint_length; /* encoding invalid */ + code = ONIGENC_MBC_TO_CODE(enc, *pp, end); + *pp += 
codepoint_length; + + if (code <= 'z') { /* ASCII comes first */ + if (code >= 'a' && code <= 'z') { + if (flags & ONIGENC_CASE_UPCASE) { + MODIFIED; + if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'i') + code = I_WITH_DOT_ABOVE; + else + code += 'A' - 'a'; } - else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */ - const CodePointList3 *folded; - - if (code==I_WITH_DOT_ABOVE) { - if (flags&(ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD)) { - MODIFIED; - code = 'i'; - if (!(flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */ - to += ONIGENC_CODE_TO_MBC(enc, code, to); - code = DOT_ABOVE; - } - } - } - else if (code==DOTLESS_i) { /* handle this manually, because it isn't involved in folding */ - if (flags&ONIGENC_CASE_UPCASE) - MODIFIED, code = 'I'; + } + else if (code >= 'A' && code <= 'Z') { + if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) { + MODIFIED; + if (flags & ONIGENC_CASE_FOLD_TURKISH_AZERI && code == 'I') + code = DOTLESS_i; + else + code += 'a' - 'A'; + } + } + } + else if (!(flags & ONIGENC_CASE_ASCII_ONLY) && code >= 0x00B5) { /* deal with non-ASCII; micron sign (U+00B5) is lowest affected */ + const CodePointList3 *folded; + + if (code == I_WITH_DOT_ABOVE) { + if (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD)) { + MODIFIED; + code = 'i'; + if (!(flags & ONIGENC_CASE_FOLD_TURKISH_AZERI)) { /* make dot above explicit */ + to += ONIGENC_CODE_TO_MBC(enc, code, to); + code = DOT_ABOVE; + } + } + } + else if (code == DOTLESS_i) { /* handle this manually, because it isn't involved in folding */ + if (flags & ONIGENC_CASE_UPCASE) { + MODIFIED; + code = 'I'; + } + } + else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */ + if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ + && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */ + /* already 
Titlecase, no changes needed */ + } + else if (flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ + const OnigCodePoint *next; + int count; + + MODIFIED; + if (flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_SPECIALS) { /* special */ + const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n); + + if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */ + if ((flags & (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) + == (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */ + goto SpecialsCopy; + else /* swapCASE not needed */ + SpecialsStart += SpecialsLengthExtract(*SpecialsStart); } - else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */ - if ((flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ - && (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */ - /* already Titlecase, no changes needed */ - } - else if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ - const OnigCodePoint *next; - int count; - - MODIFIED; - if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_SPECIALS) { /* special */ - const OnigCodePoint *SpecialsStart = CaseMappingSpecials + OnigSpecialIndexDecode(folded->n); - - if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_IS_TITLECASE) { /* swapCASE available */ - if ((flags&(ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) - == (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) /* swapCASE needed */ - goto SpecialsCopy; - else /* swapCASE not needed */ - SpecialsStart += SpecialsLengthExtract(*SpecialsStart); - } - if (OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) { /* Titlecase available */ - if (flags&ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */ - goto SpecialsCopy; - else /* Titlecase not needed */ - SpecialsStart += SpecialsLengthExtract(*SpecialsStart); - } - if 
(OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_DOWN_SPECIAL) { - if (!(flags&ONIGENC_CASE_DOWN_SPECIAL)) - SpecialsStart += SpecialsLengthExtract(*SpecialsStart); - } - /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */ - SpecialsCopy: - count = SpecialsLengthExtract(*SpecialsStart); - next = SpecialsStart; - code = SpecialsCodepointExtract(*next++); - } - else { /* no specials */ - count = OnigCodePointCount(folded->n); - next = folded->code; - code = *next++; - } - if (count==1) - ; - else if (count==2) { - to += ONIGENC_CODE_TO_MBC(enc, code, to); - code = *next; - } - else { /* count == 3 */ - to += ONIGENC_CODE_TO_MBC(enc, code, to); - to += ONIGENC_CODE_TO_MBC(enc, *next++, to); - code = *next; - } - } + if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) { /* Titlecase available */ + if (flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, but not yet Titlecase */ + goto SpecialsCopy; + else /* Titlecase not needed */ + SpecialsStart += SpecialsLengthExtract(*SpecialsStart); } - else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */ - && flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ - MODIFIED; - code = folded->code[(flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) ? 
1 : 0]; + if (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_DOWN_SPECIAL) { + if (!(flags & ONIGENC_CASE_DOWN_SPECIAL)) + SpecialsStart += SpecialsLengthExtract(*SpecialsStart); } + /* here, we know we use ONIGENC_CASE_UP_SPECIAL, and the position is right */ +SpecialsCopy: + count = SpecialsLengthExtract(*SpecialsStart); + next = SpecialsStart; + code = SpecialsCodepointExtract(*next++); + } + else { /* no specials */ + count = OnigCodePointCount(folded->n); + next = folded->code; + code = *next++; + } + if (count == 1) + ; + else if (count == 2) { + to += ONIGENC_CODE_TO_MBC(enc, code, to); + code = *next; + } + else { /* count == 3 */ + to += ONIGENC_CODE_TO_MBC(enc, code, to); + to += ONIGENC_CODE_TO_MBC(enc, *next++, to); + code = *next; + } } - to += ONIGENC_CODE_TO_MBC(enc, code, to); - /* switch from titlecase to lowercase for capitalize */ - if (flags & ONIGENC_CASE_TITLECASE) - flags ^= (ONIGENC_CASE_UPCASE |ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE| - ONIGENC_CASE_UP_SPECIAL|ONIGENC_CASE_DOWN_SPECIAL); + } + else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0 /* data about character found in CaseUnfold_11_Table */ + && flags & OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ + MODIFIED; + code = folded->code[(flags & OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_TITLECASE) ? 
1 : 0]; + } } - *flagP = flags; - return (int)(to-to_start); + to += ONIGENC_CODE_TO_MBC(enc, code, to); + /* switch from titlecase to lowercase for capitalize */ + if (flags & ONIGENC_CASE_TITLECASE) + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE | + ONIGENC_CASE_UP_SPECIAL | ONIGENC_CASE_DOWN_SPECIAL); + } + *flagP = flags; + return (int )(to - to_start); } #if 0 diff --git a/enc/us_ascii.c b/enc/us_ascii.c index cf835e6538..08f9072c43 100644 --- a/enc/us_ascii.c +++ b/enc/us_ascii.c @@ -1,7 +1,10 @@ #include "regenc.h" -#include "encindex.h" +#ifdef RUBY +# include "encindex.h" +#endif + #ifndef ENCINDEX_US_ASCII -#define ENCINDEX_US_ASCII 0 +# define ENCINDEX_US_ASCII 0 #endif static int @@ -29,9 +32,9 @@ OnigEncodingDefine(us_ascii, US_ASCII) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_single_byte_ascii_only_case_map, ENCINDEX_US_ASCII, ONIGENC_FLAG_NONE, - onigenc_single_byte_ascii_only_case_map, }; ENC_ALIAS("ASCII", "US-ASCII") ENC_ALIAS("ANSI_X3.4-1968", "US-ASCII") diff --git a/enc/utf_16be.c b/enc/utf_16be.c index e8b97983bf..f9dd7119d6 100644 --- a/enc/utf_16be.c +++ b/enc/utf_16be.c @@ -249,8 +249,8 @@ OnigEncodingDefine(utf_16be, UTF_16BE) = { onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, + onigenc_unicode_case_map, 0, ONIGENC_FLAG_UNICODE, - onigenc_unicode_case_map, }; ENC_ALIAS("UCS-2BE", "UTF-16BE") diff --git a/enc/utf_16le.c b/enc/utf_16le.c index 67ec2ad178..2c8438d0be 100644 --- a/enc/utf_16le.c +++ b/enc/utf_16le.c @@ -242,7 +242,7 @@ OnigEncodingDefine(utf_16le, UTF_16LE) = { onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, + onigenc_unicode_case_map, 0, ONIGENC_FLAG_UNICODE, - onigenc_unicode_case_map, }; diff --git a/enc/utf_32be.c b/enc/utf_32be.c index 
a57b854674..995c9d8ed5 100644 --- a/enc/utf_32be.c +++ b/enc/utf_32be.c @@ -187,9 +187,8 @@ OnigEncodingDefine(utf_32be, UTF_32BE) = { onigenc_utf16_32_get_ctype_code_range, utf32be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, + onigenc_unicode_case_map, 0, ONIGENC_FLAG_UNICODE, - onigenc_unicode_case_map, }; ENC_ALIAS("UCS-4BE", "UTF-32BE") - diff --git a/enc/utf_32le.c b/enc/utf_32le.c index c48089d6ed..e255f0e246 100644 --- a/enc/utf_32le.c +++ b/enc/utf_32le.c @@ -187,8 +187,8 @@ OnigEncodingDefine(utf_32le, UTF_32LE) = { onigenc_utf16_32_get_ctype_code_range, utf32le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, + onigenc_unicode_case_map, 0, ONIGENC_FLAG_UNICODE, - onigenc_unicode_case_map, }; ENC_ALIAS("UCS-4LE", "UTF-32LE") diff --git a/enc/utf_8.c b/enc/utf_8.c index 862b13fd9b..3dad2f729b 100644 --- a/enc/utf_8.c +++ b/enc/utf_8.c @@ -28,17 +28,20 @@ */ #include "regenc.h" -#include "encindex.h" +#ifdef RUBY +# include "encindex.h" +#endif + #ifndef ENCINDEX_UTF_8 -#define ENCINDEX_UTF_8 0 +# define ENCINDEX_UTF_8 0 #endif #define USE_INVALID_CODE_SCHEME #ifdef USE_INVALID_CODE_SCHEME /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ -#define INVALID_CODE_FE 0xfffffffe -#define INVALID_CODE_FF 0xffffffff +# define INVALID_CODE_FE 0xfffffffe +# define INVALID_CODE_FF 0xffffffff #endif #define VALID_CODE_LIMIT 0x0010ffff @@ -428,9 +431,9 @@ OnigEncodingDefine(utf_8, UTF_8) = { get_ctype_code_range, left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + onigenc_unicode_case_map, ENCINDEX_UTF_8, ONIGENC_FLAG_UNICODE, - onigenc_unicode_case_map, }; ENC_ALIAS("CP65001", "UTF-8") @@ -444,4 +447,3 @@ ENC_ALIAS("CP65001", "UTF-8") ENC_REPLICATE("UTF8-MAC", "UTF-8") ENC_ALIAS("UTF-8-MAC", "UTF8-MAC") ENC_ALIAS("UTF-8-HFS", "UTF8-MAC") /* Emacs 23.2 */ - diff --git a/enc/windows_1250.c b/enc/windows_1250.c index 47317ddaf6..d2cf7b16bc 100644 --- a/enc/windows_1250.c +++ 
b/enc/windows_1250.c @@ -191,40 +191,41 @@ cp1250_get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncCP1250_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_CP1250_TO_LOWER_CASE(code); } - else if (code==0xB5) ; + else if (code == 0xB5) + ; else if ((EncCP1250_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xB9) + if (code == 0xB9) code = 0xA5; - else if (code==0xBE) + else if (code == 0xBE) code = 0xBC; else if (code >= 0x8A && code <= 0xBF && code!=0xB9) code -= 0x10; @@ -232,11 +233,11 @@ case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to 
lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1250, Windows_1250) = { @@ -256,9 +257,9 @@ OnigEncodingDefine(windows_1250, Windows_1250) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; /* * Name: windows-1250 diff --git a/enc/windows_1251.c b/enc/windows_1251.c index 0f9b7fa69a..fcd0f1015d 100644 --- a/enc/windows_1251.c +++ b/enc/windows_1251.c @@ -181,49 +181,50 @@ cp1251_get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; if ((EncCP1251_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_CP1251_TO_LOWER_CASE(code); } - else if (code==0xB5) ; + else if (code == 0xB5) + ; else if ((EncCP1251_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if ((0x61<=code && code<=0x7A) || (0xE0<=code && code<=0xFF)) + if ((0x61 <= code && code <= 0x7A) || (0xE0 <= code && code <= 0xFF)) code -= 0x20; - else if (code==0xA2 || code==0xB3 || code==0xBE) + else if (code == 0xA2 || code == 0xB3 || code == 0xBE) code -= 0x01; - else if (code==0x83) + else if (code 
== 0x83) code = 0x81; - else if (code==0xBC) + else if (code == 0xBC) code = 0xA3; - else if (code==0xB4) + else if (code == 0xB4) code = 0xA5; else code -= 0x10; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_single_byte_mbc_enc_len, - "Windows-1251", /* name */ + "Windows-1251",/* name */ 1, /* max enc length */ 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, @@ -238,9 +239,9 @@ OnigEncodingDefine(windows_1251, Windows_1251) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; /* * Name: windows-1251 diff --git a/enc/windows_1252.c b/enc/windows_1252.c index 4427f8e31e..5f90c15601 100644 --- a/enc/windows_1252.c +++ b/enc/windows_1252.c @@ -190,42 +190,43 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_CP1252_TO_LOWER_CASE(code); } - else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ; + else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5) + ; else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0x9A || code==0x9C || code==0x9E) + if (code == 0x9A || code == 0x9C || code == 0x9E) code -= 0x10; - else if (code==0xFF) + else if (code == 0xFF) code -= 0x60; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1252, Windows_1252) = { @@ -245,9 +246,9 @@ OnigEncodingDefine(windows_1252, Windows_1252) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; /* * Name: windows-1252 diff --git a/enc/windows_1253.c b/enc/windows_1253.c index 2157b55c99..9e9c63a581 100644 --- a/enc/windows_1253.c +++ b/enc/windows_1253.c @@ -214,62 +214,63 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, } static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, 
OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==0xF2) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == 0xF2) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; code = 0xD3; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; code = 0xF3; } } - else if (code==0xB5) { - if (flags&ONIGENC_CASE_UPCASE) { + else if (code == 0xB5) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; code = 0xCC; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; code = 0xEC; } } - else if (code==0xC0 || code==0xE0 || code==0xB6) ; + else if (code == 0xC0 || code == 0xE0 || code == 0xB6) + ; else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; code = ENC_CP1253_TO_LOWER_CASE(code); } else if ((EncCP1253_CtypeTable[code] & BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code==0xDC) + if (code == 0xDC) code = 0xA2; - else if (code>=0xDD && code<=0xDF) + else if (code >= 0xDD && code <= 0xDF) code -= 0x25; - else if (code==0xFC) + else if (code == 0xFC) code = 0xBC; - else if (code==0xFD || code==0xFE) + else if (code == 0xFD || code == 0xFE) code -= 0x3F; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + 
if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1253, Windows_1253) = { @@ -289,8 +290,8 @@ OnigEncodingDefine(windows_1253, Windows_1253) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("CP1253", "Windows-1253") diff --git a/enc/windows_1254.c b/enc/windows_1254.c index 2ccf966b8e..9ae66978a2 100644 --- a/enc/windows_1254.c +++ b/enc/windows_1254.c @@ -212,9 +212,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -232,49 +232,50 @@ case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 
's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='I') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; + if (code == 'I') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; else code = ENC_CP1254_TO_LOWER_CASE(code); } - else if (code==0x83 || code==0xAA || code==0xBA || code==0xB5) ; - else if ((EncCP1254_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + else if (code == 0x83 || code == 0xAA || code == 0xBA || code == 0xB5) + ; + else if ((EncCP1254_CtypeTable[code] & BIT_CTYPE_LOWER) + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='i') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I'; - else if (code==DOTLESS_i) + if (code == 'i') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? 
I_WITH_DOT_ABOVE : 'I'; + else if (code == DOTLESS_i) code = 'I'; - else if (code==0x9A || code==0x9C || code==0x9E) + else if (code == 0x9A || code == 0x9C || code == 0x9E) code -= 0x10; - else if (code==0xFF) + else if (code == 0xFF) code -= 0x60; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1254, Windows_1254) = { @@ -294,8 +295,8 @@ OnigEncodingDefine(windows_1254, Windows_1254) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; ENC_ALIAS("CP1254", "Windows-1254") diff --git a/enc/windows_1257.c b/enc/windows_1257.c index 40cdb969aa..936a94ac76 100644 --- a/enc/windows_1257.c +++ b/enc/windows_1257.c @@ -216,9 +216,9 @@ apply_all_case_fold(OnigCaseFoldType flag, static int get_case_fold_codes_by_str(OnigCaseFoldType flag, - const OnigUChar* p, const OnigUChar* end, - OnigCaseFoldCodeItem items[], - OnigEncoding enc ARG_UNUSED) + const OnigUChar* p, const OnigUChar* end, + OnigCaseFoldCodeItem items[], + OnigEncoding enc ARG_UNUSED) { return onigenc_get_case_fold_codes_by_str_with_map( numberof(CaseFoldMap), CaseFoldMap, 1, @@ -228,55 +228,56 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, #define DOTLESS_i (0xB9) #define I_WITH_DOT_ABOVE (0xA9) static int -case_map (OnigCaseFoldType* flagP, const OnigUChar** pp, - const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, - const struct OnigEncodingTypeST* enc) +case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, + const OnigUChar* end, 
OnigUChar* to, OnigUChar* to_end, + const struct OnigEncodingTypeST* enc) { OnigCodePoint code; OnigUChar *to_start = to; OnigCaseFoldType flags = *flagP; - while (*pp<end && to<to_end) { + while (*pp < end && to < to_end) { code = *(*pp)++; - if (code==SHARP_s) { - if (flags&ONIGENC_CASE_UPCASE) { + if (code == SHARP_s) { + if (flags & ONIGENC_CASE_UPCASE) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 'S'; - code = (flags&ONIGENC_CASE_TITLECASE) ? 's' : 'S'; + code = (flags & ONIGENC_CASE_TITLECASE) ? 's' : 'S'; } - else if (flags&ONIGENC_CASE_FOLD) { + else if (flags & ONIGENC_CASE_FOLD) { flags |= ONIGENC_CASE_MODIFIED; *to++ = 's'; code = 's'; } } - else if (code==0xB5) ; + else if (code == 0xB5) + ; else if ((EncCP1252_CtypeTable[code] & BIT_CTYPE_UPPER) - && (flags & (ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_FOLD))) { + && (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='I') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; + if (code == 'I') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? DOTLESS_i : 'i'; else code = ENC_CP1252_TO_LOWER_CASE(code); } else if ((EncCP1252_CtypeTable[code]&BIT_CTYPE_LOWER) - && (flags&ONIGENC_CASE_UPCASE)) { + && (flags & ONIGENC_CASE_UPCASE)) { flags |= ONIGENC_CASE_MODIFIED; - if (code=='i') - code = flags&ONIGENC_CASE_FOLD_TURKISH_AZERI ? I_WITH_DOT_ABOVE : 'I'; - else if (code==DOTLESS_i) + if (code == 'i') + code = flags & ONIGENC_CASE_FOLD_TURKISH_AZERI ? 
I_WITH_DOT_ABOVE : 'I'; + else if (code == DOTLESS_i) code = 'I'; - else if (code>=0xB0 && code<=0xBF ) + else if (code >= 0xB0 && code <= 0xBF) code -= 0x10; else code -= 0x20; } *to++ = code; - if (flags&ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ - flags ^= (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE|ONIGENC_CASE_TITLECASE); + if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ + flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); } *flagP = flags; - return (int)(to-to_start); + return (int )(to - to_start); } OnigEncodingDefine(windows_1257, Windows_1257) = { @@ -296,9 +297,8 @@ OnigEncodingDefine(windows_1257, Windows_1257) = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, + case_map, 0, ONIGENC_FLAG_NONE, - case_map, }; - ENC_ALIAS("CP1257", "Windows-1257") diff --git a/enc/windows_31j.c b/enc/windows_31j.c index 71836c1f13..174f8983c4 100644 --- a/enc/windows_31j.c +++ b/enc/windows_31j.c @@ -33,7 +33,7 @@ OnigEncodingDefine(windows_31j, Windows_31J) = { mbc_enc_len, - "Windows-31J", /* name */ + "Windows-31J", /* name */ 2, /* max byte length */ 1, /* min byte length */ onigenc_is_mbc_newline_0x0a, @@ -48,9 +48,9 @@ OnigEncodingDefine(windows_31j, Windows_31J) = { get_ctype_code_range, left_adjust_char_head, is_allowed_reverse_match, + onigenc_ascii_only_case_map, 0, ONIGENC_FLAG_NONE, - onigenc_ascii_only_case_map, }; /* * Name: Windows-31J |