From f1f48e610311d812eaf408d0947e8a3686ed3364 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 24 Feb 2016 13:32:01 +0000 Subject: * include/ruby/oniguruma.h: Rearranging flag assignments and making space for titlecase indices; adding additional macros to add or extract titlecase index; adding comments for better documentation. * enc/unicode.c: Moving some macros to include/ruby/oniguruma.h; activating use of titlecase indices. (with Kimihito Matsui) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53915 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 ++++++++++- enc/unicode.c | 6 +----- include/ruby/oniguruma.h | 47 +++++++++++++++++++++++++++++++++++------------ 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index ec781a06de..e5a69a0c20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,15 @@ +Wed Feb 24 22:31:13 2016 Martin Duerst + + * include/ruby/oniguruma.h: Rearranging flag assignments and making + space for titlecase indices; adding additional macros to add or + extract titlecase index; adding comments for better documentation. + * enc/unicode.c: Moving some macros to include/ruby/oniguruma.h; + activating use of titlecase indices. + (with Kimihito Matsui) + Wed Feb 24 21:03:04 2016 Tanaka Akira - * random.c (limited_rand): Add a specialized path for the limit fits + * random.c (limited_rand): Add a specialized path for when the limit fits in 32 bit. 
Tue Feb 23 21:52:24 2016 Martin Duerst diff --git a/enc/unicode.c b/enc/unicode.c index d26497fc9c..e9c2803cab 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -71,10 +71,6 @@ static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = { 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2 }; -/* use bottom bytes for actual code point count; 3 bits is more than enough */ -#define OnigCodePointCount(n) ((n)&0x7) -#define OnigCaseFoldFlags(n) ((n)&~0x7) - typedef struct { int n; OnigCodePoint code[3]; @@ -144,7 +140,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) #define U ONIGENC_CASE_UPCASE #define D ONIGENC_CASE_DOWNCASE #define F ONIGENC_CASE_FOLD -#define T(n) ONIGENC_CASE_TITLECASE +#define T(n) (ONIGENC_CASE_TITLECASE|OnigTitlecaseEncode(n)) #include "enc/unicode/casefold.h" diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h index 0d46f306f5..7f1d66c460 100644 --- a/include/ruby/oniguruma.h +++ b/include/ruby/oniguruma.h @@ -116,22 +116,45 @@ typedef ptrdiff_t OnigPosition; #define ONIG_INFINITE_DISTANCE ~((OnigDistance )0) +/* + * Onig casefold/case mapping flags and related definitions + * + * Subfields (starting with 0 at LSB): + * 0-2: Code point count in casefold.h + * 3-9: Index into TitleCase array in casefold.h + * 10-15, 18-20: Case mapping flags + */ typedef unsigned int OnigCaseFoldType; /* case fold flag */ ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag; -/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ -/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ -#define ONIGENC_CASE_TITLECASE (1<<10) -#define ONIGENC_CASE_UPCASE (1<<11) -#define ONIGENC_CASE_DOWNCASE (1<<12) -#define ONIGENC_CASE_FOLD (1<<13) -#define ONIGENC_CASE_ONCEONLY (1<<14) -#define ONIGENC_CASE_MODIFIED (1<<15) -#define ONIGENC_CASE_ASCII_ONLY (1<<19) -#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) -#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21) -#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) +/* bits for 
actual code point count; 3 bits is more than enough, currently only 2 used */ +#define OnigCodePointMask (0x7) +#define OnigCodePointCount(n) ((n)&OnigCodePointMask) +#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask) +/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */ +/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */ + +/* bits for index into table with separate titlecase mappings */ +/* 7 bits provide 128 values; as of Unicode 8.0.0, 89 values are used */ +#define OnigTitlecaseShift (3) +#define OnigTitlecaseWidth (7) +#define OnigTitlecaseMask (((1<<OnigTitlecaseWidth)-1)<<OnigTitlecaseShift) +#define OnigTitlecaseEncode(n) (((n)<<OnigTitlecaseShift)&OnigTitlecaseMask) +#define OnigTitlecaseDecode(n) (((n)&OnigTitlecaseMask)>>OnigTitlecaseShift) + +#define OnigTitlecaseIndex + +#define ONIGENC_CASE_TITLECASE (1<<10) /* has/needs titlecase mapping */ +#define ONIGENC_CASE_UPCASE (1<<11) /* has/needs uppercase mapping */ +#define ONIGENC_CASE_DOWNCASE (1<<12) /* has/needs lowercase mapping */ +#define ONIGENC_CASE_FOLD (1<<13) /* has/needs case folding */ +#define ONIGENC_CASE_SPECIAL (1<<14) /* has/needs special mapping from separate table */ +#define ONIGENC_CASE_MODIFIED (1<<15) /* data has been modified */ +#define ONIGENC_CASE_ASCII_ONLY (1<<18) /* only modify ASCII range */ +#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<19) /* needs Lithuanian-specific mapping */ +#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */ +#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */ #define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR #define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag -- cgit v1.2.3