about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-24 13:32:01 +0000
committer duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-24 13:32:01 +0000
commit f1f48e610311d812eaf408d0947e8a3686ed3364 (patch)
tree 81d3fe56ff75679c57bc42b629ca5f86fd973c39
parent 169b867fbf039ccb7f990449b898b2f9870a5269 (diff)
download ruby-f1f48e610311d812eaf408d0947e8a3686ed3364.tar.gz
* include/ruby/oniguruma.h: Rearranging flag assignments and making
  space for titlecase indices; adding additional macros to add or
  extract titlecase index; adding comments for better documentation.
* enc/unicode.c: Moving some macros to include/ruby/oniguruma.h;
  activating use of titlecase indices.
  (with Kimihito Matsui)

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53915 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- ChangeLog 11
-rw-r--r-- enc/unicode.c 6
-rw-r--r-- include/ruby/oniguruma.h 47
3 files changed, 46 insertions, 18 deletions
diff --git a/ChangeLog b/ChangeLog
index ec781a06de..e5a69a0c20 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,15 @@
+Wed Feb 24 22:31:13 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
+
+ * include/ruby/oniguruma.h: Rearranging flag assignments and making
+ space for titlecase indices; adding additional macros to add or
+ extract titlecase index; adding comments for better documentation.
+ * enc/unicode.c: Moving some macros to include/ruby/oniguruma.h;
+ activating use of titlecase indices.
+ (with Kimihito Matsui)
+
Wed Feb 24 21:03:04 2016 Tanaka Akira <akr@fsij.org>
- * random.c (limited_rand): Add a specialized path for the limit fits
+ * random.c (limited_rand): Add a specialized path for when the limit fits
in 32 bit.
Tue Feb 23 21:52:24 2016 Martin Duerst <duerst@it.aoyama.ac.jp>
diff --git a/enc/unicode.c b/enc/unicode.c
index d26497fc9c..e9c2803cab 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -71,10 +71,6 @@ static const unsigned short EncUNICODE_ISO_8859_1_CtypeTable[256] = {
0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2, 0x30e2
};
-/* use bottom bytes for actual code point count; 3 bits is more than enough */
-#define OnigCodePointCount(n) ((n)&0x7)
-#define OnigCaseFoldFlags(n) ((n)&~0x7)
-
typedef struct {
int n;
OnigCodePoint code[3];
@@ -144,7 +140,7 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y)
#define U ONIGENC_CASE_UPCASE
#define D ONIGENC_CASE_DOWNCASE
#define F ONIGENC_CASE_FOLD
-#define T(n) ONIGENC_CASE_TITLECASE
+#define T(n) (ONIGENC_CASE_TITLECASE|OnigTitlecaseEncode(n))
#include "enc/unicode/casefold.h"
diff --git a/include/ruby/oniguruma.h b/include/ruby/oniguruma.h
index 0d46f306f5..7f1d66c460 100644
--- a/include/ruby/oniguruma.h
+++ b/include/ruby/oniguruma.h
@@ -116,22 +116,45 @@ typedef ptrdiff_t OnigPosition;
#define ONIG_INFINITE_DISTANCE ~((OnigDistance )0)
+/*
+ * Onig casefold/case mapping flags and related definitions
+ *
+ * Subfields (starting with 0 at LSB):
+ * 0-2: Code point count in casefold.h
+ * 3-9: Index into TitleCase array in casefold.h
+ * 10-15, 18-20: Case mapping flags
+ */
typedef unsigned int OnigCaseFoldType; /* case fold flag */
ONIG_EXTERN OnigCaseFoldType OnigDefaultCaseFoldFlag;
-/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */
-/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */
-#define ONIGENC_CASE_TITLECASE (1<<10)
-#define ONIGENC_CASE_UPCASE (1<<11)
-#define ONIGENC_CASE_DOWNCASE (1<<12)
-#define ONIGENC_CASE_FOLD (1<<13)
-#define ONIGENC_CASE_ONCEONLY (1<<14)
-#define ONIGENC_CASE_MODIFIED (1<<15)
-#define ONIGENC_CASE_ASCII_ONLY (1<<19)
-#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20)
-#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<21)
-#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30)
+/* bits for actual code point count; 3 bits is more than enough, currently only 2 used */
+#define OnigCodePointMask (0x7)
+#define OnigCodePointCount(n) ((n)&OnigCodePointMask)
+#define OnigCaseFoldFlags(n) ((n)&~OnigCodePointMask)
+/* #define ONIGENC_CASE_FOLD_HIRAGANA_KATAKANA (1<<1) */ /* no longer usable with these values! */
+/* #define ONIGENC_CASE_FOLD_KATAKANA_WIDTH (1<<2) */ /* no longer usable with these values! */
+
+/* bits for index into table with separate titlecase mappings */
+/* 7 bits provide 128 values; as of Unicode 8.0.0, 89 values are used */
+#define OnigTitlecaseShift (3)
+#define OnigTitlecaseWidth (7)
+#define OnigTitlecaseMask (((1<<OnigTitlecaseWidth)-1)<<OnigTitlecaseShift)
+#define OnigTitlecaseEncode(n) (((n)<<OnigTitlecaseShift)&OnigTitlecaseMask)
+#define OnigTitlecaseDecode(n) (((n)&OnigTitlecaseMask)>>OnigTitlecaseShift)
+
+#define OnigTitlecaseIndex
+
+#define ONIGENC_CASE_TITLECASE (1<<10) /* has/needs titlecase mapping */
+#define ONIGENC_CASE_UPCASE (1<<11) /* has/needs uppercase mapping */
+#define ONIGENC_CASE_DOWNCASE (1<<12) /* has/needs lowercase mapping */
+#define ONIGENC_CASE_FOLD (1<<13) /* has/needs case folding */
+#define ONIGENC_CASE_SPECIAL (1<<14) /* has/needs special mapping from separate table */
+#define ONIGENC_CASE_MODIFIED (1<<15) /* data has been modified */
+#define ONIGENC_CASE_ASCII_ONLY (1<<18) /* only modify ASCII range */
+#define ONIGENC_CASE_FOLD_LITHUANIAN (1<<19) /* needs Lithuanian-specific mapping */
+#define ONIGENC_CASE_FOLD_TURKISH_AZERI (1<<20) /* needs mapping specific to Turkic languages; better not change original value! */
+#define INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR (1<<30) /* better not change original value! */
#define ONIGENC_CASE_FOLD_MIN INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR
#define ONIGENC_CASE_FOLD_DEFAULT OnigDefaultCaseFoldFlag