* enc/unicode.c, test/ruby/enc/test_case_mapping.rb: Implemented :fold option for String#downcase by using case folding data from the regular expression engine, and added a few simple tests. (with Kimihito Matsui)

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53747 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-06 05:37:29 +0000
committer: duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2016-02-06 05:37:29 +0000
commit: fe011e922c528993dacb1359d86d31f0f9eeb9a7 (patch)
tree: bb427b2ff4bee977ae543f12cf6ed0436fc0007a /enc/unicode.c
parent: ffe52148f9a8501c4189652f713014d63188f850 (diff)
download: ruby-fe011e922c528993dacb1359d86d31f0f9eeb9a7.tar.gz
1 files changed, 26 insertions, 5 deletions
diff --git a/enc/unicode.c b/enc/unicode.c
index 3f41ea3844..cb9b0a94b4 100644
--- a/enc/unicode.c
+++ b/enc/unicode.c
@@ -628,40 +628,61 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP,
 	if (code<='z') { /* ASCII comes first */
 	    if (code>='a' && code<='z') {
 	        if (flags&ONIGENC_CASE_UPCASE) {
+		    MODIFIED;
 		    if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0069) /* i -> I WITH DOT ABOVE */
 			code = 0x0130;
 		    else
 			code += 'A'-'a';
-		    MODIFIED;
 		}
 	    }
 	    else if (code>='A' && code<='Z') {
 		if (flags&ONIGENC_CASE_DOWNCASE) {
+		    MODIFIED;
 		    if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI && code==0x0049) /* I -> DOTLESS i */
 			code = 0x0131;
 		    else
 			code += 'a'-'A';
-		    MODIFIED;
 		}
 	    }
 	}
 	else if (!(flags&ONIGENC_CASE_ASCII_ONLY) && code>=0x00C0) { /* deal with non-ASCII; nothing relevant below U+00C0 */
+	    const CodePointList3 *folded;
+
 	    if (code==0x0130) {
 		if (flags&ONIGENC_CASE_DOWNCASE) {
+		    MODIFIED;
 		    if (flags&ONIGENC_CASE_FOLD_TURKISH_AZERI)
 			code = 0x0069; /* I WITH DOT ABOVE -> i */
 		    else { /* make dot above explicit */
 			to += ONIGENC_CODE_TO_MBC(enc, 0x0069, to);
 			code = 0x0307; /* dot above */
 		    }
-		    MODIFIED;
 		}
 	    }
-	    /* the following case can be removed once we rely on data,
+	    /* the following special case for  DOTLESS i -> I
+	     * can be removed once we rely on data,
 	     * because the mapping is always the same */
-	    else if (code==0x0131 && (flags&ONIGENC_CASE_UPCASE)) { /* DOTLESS i -> I */
+	    else if (code==0x0131 && (flags&ONIGENC_CASE_UPCASE)) {
 		code = 0x0049; MODIFIED;
 	    }
+	    else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) {
+		if (flags&ONIGENC_CASE_FOLD) {
+		    const OnigCodePoint *next = folded->code;
+		    int count = OnigCodePointCount(folded->n);
+		    MODIFIED;
+		    if (count==1)
+		        code = *next;
+		    else if (count==2) {
+			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			code = *next;
+		    }
+		    else { /* count == 3 */
+			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			to += ONIGENC_CODE_TO_MBC(enc, *next++, to);
+			code = *next;
+		    }
+		}
+	    }
 	}
 	to += ONIGENC_CODE_TO_MBC(enc, code, to);
 	/* switch from titlecase to lowercase for capitalize */
author	duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2016-02-06 05:37:29 +0000
committer	duerst <duerst@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2016-02-06 05:37:29 +0000
commit	fe011e922c528993dacb1359d86d31f0f9eeb9a7 (patch)
tree	bb427b2ff4bee977ae543f12cf6ed0436fc0007a /enc/unicode.c
parent	ffe52148f9a8501c4189652f713014d63188f850 (diff)
download	ruby-fe011e922c528993dacb1359d86d31f0f9eeb9a7.tar.gz