From 78f540019a394421e1875cacaf956e8c23b18cc0 Mon Sep 17 00:00:00 2001 From: duerst Date: Tue, 29 Mar 2016 07:53:43 +0000 Subject: * enc/unicode/case-folding.rb, casefold.h: Tweaked handling of 6 special cases in CaseUnfold_11_Table. * enc/unicode.c: Adjustments for above. * test/ruby/enc/test_case_mapping.rb: Tests for the above: Some tests in test_titlecase activated; test_greek added. A test in test_cherokee fixed. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@54383 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- enc/unicode.c | 15 ++++++---- enc/unicode/case-folding.rb | 73 +++++++++++++++++++++++++++++---------------- enc/unicode/casefold.h | 18 ++++------- 3 files changed, 64 insertions(+), 42 deletions(-) (limited to 'enc') diff --git a/enc/unicode.c b/enc/unicode.c index 87ebb0d8a8..eebf060dd9 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -750,12 +750,17 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, } } else if ((folded = onigenc_unicode_unfold1_lookup(code)) != 0) { /* data about character found in CaseUnfold_11_Table */ - if (flags&OnigCaseFoldFlags(folded->n)) { - int count = OnigCodePointCount(folded->n); - const OnigCodePoint *next = folded->code; + if (flags&OnigCaseFoldFlags(folded->n)) { /* needs and data availability match */ MODIFIED; - if (count==1) - code = *next; + if (flags&OnigCaseFoldFlags(folded->n)&ONIGENC_CASE_TITLECASE) + code = folded->code[1]; + else + code = folded->code[0]; + } + else if ((flags&(ONIGENC_CASE_UPCASE)) + && (code==0x03B9||code==0x03BC)) { /* GREEK SMALL LETTERs IOTA/MU */ + MODIFIED; + code = folded->code[1]; } } } diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb index d3738be4e8..2df430185f 100755 --- a/enc/unicode/case-folding.rb +++ b/enc/unicode/case-folding.rb @@ -230,38 +230,61 @@ class CaseMapping def flags(from, type, to) # types: CaseFold_11, CaseUnfold_11, CaseUnfold_12, CaseUnfold_13 flags = "" - flags += '|F' if type=='CaseFold_11' from = Array(from).map {|i| "%04X" % i}.join(" ") to = Array(to).map {|i| "%04X" % i}.join(" ") - to = to.split(/ /).first if type=='CaseUnfold_11' item = @mappings[from] - if item - flags += '|U' if to==item.upper - flags += '|D' if to==item.lower - specials_index = nil - specials = [] - unless item.upper == item.title - if item.code == item.title - flags += '|IT' - else - flags += '|ST' - specials << item.title + specials_index = nil + specials = [] + case type + when 'CaseFold_11' + flags += '|F' + if item + flags += '|U' if to==item.upper + flags += '|D' if to==item.lower + unless item.upper == item.title + if item.code == item.title + flags += '|IT' + else + flags += '|ST' + specials << item.title + end + end + unless item.lower.nil? or item.lower==from or item.lower==to + specials << item.lower + flags += '|SL' + end + unless item.upper.nil? or item.upper==from or item.upper==to + specials << item.upper + flags += '|SU' end end - unless item.lower.nil? or item.lower==from or item.lower==to - specials << item.lower - flags += '|SL' - end - unless item.upper.nil? or item.upper==from or item.upper==to - specials << item.upper - flags += '|SU' - end - if specials.first - flags += "|I(#{@specials_length})" - @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+) - @specials << specials + when 'CaseUnfold_11' + to = to.split(/ /) + if item + case to.first + when item.upper then flags += '|U' + when item.lower then flags += '|D' + else + unless from=='03B9' or from=='03BC' + warn 'Unpredicted case 0; check data or adjust program (enc/unicode/case_folding.rb).' + end + end + unless item.upper == item.title + if item.code == item.title + warn 'Unpredicted case 1; check data or adjust program (enc/unicode/case_folding.rb).' + elsif item.title==to[1] + flags += '|ST' + else + warn 'Unpredicted case 2; check data or adjust program (enc/unicode/case_folding.rb).' + end + end end end + unless specials.empty? + flags += "|I(#{@specials_length})" + @specials_length += specials.map { |s| s.split(/ /).length }.reduce(:+) + @specials << specials + end flags end diff --git a/enc/unicode/casefold.h b/enc/unicode/casefold.h index c6c5d0d387..27beb5469c 100644 --- a/enc/unicode/casefold.h +++ b/enc/unicode/casefold.h @@ -3298,9 +3298,9 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = { {0x01b9, {1|U, {0x01b8}}}, {0x01bd, {1|U, {0x01bc}}}, {0x01bf, {1|U, {0x01f7}}}, - {0x01c6, {2|U|ST|I(347), {0x01c4, 0x01c5}}}, - {0x01c9, {2|U|ST|I(348), {0x01c7, 0x01c8}}}, - {0x01cc, {2|U|ST|I(349), {0x01ca, 0x01cb}}}, + {0x01c6, {2|U|ST, {0x01c4, 0x01c5}}}, + {0x01c9, {2|U|ST, {0x01c7, 0x01c8}}}, + {0x01cc, {2|U|ST, {0x01ca, 0x01cb}}}, {0x01ce, {1|U, {0x01cd}}}, {0x01d0, {1|U, {0x01cf}}}, {0x01d2, {1|U, {0x01d1}}}, @@ -3319,7 +3319,7 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = { {0x01eb, {1|U, {0x01ea}}}, {0x01ed, {1|U, {0x01ec}}}, {0x01ef, {1|U, {0x01ee}}}, - {0x01f3, {2|U|ST|I(350), {0x01f1, 0x01f2}}}, + {0x01f3, {2|U|ST, {0x01f1, 0x01f2}}}, {0x01f5, {1|U, {0x01f4}}}, {0x01f9, {1|U, {0x01f8}}}, {0x01fb, {1|U, {0x01fa}}}, @@ -3412,10 +3412,10 @@ static const CaseUnfold_11_Type CaseUnfold_11_Table[] = { {0x03b6, {1|U, {0x0396}}}, {0x03b7, {1|U, {0x0397}}}, {0x03b8, {3|U, {0x0398, 0x03d1, 0x03f4}}}, - {0x03b9, {3|SU|I(351), {0x0345, 0x0399, 0x1fbe}}}, + {0x03b9, {3, {0x0345, 0x0399, 0x1fbe}}}, {0x03ba, {2|U, {0x039a, 0x03f0}}}, {0x03bb, {1|U, {0x039b}}}, - {0x03bc, {2|SU|I(352), {0x00b5, 0x039c}}}, + {0x03bc, {2, {0x00b5, 0x039c}}}, {0x03bd, {1|U, {0x039d}}}, {0x03be, {1|U, {0x039e}}}, {0x03bf, {1|U, {0x039f}}}, @@ -6371,10 +6371,4 @@ OnigCodePoint CaseMappingSpecials[] = { L(2)|0x0544, 0x056B, L(2)|0x0544, 0x053B, L(2)|0x054E, 0x0576, L(2)|0x054E, 0x0546, L(2)|0x0544, 0x056D, L(2)|0x0544, 0x053D, - L(1)|0x01C5, - L(1)|0x01C8, - L(1)|0x01CB, - L(1)|0x01F2, - L(1)|0x0399, - L(1)|0x039C, }; -- cgit v1.2.3