diff options
-rw-r--r-- | enc/unicode.c | 6 | ||||
-rw-r--r-- | string.c | 2 | ||||
-rw-r--r-- | test/ruby/enc/test_case_comprehensive.rb | 6 | ||||
-rw-r--r-- | test/ruby/enc/test_case_mapping.rb | 17 |
4 files changed, 29 insertions, 2 deletions
diff --git a/enc/unicode.c b/enc/unicode.c index b3dbd55d32..6e8c3d8816 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -719,7 +719,11 @@ onigenc_unicode_case_map(OnigCaseFoldType* flagP, } } else if ((folded = onigenc_unicode_fold_lookup(code)) != 0) { /* data about character found in CaseFold_11_Table */ - if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ + if ((flags & ONIGENC_CASE_TITLECASE) && code>=0x1C90 && code<=0x1CBF) { /* Georgian MTAVRULI */ + MODIFIED; + code += 0x10D0 - 0x1C90; + } + else if ((flags & ONIGENC_CASE_TITLECASE) /* Titlecase needed, */ && (OnigCaseFoldFlags(folded->n) & ONIGENC_CASE_IS_TITLECASE)) { /* but already Titlecase */ /* already Titlecase, no changes needed */ } @@ -6727,6 +6727,8 @@ rb_str_downcase(int argc, VALUE *argv, VALUE str) * * Modifies <i>str</i> by converting the first character to uppercase and the * remainder to lowercase. Returns <code>nil</code> if no changes are made. + * There is an exception for modern Georgian (mkhedruli/MTAVRULI), where + * the result is the same as for String#downcase, to avoid mixed case. * * See String#downcase for meaning of +options+ and use with different encodings. * diff --git a/test/ruby/enc/test_case_comprehensive.rb b/test/ruby/enc/test_case_comprehensive.rb index cd6447e928..bde47017a2 100644 --- a/test/ruby/enc/test_case_comprehensive.rb +++ b/test/ruby/enc/test_case_comprehensive.rb @@ -73,7 +73,11 @@ TestComprehensiveCaseMapping.data_files_available? and class TestComprehensiveC @@codepoints << code upcase[code] = hex2utf8 data[12] unless data[12].empty? downcase[code] = hex2utf8 data[13] unless data[13].empty? - titlecase[code] = hex2utf8 data[14] unless data[14].empty? + if code>="\u1C90" and code<="\u1CBF" # exception for Georgian: use lowercase for titlecase + titlecase[code] = hex2utf8(data[13]) unless data[13].empty? + else + titlecase[code] = hex2utf8 data[14] unless data[14].empty? + end end read_data_file('CaseFolding') do |code, data| casefold[code] = hex2utf8(data[2]) if data[1] =~ /^[CF]$/ diff --git a/test/ruby/enc/test_case_mapping.rb b/test/ruby/enc/test_case_mapping.rb index d095cd569c..984fd5d479 100644 --- a/test/ruby/enc/test_case_mapping.rb +++ b/test/ruby/enc/test_case_mapping.rb @@ -187,6 +187,23 @@ class TestCaseMappingPreliminary < Test::Unit::TestCase assert_equal 0, "\ua64A" =~ /\uA64B/i end + def test_georgian_canary + message = "Reexamine implementation of Georgian in String#capitalize" + assert_equal false, "\u1CBB".match?(/\p{assigned}/), message + assert_equal false, "\u1CBC".match?(/\p{assigned}/), message + end + + def test_georgian_capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u1C91\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u1C90\u10D1\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u1C91\u10D2".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u1C92".capitalize + assert_equal "\u10D0\u10D1\u10D2", "\u10D0\u10D1\u10D2".capitalize + end + def no_longer_a_test_buffer_allocations assert_equal 'TURKISH*ı'*10, ('I'*10).downcase(:turkic) assert_equal 'TURKISH*ı'*100, ('I'*100).downcase(:turkic) |