From 15bfe2793a849f500a15ae54553f4ffba5d55b41 Mon Sep 17 00:00:00 2001 From: duerst Date: Wed, 22 Oct 2014 09:00:04 +0000 Subject: test/test-unicode_normalize.rb: Importing from https://github.com/duerst/eprun/blob/master/test/test_normalize.rb. (removing trailing whitespace, fixing EOLs and adding EOL property) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48084 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 8 +- test/test_unicode_normalize.rb | 171 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 178 insertions(+), 1 deletion(-) create mode 100644 test/test_unicode_normalize.rb diff --git a/ChangeLog b/ChangeLog index 914c26ca71..ff870434dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Wed Oct 22 18:00:00 2014 Martin Duerst + + * test/test-unicode_normalize.rb: Importing from + https://github.com/duerst/eprun/blob/master/test/test_normalize.rb. + (removing trailing whitespace, fixing EOLs and adding EOL property) + Wed Oct 22 08:21:09 2014 Yuki Yugui Sonoda * nacl/pepper_main.c (Instance_DidCreate): mount devfs and rebind fd 0 @@ -39,7 +45,7 @@ Tue Oct 21 17:32:32 2014 Martin Duerst * common.mk: Adding explicit creation of directory enc/unicode/data because git doesn't handle empty - directories. + directories. [patch by Masahiro Ide, filed with r48073] Tue Oct 21 17:12:12 2014 Martin Duerst diff --git a/test/test_unicode_normalize.rb b/test/test_unicode_normalize.rb new file mode 100644 index 0000000000..82c7fffbe1 --- /dev/null +++ b/test/test_unicode_normalize.rb @@ -0,0 +1,171 @@ +# coding: utf-8 + +# Copyright 2010-2013 Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp) +# available under the same licence as Ruby itself +# (see http://www.ruby-lang.org/en/LICENSE.txt) + +Encoding.default_external = 'utf-8' +Encoding.default_internal = 'utf-8' +require '../lib/string_normalize' +require 'test/unit' + +NormTest = Struct.new :source, :NFC, :NFD, :NFKC, :NFKD, :line + +class TestNormalize < Test::Unit::TestCase + @@debug = false # if true, generation of explicit error messages is switched on + # false is about two times faster than true + def read_tests + IO.readlines('../data/NormalizationTest.txt') + .collect.with_index { |linedata, linenumber| [linedata, linenumber]} + .reject { |line| line[0] =~ /^[\#@]/ } + .collect do |line| + NormTest.new *(line[0].split(';').take(5).collect do |code_string| + code_string.split(/\s/).collect { |cp| cp.to_i(16) }.pack('U*') + end + [line[1]+1]) + end + end + + def to_codepoints(string) # this could be defined as a refinement on String + string.codepoints.collect { |cp| cp.to_s(16).upcase.rjust(4, '0') } + end + + def setup + @@tests ||= read_tests + end + + def self.generate_test_normalize(target, normalization, source, prechecked) + define_method "test_normalize_to_#{target}_from_#{source}_with_#{normalization}" do + @@tests.each do |test| + if not prechecked or test[source]==test[prechecked] + expected = test[target] + actual = test[source].normalize(normalization) + if @@debug + assert_equal expected, actual, + "#{to_codepoints(expected)} expected but was #{to_codepoints(actual)} on line #{test[:line]} (#{normalization})" + else + assert_equal expected, actual + end + end + end + end + end + +# source; NFC; NFD; NFKC; NFKD +# NFC +# :NFC == toNFC(:source) == toNFC(:NFC) == toNFC(:NFD) + generate_test_normalize :NFC, :nfc, :source, nil + generate_test_normalize :NFC, :nfc, :NFC, :source + generate_test_normalize :NFC, :nfc, :NFD, :source +# :NFKC == toNFC(:NFKC) == toNFC(:NFKD) + generate_test_normalize :NFKC, :nfc, :NFKC, nil + generate_test_normalize :NFKC, :nfc, :NFKD, :NFKC +# +# NFD +# :NFD == toNFD(:source) == toNFD(:NFC) == toNFD(:NFD) + generate_test_normalize :NFD, :nfd, :source, nil + generate_test_normalize :NFD, :nfd, :NFC, :source + generate_test_normalize :NFD, :nfd, :NFD, :source +# :NFKD == toNFD(:NFKC) == toNFD(:NFKD) + generate_test_normalize :NFKD, :nfd, :NFKC, nil + generate_test_normalize :NFKD, :nfd, :NFKD, :NFKC +# +# NFKC +# :NFKC == toNFKC(:source) == toNFKC(:NFC) == toNFKC(:NFD) == toNFKC(:NFKC) == toNFKC(:NFKD) + generate_test_normalize :NFKC, :nfkc, :source, nil + generate_test_normalize :NFKC, :nfkc, :NFC, :source + generate_test_normalize :NFKC, :nfkc, :NFD, :source + generate_test_normalize :NFKC, :nfkc, :NFKC, :NFC + generate_test_normalize :NFKC, :nfkc, :NFKD, :NFD +# +# NFKD +# :NFKD == toNFKD(:source) == toNFKD(:NFC) == toNFKD(:NFD) == toNFKD(:NFKC) == toNFKD(:NFKD) + generate_test_normalize :NFKD, :nfkd, :source, nil + generate_test_normalize :NFKD, :nfkd, :NFC, :source + generate_test_normalize :NFKD, :nfkd, :NFD, :source + generate_test_normalize :NFKD, :nfkd, :NFKC, :NFC + generate_test_normalize :NFKD, :nfkd, :NFKD, :NFD + + def self.generate_test_check_true(source, normalization) + define_method "test_check_true_#{source}_as_#{normalization}" do + @@tests.each do |test| + actual = test[source].normalized?(normalization) + if @@debug + assert_equal true, actual, + "#{to_codepoints(test[source])} should check as #{normalization} but does not on line #{test[:line]}" + else + assert_equal true, actual + end + end + end + end + + def one_false_check_test(test, compare_column, check_column, test_form, line) + if test[check_column- 1] != test[compare_column- 1] + actual = test[check_column- 1].normalized?(test_form) + assert_equal false, actual, "failed on line #{line+1} (#{test_form})" + end + end + + def self.generate_test_check_false(source, compare, normalization) + define_method "test_check_false_#{source}_as_#{normalization}" do + @@tests.each do |test| + if test[source] != test[compare] + actual = test[source].normalized?(normalization) + if @@debug + assert_equal false, actual, + "#{to_codepoints(test[source])} should not check as #{normalization} but does on line #{test[:line]}" + else + assert_equal false, actual + end + end + end + end + end + + generate_test_check_true :NFC, :nfc + generate_test_check_true :NFD, :nfd + generate_test_check_true :NFKC, :nfc + generate_test_check_true :NFKC, :nfkc + generate_test_check_true :NFKD, :nfd + generate_test_check_true :NFKD, :nfkd + + generate_test_check_false :source, :NFD, :nfd + generate_test_check_false :NFC, :NFD, :nfd + generate_test_check_false :NFKC, :NFKD, :nfd + generate_test_check_false :source, :NFC, :nfc + generate_test_check_false :NFD, :NFC, :nfc + generate_test_check_false :NFKD, :NFKC, :nfc + generate_test_check_false :source, :NFKD, :nfkd + generate_test_check_false :NFC, :NFKD, :nfkd + generate_test_check_false :NFD, :NFKD, :nfkd + generate_test_check_false :NFKC, :NFKD, :nfkd + generate_test_check_false :source, :NFKC, :nfkc + generate_test_check_false :NFC, :NFKC, :nfkc + generate_test_check_false :NFD, :NFKC, :nfkc + generate_test_check_false :NFKD, :NFKC, :nfkc + + def test_non_UTF_8 + assert_equal "\u1E0A".encode('UTF-16BE'), "D\u0307".encode('UTF-16BE').normalize(:nfc) + assert_equal true, "\u1E0A".encode('UTF-16BE').normalized?(:nfc) + assert_equal false, "D\u0307".encode('UTF-16BE').normalized?(:nfc) + end + + def test_singleton_with_accents + assert_equal "\u0136", "\u212A\u0327".normalize(:nfc) + end + + def test_partial_jamo_compose + assert_equal "\uAC01", "\uAC00\u11A8".normalize(:nfc) + end + + def test_partial_jamo_decompose + assert_equal "\u1100\u1161\u11A8", "\uAC00\u11A8".normalize(:nfd) + end + + def test_hangul_plus_accents + assert_equal "\uAC00\u0323\u0300", "\uAC00\u0300\u0323".normalize(:nfc) + assert_equal "\uAC00\u0323\u0300", "\u1100\u1161\u0300\u0323".normalize(:nfc) + assert_equal "\u1100\u1161\u0323\u0300", "\uAC00\u0300\u0323".normalize(:nfd) + assert_equal "\u1100\u1161\u0323\u0300", "\u1100\u1161\u0300\u0323".normalize(:nfd) + end +end -- cgit v1.2.3