From 79c0b7fcc86f5283ba3de22720eebf554b8e267c Mon Sep 17 00:00:00 2001 From: nobu Date: Mon, 10 Nov 2014 06:56:41 +0000 Subject: unicode_normalize: UNICODE_VERSION constant * template/unicode_norm_gen.tmpl (UnicodeNormalize): embed the version of Unicode data files used to generate. * test/test_unicode_normalize.rb (TestUnicodeNormalize): use the embedded version to load the test data. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48357 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- template/unicode_norm_gen.tmpl | 10 +++++++++- test/test_unicode_normalize.rb | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl index 2123643018..b8f9f963b3 100644 --- a/template/unicode_norm_gen.tmpl +++ b/template/unicode_norm_gen.tmpl @@ -7,7 +7,7 @@ # Constants for input and ouput directory InputDataDir = ARGV[0] || 'enc/unicode/data' -OuputDataDir = ARGV[1] || 'lib/unicode_normalize' +unicode_version = InputDataDir[/[\d.]+\z/] # convenience methods class Integer @@ -67,6 +67,12 @@ end # read the file 'CompositionExclusions.txt' composition_exclusions = vpath.open("#{InputDataDir}/CompositionExclusions.txt") {|f| + base = Regexp.quote(File.basename(f.path, '.*')) + ext = Regexp.quote(File.extname(f.path)) + version = (line = f.gets)[/^# *#{base}-([\d.]+)#{ext}\s*$/, 1] or + abort "No file version in #{f.path}: #{line}" + (unicode_version ||= version) == version or + abort "Unicode version of directory (#{unicode_version}) and file (#{version}) mismatch" f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex} } @@ -151,6 +157,8 @@ end # automatically generated by template/unicode_norm_gen.tmpl module UnicodeNormalize + UNICODE_VERSION = "<%=unicode_version%>".freeze + accents = "" \ "[<% accent_array.each_regexp_chars do |rx|%><%=rx%>" \ "<% end%>]" \ diff --git a/test/test_unicode_normalize.rb b/test/test_unicode_normalize.rb index 64916a0855..75b571a4a3 100644 --- a/test/test_unicode_normalize.rb +++ b/test/test_unicode_normalize.rb @@ -3,15 +3,17 @@ # Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp) require 'test/unit' +require 'unicode_normalize/normalize' class TestUnicodeNormalize < Test::Unit::TestCase - UNICODE_VERSION = '7.0.0' + UNICODE_VERSION = UnicodeNormalize::UNICODE_VERSION NormTest = Struct.new :source, :NFC, :NFD, :NFKC, :NFKD, :line def read_tests IO.readlines(File.expand_path("../enc/unicode/data/#{UNICODE_VERSION}/NormalizationTest.txt", __dir__), encoding: 'utf-8') + .tap { |lines| assert_include(lines[0], "NormalizationTest-#{UNICODE_VERSION}.txt")} .collect.with_index { |linedata, linenumber| [linedata, linenumber]} .reject { |line| line[0] =~ /^[\#@]/ } .collect do |line| -- cgit v1.2.3