unicode_normalize: UNICODE_VERSION constant

* template/unicode_norm_gen.tmpl (UnicodeNormalize): embed the version
  of the Unicode data files used for generation.

* test/test_unicode_normalize.rb (TestUnicodeNormalize): use the
  embedded version to load the test data.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@48357 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-11-10 06:56:41 +0000
committer: nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2014-11-10 06:56:41 +0000
commit: 79c0b7fcc86f5283ba3de22720eebf554b8e267c (patch)
tree: 42a7049d8bfd7c1e1851b8f64f8e74d16fdba184
parent: 8cba9dccf68faae3ba2eb50c5f83a03c3828a9a0 (diff)
download: ruby-79c0b7fcc86f5283ba3de22720eebf554b8e267c.tar.gz
2 files changed, 12 insertions, 2 deletions
diff --git a/template/unicode_norm_gen.tmpl b/template/unicode_norm_gen.tmpl
index 2123643018..b8f9f963b3 100644
--- a/template/unicode_norm_gen.tmpl
+++ b/template/unicode_norm_gen.tmpl
@@ -7,7 +7,7 @@
 
 # Constants for input and ouput directory
 InputDataDir = ARGV[0] || 'enc/unicode/data'
-OuputDataDir = ARGV[1] || 'lib/unicode_normalize'
+unicode_version = InputDataDir[/[\d.]+\z/]
 
 # convenience methods
 class Integer
@@ -67,6 +67,12 @@ end
 
 # read the file 'CompositionExclusions.txt'
 composition_exclusions = vpath.open("#{InputDataDir}/CompositionExclusions.txt") {|f|
+  base = Regexp.quote(File.basename(f.path, '.*'))
+  ext = Regexp.quote(File.extname(f.path))
+  version = (line = f.gets)[/^# *#{base}-([\d.]+)#{ext}\s*$/, 1] or
+    abort "No file version in #{f.path}: #{line}"
+  (unicode_version ||= version) == version or
+    abort "Unicode version of directory (#{unicode_version}) and file (#{version}) mismatch"
   f.grep(/^[A-Z0-9]{4,5}/) {|line| line.hex}
 }
 
@@ -151,6 +157,8 @@ end
 # automatically generated by template/unicode_norm_gen.tmpl
 
 module UnicodeNormalize
+  UNICODE_VERSION = "<%=unicode_version%>".freeze
+
   accents = "" \
     "[<% accent_array.each_regexp_chars do |rx|%><%=rx%>" \
     "<% end%>]" \
diff --git a/test/test_unicode_normalize.rb b/test/test_unicode_normalize.rb
index 64916a0855..75b571a4a3 100644
--- a/test/test_unicode_normalize.rb
+++ b/test/test_unicode_normalize.rb
@@ -3,15 +3,17 @@
 # Copyright Ayumu Nojima (野島 歩) and Martin J. Dürst (duerst@it.aoyama.ac.jp)
 
 require 'test/unit'
+require 'unicode_normalize/normalize'
 
 class TestUnicodeNormalize < Test::Unit::TestCase
 
-  UNICODE_VERSION = '7.0.0'
+  UNICODE_VERSION = UnicodeNormalize::UNICODE_VERSION
 
   NormTest = Struct.new :source, :NFC, :NFD, :NFKC, :NFKD, :line
 
   def read_tests
     IO.readlines(File.expand_path("../enc/unicode/data/#{UNICODE_VERSION}/NormalizationTest.txt", __dir__), encoding: 'utf-8')
+    .tap { |lines| assert_include(lines[0], "NormalizationTest-#{UNICODE_VERSION}.txt")}
     .collect.with_index { |linedata, linenumber| [linedata, linenumber]}
     .reject { |line| line[0] =~ /^[\#@]/ }
     .collect do |line|
author	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-11-10 06:56:41 +0000
committer	nobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2014-11-10 06:56:41 +0000
commit	79c0b7fcc86f5283ba3de22720eebf554b8e267c (patch)
tree	42a7049d8bfd7c1e1851b8f64f8e74d16fdba184
parent	8cba9dccf68faae3ba2eb50c5f83a03c3828a9a0 (diff)
download	ruby-79c0b7fcc86f5283ba3de22720eebf554b8e267c.tar.gz