From f78e8fcef2b0759986a00b0f360ea80f8f30ce76 Mon Sep 17 00:00:00 2001 From: duerst Date: Sun, 7 Feb 2016 13:10:20 +0000 Subject: * common.mk: Added two more precondition files for enc/unicode/casefold.h * enc/unicode.c: Added shortening macros for enc/unicode/casefold.h * enc/unicode/case-folding.rb: Fixed file encoding for CaseFolding.txt to ASCII-8BIT (should fix some ci errors). Clarified usage. Created class MapItem. Partially implemented class CaseMapping. (with Kimihito Matsui) git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53767 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 11 +++++++++++ common.mk | 2 ++ enc/unicode.c | 10 ++++++++++ enc/unicode/case-folding.rb | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3eb9b734a8..a799ac29b6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +Sun Feb 7 22:10:08 2016 Martin Duerst + + * common.mk: Added two more precondition files for enc/unicode/casefold.h + + * enc/unicode.c: Added shortening macros for enc/unicode/casefold.h + + * enc/unicode/case-folding.rb: Fixed file encoding for CaseFolding.txt + to ASCII-8BIT (should fix some ci errors). Clarified usage. Created + class MapItem. Partially implemented class CaseMapping. + (with Kimihito Matsui) + Sun Feb 7 14:12:32 2016 Martin Duerst * enc/unicode/case-folding.rb: Fixing parameter passing. diff --git a/common.mk b/common.mk index afe8c51cb7..898e5478fc 100644 --- a/common.mk +++ b/common.mk @@ -1048,6 +1048,8 @@ $(srcdir)/.unicode-tables.time: $(srcdir)/tool/generic_erb.rb \ $(UNICODE_DATA_DIR) lib/unicode_normalize $(srcdir)/enc/unicode/casefold.h: $(srcdir)/enc/unicode/case-folding.rb \ + $(UNICODE_SRC_DATA_DIR)/UnicodeData.txt \ + $(UNICODE_SRC_DATA_DIR)/SpecialCasing.txt \ $(UNICODE_SRC_DATA_DIR)/CaseFolding.txt $(Q) $(BASERUBY) $(srcdir)/enc/unicode/case-folding.rb \ --output-file=$(srcdir)/enc/unicode/casefold.h \ diff --git a/enc/unicode.c b/enc/unicode.c index 99dc6dc0d3..b0ecc1bc50 100644 --- a/enc/unicode.c +++ b/enc/unicode.c @@ -140,8 +140,18 @@ code3_equal(const OnigCodePoint *x, const OnigCodePoint *y) return 1; } +#define UP ONIGENC_CASE_UPCASE +#define DOWN ONIGENC_CASE_DOWNCASE +#define TITLE ONIGENC_CASE_TITLECASE +#define FOLD ONIGENC_CASE_FOLD + #include "enc/unicode/casefold.h" +#undef UP +#undef DOWN +#undef TITLE +#undef FOLD + #include "enc/unicode/name2ctype.h" #define CODE_RANGES_NUM numberof(CodeRanges) diff --git a/enc/unicode/case-folding.rb b/enc/unicode/case-folding.rb index 33cbee5add..e1293a74db 100755 --- a/enc/unicode/case-folding.rb +++ b/enc/unicode/case-folding.rb @@ -1,10 +1,12 @@ #!/usr/bin/ruby -# Usage: +# Usage (for case folding only): # $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt # $ ruby case-folding.rb CaseFolding.txt -o casefold.h -# or: +# or (for case folding and case mapping): # $ wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt +# $ wget http://www.unicode.org/Public/UNIDATA/UnicodeData.txt +# $ wget http://www.unicode.org/Public/UNIDATA/SpecialCasing.txt # $ ruby case-folding.rb -m . -o casefold.h class CaseFolding @@ -175,12 +177,37 @@ class CaseFolding end end +class MapItem + def initialize(code, upper, lower, title) + @code = code + @upper = upper unless upper == '' + @lower = lower unless lower == '' + @title = title unless title == '' + end + + def flags + "" # preliminary implementation + end +end + class CaseMapping def initialize (mapping_directory) + @mappings = {} + IO.readlines(File.expand_path('UnicodeData.txt', mapping_directory), encoding: Encoding::ASCII_8BIT).each do |line| + next if line =~ /