== \Regexps Based on Unicode Properties

The properties shown here are those currently supported in Ruby.
Older versions may not support all of these.

<%
# Generate a documentation file for the unicode properties.
#
# Usage:
#
#   Get PropertyAliases.txt, PropertyValueAliases.txt from unicode.org
#   (http://unicode.org/Public/UNIDATA/) and run
#   ```
#   ruby tool/generic_erb.rb template/unicode_properties.rdoc.tmpl data_dir name2ctype.h
#   ```
#
# The first argument is the directory holding the two alias files; the
# remaining arguments (or stdin) are read through ARGF as the header to scan.

# Validate the directory explicitly. (Object#tap returns its receiver no
# matter what the block evaluates to, so it cannot be used as a filter.)
data_dir = ARGV.shift
abort("Usage: #{$0} data_directory [name2ctype.h]") unless data_dir && Dir.exist?(data_dir)

# Map group names, given as last argument to #make_const in enc-unicode.rb,
# to sections in the doc. The order in this hash controls the order in the doc.
# Keys are compared with `===`, so a Regexp key matches by pattern and a
# String key matches by equality.
map = {
  /\[\[:/            => 'POSIX brackets',
  '-'                => 'Special',
  /.+ Category/      => 'Major and General Categories',
  'Binary Property'  => 'Prop List',
  /Derived Property/ => 'Derived Core Properties',
  'Script'           => 'Scripts',
  'Block'            => 'Blocks',
  'Emoji'            => 'Emoji',
  /Grapheme/         => 'Graphemes',
  /Derived Age/      => 'Derived Ages',
}

# aliases in the form { short => long }, e.g. { 'Hex' => 'Hex_Digit', 'L' => 'Letter' }
# Only the general category (gc) and script (sc) value aliases are collected.
aliases = (
  File.binread(File.join(data_dir, 'PropertyAliases.txt')).scan(/^(\w+)\s*; (\w+)/) +
  File.binread(File.join(data_dir, 'PropertyValueAliases.txt')).scan(/^(?:gc|sc)\s*; (\w+)\s*; (\w+)/)
).to_h

# { section title => [long property name, ...] }
props_by_section = {}
ARGF.each_line do |line|
  # The named captures assign the locals `prop` and `name` on a match.
  next unless /'(?<prop>[^']+)': (?<name>.+) \*/ =~ line
  next if prop == 'NEWLINE' # ignore custom internal prop

  # Kernel#warn returns nil, so a prop without a matching section is reported
  # and then collected under the nil key, which is never rendered below.
  section = map.find { |pattern, _title| pattern === name }&.last ||
    warn("no doc section for #{name}")

  # normalize prop names - the header file uses a mix of short and long names
  long_prop_name = aliases[prop] || prop
  (props_by_section[section] ||= []) << long_prop_name
end

# Emit one heading per section, in the order declared in `map`.
map.each_value do |section|
-%>

=== <%=section%>

% # One list item per property: the long name, followed by its short alias
% # when one exists and differs from the long name.
% props_by_section[section].sort.each do |prop|
- <%= [prop, aliases.key(prop)].compact.uniq.map { |v| "\\p{#{v}}" }.join(', ') %>
% end
% end