From 66aeb2f7080dea92703f10546fb3cbcc946f6fa3 Mon Sep 17 00:00:00 2001 From: ser Date: Sun, 20 Jan 2008 04:31:57 +0000 Subject: r1479@bean: ser | 2008-01-19 14:26:31 -0500 r1483@bean: ser | 2008-01-19 14:47:23 -0500 Sam's fixes: * Don't blow up on empty documents * Add a test case for sorted attributes * Making the output predictable simplifies unit tests, and doesn't cost much given that most xml element have few attributes * Ruby 1.9 revision 14922 is more strict * Complete Ticket #134 * Fix for ticket #121 * Fix for ticket #124 * Fix for ticket #128 * Fix ticket #133 * Ticket #131 (Support Ruby 1.9) * Fix for ticket #127 * Fix for ticket #123 * Add missing data needed by test case r1481@bean (orig r1303): ser | 2008-01-19 17:22:32 -0500 Tagged for release r1482@bean (orig r1304): ser | 2008-01-19 17:27:10 -0500 Version bump git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@15141 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/parsers/baseparser.rb | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) (limited to 'lib/rexml/parsers') diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 854e707fae..85f2c4e46d 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -25,7 +25,20 @@ module REXML # # Nat Price gave me some good ideas for the API. class BaseParser - NCNAME_STR= '[\w:][\-\w\d.]*' + if String.method_defined? :encode + # Oniguruma / POSIX [understands unicode] + LETTER = '[[:alpha:]]' + DIGIT = '[[:digit:]]' + else + # Ruby < 1.9 [doesn't understand unicode] + LETTER = 'a-zA-Z' + DIGIT = '\d' + end + + COMBININGCHAR = '' # TODO + EXTENDER = '' # TODO + + NCNAME_STR= "[#{LETTER}_:][-#{LETTER}#{DIGIT}._:#{COMBININGCHAR}#{EXTENDER}]*" NAME_STR= "(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})" UNAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}" @@ -33,7 +46,7 @@ module REXML NAME = "([\\w:]#{NAMECHAR}*)" NMTOKEN = "(?:#{NAMECHAR})+" NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*" - REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)" + REFERENCE = "&(?:#{NAME};|#\\d+;|#x[0-9a-fA-F]+;)" REFERENCE_RE = /#{REFERENCE}/ DOCTYPE_START = /\A\s*