diff options
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | lib/rexml/parsers/baseparser.rb | 14 |
2 files changed, 15 insertions, 7 deletions
@@ -1,3 +1,11 @@ +Sat Aug 10 12:43:15 2013 Kouhei Sutou <kou@cozmixng.org> + + * lib/rexml/parsers/baseparser.rb (REXML::Parsers::BaseParser): + Use "\A" instead of "^" in document type declaration patterns + because they are used as the head match in content not the head + match in line. They don't cause any problems in the current code + but it should be fixed. + Sat Aug 10 12:39:00 2013 Kouhei Sutou <kou@cozmixng.org> * test/rexml/parse/test_document_type_declaration.rb: Add tests for diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 04a23f4428..e937b169be 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -61,11 +61,11 @@ module REXML ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um - ENTITY_START = /^\s*<!ENTITY/ + ENTITY_START = /\A\s*<!ENTITY/ IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'](.*?)['"])?(\s+['"](.*?)["'])?/u - ELEMENTDECL_START = /^\s*<!ELEMENT/um - ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um - SYSTEMENTITY = /^\s*(%.*?;)\s*$/um + ELEMENTDECL_START = /\A\s*<!ELEMENT/um + ELEMENTDECL_PATTERN = /\A\s*(<!ELEMENT.*?)>/um + SYSTEMENTITY = /\A\s*(%.*?;)\s*$/um ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)" NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)" ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))" @@ -74,9 +74,9 @@ module REXML DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))" ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}" ATTDEF_RE = /#{ATTDEF}/ - ATTLISTDECL_START = /^\s*<!ATTLIST/um - ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um - NOTATIONDECL_START = /^\s*<!NOTATION/um + ATTLISTDECL_START = /\A\s*<!ATTLIST/um + ATTLISTDECL_PATTERN = /\A\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um + NOTATIONDECL_START = /\A\s*<!NOTATION/um PUBLIC = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(PUBLIC)\s+(["'])(.*?)\3(?:\s+(["'])(.*?)\5)?\s*>/um SYSTEM = /\A\s*<!NOTATION\s+(\w[\-\w]*)\s+(SYSTEM)\s+(["'])(.*?)\3\s*>/um |