From 4d15e619eb1838daafd1510e86c1c8076fb9b227 Mon Sep 17 00:00:00 2001 From: kou Date: Sun, 22 Apr 2018 08:09:04 +0000 Subject: rexml: Fix XPath bug of /#{ELEMENT_NAME} It doesn't match all elements named "ELEMENT_NAME" with any namespace URI (including the null namespace URI). It matches only elements named "ELEMENT_NAME" with the null namespace URI. https://www.w3.org/TR/1999/REC-xpath-19991116/#NT-NodeTest > if the QName does not have a prefix, then the namespace URI is null > (this is the same way attribute names are expanded). In XPath 1.0, we need to use "*[local-name()='#{ELEMENT_NAME}']" to match all elements named "ELEMENT_NAME" with any namespace URI (including the null namespace URI). But it's inconvenient. So this change includes support for the "*:#{LOCAL_NAME}" syntax that was introduced in XPath 2.0. * lib/rexml/parsers/xpathparser.rb: Support the "*:#{LOCAL_NAME}" syntax that was introduced in XPath 2.0. * lib/rexml/xpath_parser.rb: * Fix namespace URI processing for "#{ELEMENT_NAME}". Now, "#{ELEMENT_NAME}" doesn't accept elements with a non-null namespace URI. * Add "*:#{LOCAL_NAME}" support. * test/rexml/test_contrib.rb, test/rexml/test_core.rb, test/rexml/xpath/test_base.rb: Follow this change. * test/rexml/test_jaxen.rb: Fix namespace processing. 
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63236 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/parsers/xpathparser.rb | 15 ++++++++++++--- lib/rexml/xpath_parser.rb | 18 +++++++++++------- test/rexml/test_contrib.rb | 2 +- test/rexml/test_core.rb | 20 ++++++++++---------- test/rexml/test_jaxen.rb | 2 ++ test/rexml/xpath/test_base.rb | 1 + 6 files changed, 37 insertions(+), 21 deletions(-) diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 304dc51698..ac3c4d4e67 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -271,10 +271,12 @@ module REXML # String, if a name match #NodeTest # | ('*' | NCNAME ':' '*' | QNAME) NameTest - # | NODE_TYPE '(' ')' NodeType + # | '*' ':' NCNAME NameTest since XPath 2.0 + # | NODE_TYPE '(' ')' NodeType # | PI '(' LITERAL ')' PI # | '[' expr ']' Predicate - NCNAMETEST= /^(#{NCNAME_STR}):\*/u + PREFIX_WILDCARD = /^\*:(#{NCNAME_STR})/u + LOCAL_NAME_WILDCARD = /^(#{NCNAME_STR}):\*/u QNAME = Namespace::NAMESPLIT NODE_TYPE = /^(comment|text|node)\(\s*\)/m PI = /^processing-instruction\(/ @@ -282,6 +284,13 @@ module REXML original_path = path path = path.lstrip case path + when PREFIX_WILDCARD + prefix = nil + name = $1 + path = $' + parsed << :qname + parsed << prefix + parsed << name when /^\*/ path = $' parsed << :any @@ -301,7 +310,7 @@ module REXML end parsed << :processing_instruction parsed << (literal || '') - when NCNAMETEST + when LOCAL_NAME_WILDCARD prefix = $1 path = $' parsed << :namespace diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index a94ad91ea1..d217ae78e8 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -169,16 +169,20 @@ module REXML prefix = path_stack.shift name = path_stack.shift # enter(:qname, path_stack, prefix, name, nodeset) - nodeset.delete_if do |node| - # FIXME: This DOUBLES the time XPath searches take - ns = get_namespace( node, prefix ) + nodeset.select! 
do |node| if node.node_type == :element - if node.name == name + if prefix.nil? + node.name == name + elsif prefix.empty? + node.name == name and node.namespace == "" + else + node.name == name and + # FIXME: This DOUBLES the time XPath searches take + node.namespace == get_namespace(node, prefix) end + else + false end - !(node.node_type == :element and - node.name == name and - node.namespace == ns ) end # leave(:qname, path_stack, nodeset) node_types = ELEMENTS diff --git a/test/rexml/test_contrib.rb b/test/rexml/test_contrib.rb index 8462b3c23f..28b96a4972 100644 --- a/test/rexml/test_contrib.rb +++ b/test/rexml/test_contrib.rb @@ -451,7 +451,7 @@ EOL end def test_external_entity - xp = '//channel/title' + xp = '//*:channel/*:title' %w{working.rss broken.rss}.each do |path| File.open(File.join(fixture_path(path))) do |file| diff --git a/test/rexml/test_core.rb b/test/rexml/test_core.rb index 0071063128..b2e5299f39 100644 --- a/test/rexml/test_core.rb +++ b/test/rexml/test_core.rb @@ -877,18 +877,18 @@ EOL EOL # The most common case. People not caring about the namespaces much. - assert_equal( "XY", XPath.match( doc, "/test/a/text()" ).join ) - assert_equal( "XY", XPath.match( doc, "/test/x:a/text()" ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()" ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/x:a/text()" ).join ) # Surprising? I don't think so, if you believe my definition of the "common case" - assert_equal( "XYZ", XPath.match( doc, "//a/text()" ).join ) + assert_equal( "XYZ", XPath.match( doc, "//*:a/text()" ).join ) # These are the uncommon cases. Namespaces are actually important, so we define our own # mappings, and pass them in. 
assert_equal( "XY", XPath.match( doc, "/f:test/f:a/text()", { "f" => "1" } ).join ) # The namespaces are defined, and override the original mappings - assert_equal( "", XPath.match( doc, "/test/a/text()", { "f" => "1" } ).join ) + assert_equal( "XY", XPath.match( doc, "/*:test/*:a/text()", { "f" => "1" } ).join ) assert_equal( "", XPath.match( doc, "/x:test/x:a/text()", { "f" => "1" } ).join ) - assert_equal( "", XPath.match( doc, "//a/text()", { "f" => "1" } ).join ) + assert_equal( "XYZ", XPath.match( doc, "//*:a/text()", { "f" => "1" } ).join ) end def test_processing_instruction @@ -1390,8 +1390,8 @@ ENDXML def test_ticket_102 doc = REXML::Document.new '' - assert_equal( "foo", doc.root.elements["item"].attribute("name","ns").to_s ) - assert_equal( "item", doc.root.elements["item[@name='foo']"].name ) + assert_equal( "foo", doc.root.elements["*:item"].attribute("name","ns").to_s ) + assert_equal( "item", doc.root.elements["*:item[@name='foo']"].name ) end def test_ticket_14 @@ -1420,11 +1420,11 @@ ENDXML doc = REXML::Document.new( 'text' ) - assert_equal 'text', doc.text( "/doc/item[@name='foo']" ) + assert_equal 'text', doc.text( "/*:doc/*:item[@name='foo']" ) assert_equal "name='foo'", - doc.root.elements["item"].attribute("name", "ns").inspect + doc.root.elements["*:item"].attribute("name", "ns").inspect assert_equal "text", - doc.root.elements["item[@name='foo']"].to_s + doc.root.elements["*:item[@name='foo']"].to_s end def test_ticket_135 diff --git a/test/rexml/test_jaxen.rb b/test/rexml/test_jaxen.rb index 1a1d36ba9a..0f8653c956 100644 --- a/test/rexml/test_jaxen.rb +++ b/test/rexml/test_jaxen.rb @@ -56,6 +56,8 @@ module REXMLTests def process_context(doc, context) test_context = XPath.match(doc, context.attributes["select"]) namespaces = context.namespaces + namespaces.delete("var") + namespaces = nil if namespaces.empty? 
variables = {} var_namespace = "http://jaxen.org/test-harness/var" XPath.each(context, diff --git a/test/rexml/xpath/test_base.rb b/test/rexml/xpath/test_base.rb index 5e0b477081..497bd9fc8f 100644 --- a/test/rexml/xpath/test_base.rb +++ b/test/rexml/xpath/test_base.rb @@ -877,6 +877,7 @@ module REXMLTests xa + xb XML x = d.root -- cgit v1.2.3