From 3289dfc78b4c5051b0079217a193ce50d5349506 Mon Sep 17 00:00:00 2001 From: ser Date: Thu, 10 Jun 2004 02:01:04 +0000 Subject: - git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@6441 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/attribute.rb | 6 ++ lib/rexml/document.rb | 73 +-------------- lib/rexml/element.rb | 28 +++++- lib/rexml/parsers/baseparser.rb | 39 +++++++- lib/rexml/parsers/lightparser.rb | 4 + lib/rexml/parsers/pullparser.rb | 6 ++ lib/rexml/parsers/sax2parser.rb | 5 ++ lib/rexml/parsers/streamparser.rb | 9 +- lib/rexml/parsers/ultralightparser.rb | 4 + lib/rexml/parsers/xpathparser.rb | 164 ++++++++++++++++++++++++++-------- lib/rexml/rexml.rb | 10 +-- lib/rexml/text.rb | 8 ++ lib/rexml/xpath_parser.rb | 9 +- 13 files changed, 244 insertions(+), 121 deletions(-) (limited to 'lib/rexml') diff --git a/lib/rexml/attribute.rb b/lib/rexml/attribute.rb index 9eb3c211ea..a5a58055b8 100644 --- a/lib/rexml/attribute.rb +++ b/lib/rexml/attribute.rb @@ -152,6 +152,12 @@ module REXML write( rv ) rv end + + def xpath + path = @element.xpath + path += "/@#{self.expanded_name}" + return path + end end end #vim:ts=2 sw=2 noexpandtab: diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 1378bb212c..084ebd2a39 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -9,6 +9,7 @@ require "rexml/parseexception" require "rexml/output" require "rexml/parsers/baseparser" require "rexml/parsers/streamparser" +require "rexml/parsers/treeparser" module REXML # Represents a full XML document, including PIs, a doctype, etc. A @@ -171,77 +172,7 @@ module REXML private def build( source ) - build_context = self - parser = Parsers::BaseParser.new( source ) - tag_stack = [] - in_doctype = false - entities = nil - begin - while true - event = parser.pull - case event[0] - when :end_document - return - when :start_element - tag_stack.push(event[1]) - # find the observers for namespaces - build_context = build_context.add_element( event[1], event[2] ) - when :end_element - tag_stack.pop - build_context = build_context.parent - when :text - if not in_doctype - if build_context[-1].instance_of? Text - build_context[-1] << event[1] - else - build_context.add( - Text.new( event[1], build_context.whitespace, nil, true ) - ) unless ( - event[1].strip.size==0 and - build_context.ignore_whitespace_nodes - ) - end - end - when :comment - c = Comment.new( event[1] ) - build_context.add( c ) - when :cdata - c = CData.new( event[1] ) - build_context.add( c ) - when :processing_instruction - build_context.add( Instruction.new( event[1], event[2] ) ) - when :end_doctype - in_doctype = false - entities.each { |k,v| entities[k] = build_context.entities[k].value } - build_context = build_context.parent - when :start_doctype - doctype = DocType.new( event[1..-1], build_context ) - build_context = doctype - entities = {} - in_doctype = true - when :attlistdecl - n = AttlistDecl.new( event[1..-1] ) - build_context.add( n ) - when :externalentity - n = ExternalEntity.new( event[1] ) - build_context.add( n ) - when :elementdecl - n = ElementDecl.new( event[1] ) - build_context.add(n) - when :entitydecl - entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/ - build_context.add(Entity.new(event)) - when :notationdecl - n = NotationDecl.new( *event[1..-1] ) - build_context.add( n ) - when :xmldecl - x = XMLDecl.new( event[1], event[2], event[3] ) - build_context.add( x ) - end - end - rescue - raise ParseException.new( $!.message, parser.source, parser, $! ) - end + Parsers::TreeParser.new( source, self ).parse end end end diff --git a/lib/rexml/element.rb b/lib/rexml/element.rb index ffc81bed91..27886a90bc 100644 --- a/lib/rexml/element.rb +++ b/lib/rexml/element.rb @@ -73,7 +73,8 @@ module REXML @attributes.each_attribute do |attr| rv << " " attr.write( rv, 0 ) - end unless @attributes.empty? + end + rv << ">" if children.size > 0 rv << " ... " @@ -517,6 +518,17 @@ module REXML :element end + def xpath + path_elements = [] + cur = self + path_elements << __to_xpath_helper( self ) + while cur.parent + cur = cur.parent + path_elements << __to_xpath_helper( cur ) + end + return path_elements.reverse.join( "/" ) + end + ################################################# # Attributes # ################################################# @@ -677,6 +689,20 @@ module REXML private + def __to_xpath_helper node + rv = node.expanded_name + if node.parent + results = node.parent.find_all {|n| + n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name + } + if results.length > 1 + idx = results.index( node ) + rv << "[#{idx+1}]" + end + end + rv + end + # A private helper method def each_with_something( test, max=0, name=nil ) num = 0 diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index fbb1ec06a8..16e5c80237 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -100,6 +100,23 @@ module REXML self.stream = source end + def add_listener( listener ) + if !defined?(@listeners) or !@listeners + @listeners = [] + instance_eval <<-EOL + alias :_old_pull :pull + def pull + event = _old_pull + @listeners.each do |listener| + listener.receive event + end + event + end + EOL + end + @listeners << listener + end + attr_reader :source def stream=( source ) @@ -162,11 +179,11 @@ module REXML # Returns the next event. This is a +PullEvent+ object. def pull - return [ :end_document ] if empty? if @closed x, @closed = @closed, nil return [ :end_element, x ] end + return [ :end_document ] if empty? return @stack.shift if @stack.size > 0 @source.read if @source.buffer.size<2 if @document_status == nil @@ -411,3 +428,23 @@ module REXML end end end + +=begin + case event[0] + when :start_element + when :text + when :end_element + when :processing_instruction + when :cdata + when :comment + when :xmldecl + when :start_doctype + when :end_doctype + when :externalentity + when :elementdecl + when :entity + when :attlistdecl + when :notationdecl + when :end_doctype + end +=end diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb index 8c555f7960..0f35034993 100644 --- a/lib/rexml/parsers/lightparser.rb +++ b/lib/rexml/parsers/lightparser.rb @@ -10,6 +10,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index aeda6251fe..fe4d41c959 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -29,8 +29,14 @@ module REXML def initialize stream super @entities = {} + @listeners = nil end + def add_listener( listener ) + @listeners = [] unless @listeners + @listeners << listener + end + def each while has_next? yield self.pull diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 8c82cf8fc1..96440d17bf 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -14,6 +14,10 @@ module REXML @tag_stack = [] end + def add_listener( listener ) + @parser.add_listener( listener ) + end + # Listen arguments: # # Symbol, Array, Block @@ -89,6 +93,7 @@ module REXML if procs or listeners # break out the namespace declarations # The attributes live in event[2] + event[2].each {|n, v| event[2][n] = @parser.normalize(v)} nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } nsdecl.collect! { |n, value| [ n[6..-1], value ] } @namespace_stack.push({}) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 49bef0d8fa..357cc186e6 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -6,6 +6,10 @@ module REXML @parser = BaseParser.new( source ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def parse # entity string while true @@ -14,7 +18,10 @@ module REXML when :end_document return when :start_element - @listener.tag_start( event[1], event[2] ) + attrs = event[2].each do |n, v| + event[2][n] = @parser.unnormalize( v ) + end + @listener.tag_start( event[1], attrs ) when :end_element @listener.tag_end( event[1] ) when :text diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb index f3b208bf0f..adc4af18e2 100644 --- a/lib/rexml/parsers/ultralightparser.rb +++ b/lib/rexml/parsers/ultralightparser.rb @@ -9,6 +9,10 @@ module REXML @parser = REXML::Parsers::BaseParser.new( stream ) end + def add_listener( listener ) + @parser.add_listener( listener ) + end + def rewind @stream.rewind @parser.stream = @stream diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 082f8255ad..41b2b8a5c1 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -30,46 +30,126 @@ module REXML parsed end - def to_string( path ) + def abbreviate( path ) + path = path.kind_of?(String) ? parse( path ) : path string = "" + document = false while path.size > 0 - case path[0] - when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self - op = path.shift + op = path.shift + case op + when :node + when :attribute + string << "/" if string.size > 0 + string << "@" + when :child + string << "/" if string.size > 0 + when :descendant_or_self + string << "/" + when :self + string << "." + when :parent + string << ".." + when :any + string << "*" + when :text + string << "text()" + when :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, + :namespace, :preceding, :preceding_sibling + string << "/" unless string.size == 0 + string << op.to_s.tr("_", "-") + string << "::" + when :qname + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + string << '[' + string << predicate_to_string( path.shift ) {|x| abbreviate( x ) } + string << ']' + when :document + document = true + when :function + string << path.shift + string << "( " + string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )} + string << " )" + when :literal + string << %Q{ "#{path.shift}" } + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << op.inspect + string << ")" + end + end + string = "/"+string if document + return string + end + + def expand( path ) + path = path.kind_of?(String) ? parse( path ) : path + string = "" + document = false + while path.size > 0 + op = path.shift + case op + when :node + string << "node()" + when :attribute, :child, :following, :following_sibling, + :ancestor, :ancestor_or_self, :descendant, :descendant_or_self, + :namespace, :preceding, :preceding_sibling, :self, :parent string << "/" unless string.size == 0 - string << op.to_s + string << op.to_s.tr("_", "-") string << "::" when :any - path.shift string << "*" when :qname - path.shift prefix = path.shift name = path.shift string << prefix+":" if prefix.size > 0 string << name when :predicate - path.shift string << '[' - string << predicate_to_string( path.shift ) - string << ' ]' + string << predicate_to_string( path.shift ) { |x| expand(x) } + string << ']' + when :document + document = true else string << "/" unless string.size == 0 string << "UNKNOWN(" - string << path.shift.inspect + string << op.inspect string << ")" end end + string = "/"+string if document return string end - def predicate_to_string( path ) + def predicate_to_string( path, &block ) string = "" case path[0] - when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union + when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union op = path.shift - left = predicate_to_string( path.shift ) - right = predicate_to_string( path.shift ) + case op + when :eq + op = "=" + when :lt + op = "<" + when :gt + op = ">" + when :lteq + op = "<=" + when :gteq + op = ">=" + when :neq + op = "!=" + when :union + op = "|" + end + left = predicate_to_string( path.shift, &block ) + right = predicate_to_string( path.shift, &block ) string << " " string << left string << " " @@ -82,7 +162,7 @@ module REXML name = path.shift string << name string << "( " - string << predicate_to_string( path.shift ) + string << predicate_to_string( path.shift, &block ) string << " )" when :literal path.shift @@ -91,7 +171,7 @@ module REXML string << " " else string << " " - string << to_string( path ) + string << yield( path ) string << " " end return string.squeeze(" ") @@ -534,7 +614,6 @@ module REXML #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' def FunctionCall rest, parsed path, arguments = parse_args(rest) - #puts "Function call >>> (#{arguments.inspect})" argset = [] for argument in arguments args = [] @@ -567,28 +646,39 @@ module REXML def parse_args( string ) arguments = [] ind = 0 + inquot = false + inapos = false depth = 1 begin case string[ind] - when ?( - depth += 1 - if depth == 1 - string = string[1..-1] - ind -= 1 - end - when ?) - depth -= 1 - if depth == 0 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - end - when ?, - if depth == 1 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - ind = 0 + when ?" + inquot = !inquot unless inapos + when ?' + inapos = !inapos unless inquot + else + unless inquot or inapos + case string[ind] + when ?( + depth += 1 + if depth == 1 + string = string[1..-1] + ind -= 1 + end + when ?) + depth -= 1 + if depth == 0 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + end + when ?, + if depth == 1 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + ind = -1 + end + end end end ind += 1 diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 6403a785c0..40995d7166 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -10,8 +10,8 @@ # # Main page:: http://www.germane-software.com/software/rexml # Author:: Sean Russell -# Version:: 3.0.9 -# Date:: +2004/137 +# Version:: 3.1.1 +# Date:: +2004/162 # # This API documentation can be downloaded from the REXML home page, or can # be accessed online[http://www.germane-software.com/software/rexml_doc] @@ -20,7 +20,7 @@ # or can be accessed # online[http://www.germane-software.com/software/rexml/docs/tutorial.html] module REXML - Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell " - Date = "+2004/137" - Version = "3.0.9" + Copyright = "Copyright © 2001, 2002, 2003, 2004 Sean Russell " + Date = "+2004/162" + Version = "3.1.1" end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 388256ac81..3e5fcc23b6 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -218,6 +218,14 @@ module REXML writer << s end + # FIXME + # This probably won't work properly + def xpath + path = @parent.xpath + path += "/text()" + return path + end + # Writes out text, substituting special characters beforehand. # +out+ A String, IO, or any other object supporting <<( String ) # +input+ the text to substitute and the write out diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb index 8aadb8ef86..ce6cec5374 100644 --- a/lib/rexml/xpath_parser.rb +++ b/lib/rexml/xpath_parser.rb @@ -43,8 +43,6 @@ module REXML @variables[ variable_name ] = value end - private - def match( path_stack, nodeset ) while ( path_stack.size > 0 and nodeset.size > 0 ) #puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'" @@ -55,6 +53,8 @@ module REXML nodeset end + private + def internal_parse path_stack, nodeset #puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0 return nodeset if nodeset.size == 0 or path_stack.size == 0 @@ -423,10 +423,9 @@ module REXML current_index = all_siblings.index( node ) preceding_siblings = all_siblings[ 0 .. current_index-1 ] - preceding_siblings.reverse! preceding = [] - recurse( preceding_siblings ) { |node| preceding << node } - preceding.reverse + recurse( preceding_siblings ) { |node| preceding.unshift( node ) } + preceding end def equality_relational_compare( set1, op, set2 ) -- cgit v1.2.3