From 95be40a06d1227a0cd0a1cec4424ab33748d1450 Mon Sep 17 00:00:00 2001
From: ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>
Date: Fri, 13 Feb 2004 22:40:14 +0000
Subject: @@ Fix for the XPath descendant* result set ordering bug @@ @@ SAX2
 listener bug fixes @@ @@ Undid a code change that caused a 10x speed
 regression @@ @@ Indentation fixes, and a new word wrapping feature for text
 nodes   was contributed by Devin Bayer (documentation forthcoming; see the  
 change logs for now) @@

The XPath bug fix is really ugly and inefficient, but I spent two days hacking
at it and this was the best I could come up with.

The SAX2 listener fixes had to do with crashes in certain conditions, like when
there was a carriage return at the end of a document

Several people submitted patches for the speed regression; it is embarrassing
how long it took me to get around to looking at this.  To this day, I don't
know where the offending code came from.

Encoding fixes

Added a contributed word wrapping option for text formatting.  Devin Bayer
contributed this.  Here's his comment:
"Setting :wordwrapping to :all, wordwraps all text nodes longer than 60
  characters.
  Setting :indentstyle to aString, make aString used as indentation,
  instead of the default '  '.
  And as long as :respect_whitespace isn't set for the element,
  multiline text nodes will be indented."


git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5696 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
---
 lib/rexml/xpath_parser.rb | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

(limited to 'lib/rexml/xpath_parser.rb')

diff --git a/lib/rexml/xpath_parser.rb b/lib/rexml/xpath_parser.rb
index 9cd1e5d64c..377a51e885 100644
--- a/lib/rexml/xpath_parser.rb
+++ b/lib/rexml/xpath_parser.rb
@@ -29,7 +29,8 @@ module REXML
 
 		def parse path, nodeset
 			path_stack = @parser.parse( path )
-			#puts "PARSE: #{path} => #{path_stack.inspect}"
+      #puts "PARSE: #{path} => #{path_stack.inspect}"
+      #puts "PARSE: nodeset = #{nodeset.collect{|x|x.to_s}.inspect}"
 			match( path_stack, nodeset )
 		end
 
@@ -46,7 +47,7 @@ module REXML
 
 		def match( path_stack, nodeset ) 
 			while ( path_stack.size > 0 and nodeset.size > 0 ) 
-				#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.type}.inspect}'"
+				#puts "PARSE: #{path_stack.inspect} '#{nodeset.collect{|n|n.class}.inspect}'"
 				nodeset = internal_parse( path_stack, nodeset )
 				#puts "NODESET: #{nodeset.size}"
 				#puts "PATH_STACK: #{path_stack.inspect}"
@@ -55,8 +56,9 @@ module REXML
 		end
 
 		def internal_parse path_stack, nodeset
+      #puts "INTERNAL_PARSE RETURNING WITH NO RESULTS" if nodeset.size == 0 or path_stack.size == 0
 			return nodeset if nodeset.size == 0 or path_stack.size == 0
-			#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.type}.inspect}"
+			#puts "INTERNAL_PARSE: #{path_stack.inspect}, #{nodeset.collect{|n| n.class}.inspect}"
 			case path_stack.shift
 			when :document
 				return [ nodeset[0].root.parent ]
@@ -205,7 +207,7 @@ module REXML
 					Functions::index = index+1
 					#puts "Node #{node} and index=#{index+1}"
 					result = Predicate( predicate, node )
-					#puts "Predicate returned #{result} (#{result.type}) for #{node.type}"
+					#puts "Predicate returned #{result} (#{result.class}) for #{node.class}"
 					if result.kind_of? Numeric
 						#puts "#{result} == #{index} => #{result == index}"
 						new_nodeset << node if result == (index+1)
@@ -285,6 +287,7 @@ module REXML
 		end
 
 		##########################################################
+    # FIXME
 		# The next two methods are BAD MOJO!
 		# This is my achilles heel.  If anybody thinks of a better
 		# way of doing this, be my guest.  This really sucks, but 
@@ -294,24 +297,39 @@ module REXML
 		def descendant_or_self( path_stack, nodeset )
 			rs = []
 			d_o_s( path_stack, nodeset, rs )
-			#puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
-			rs.flatten.compact
+      #puts "RS = #{rs.collect{|n|n.to_s}.inspect}"
+      document_order(rs.flatten.compact)
 		end
 
 		def d_o_s( p, ns, r )
-			#puts r.collect{|n|n.to_s}.inspect
-			#puts ns.collect{|n|n.to_s}.inspect
 			nt = nil
 			ns.each_index do |i|
 				n = ns[i]
 				x = match( p.clone, [ n ] )
-				#puts "Got a match on #{p.inspect} for #{ns.collect{|n|n.to_s+"("+n.type.to_s+")"}.inspect}"
 				nt = n.node_type
-				d_o_s( p, n.children, x ) if nt == :element or nt == :document
-				r[i,0] = [x] if x.size > 0
+				d_o_s( p, n.children, x ) if nt == :element or nt == :document and n.children.size > 0
+        r.concat(x) if x.size > 0
 			end
 		end
 
+
+    # Reorders an array of nodes so that they are in document order
+    # It tries to do this efficiently.
+    def document_order( array_of_nodes )
+      new_arry = []
+      array_of_nodes.each { |node|
+        node_idx = [] 
+        np = node.node_type == :attribute ? node.element : node
+        while np.parent and np.parent.node_type == :element
+          node_idx << np.parent.children.index( np )
+          np = np.parent
+        end
+        new_arry << [ node_idx.reverse.join, node ]
+      }
+      new_arry.sort{ |s1, s2| s1[0] <=> s2[0] }.collect{ |s| s[1] }
+    end
+
+
     def recurse( nodeset, &block )
       for node in nodeset
 	      yield node
@@ -324,7 +342,7 @@ module REXML
 		def Predicate( predicate, node )
 			predicate = predicate.clone
 			#puts "#"*20
-			#puts "Predicate( #{predicate.inspect}, #{node.type} )"
+			#puts "Predicate( #{predicate.inspect}, #{node.class} )"
 			results = []
 			case (predicate[0])
 			when :and, :or, :eq, :neq, :lt, :lteq, :gt, :gteq
-- 
cgit v1.2.3