Initial revision

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3925 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
author: ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-06-10 01:31:01 +0000
committer: ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2003-06-10 01:31:01 +0000
commit: ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d (patch)
tree: d3e1f95a5acf262a9dd46e9663b7034bb285b406 /lib/rexml/parsers
parent: ca02190d8887ecd852e4e3f18f3a3ea91e9c6f7a (diff)
download: ruby-ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d.tar.gz
7 files changed, 1477 insertions, 0 deletions
diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
new file mode 100644
index 0000000000..e5c2cf7d96
--- /dev/null
+++ b/lib/rexml/parsers/baseparser.rb
@@ -0,0 +1,391 @@
+require 'rexml/parseexception'
+require 'rexml/source'
+
+module REXML
+	module Parsers
+		# = Using the Pull Parser
+		# <em>This API is experimental, and subject to change.</em>
+		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+		#  while parser.has_next?
+		#    res = parser.pull
+		#    puts res[1]['att'] if res.start_element? and res[0] == 'b'
+		#  end
+		# See the PullEvent class for information on the content of the results.
+		# The data is identical to the arguments passed for the various events to
+		# the StreamListener API.
+		#
+		# Notice that:
+		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
+		#  while parser.has_next?
+		#    res = parser.pull
+		#    raise res[1] if res.error?
+		#  end
+		#
+		# Nat Price gave me some good ideas for the API.
+		class BaseParser
+			NCNAME_STR= '[\w:][-\w\d.]*'	# regexp source for one name segment
+			NAME_STR= "(?:#{NCNAME_STR}:)?#{NCNAME_STR}"	# optional "segment:" prefix, then a segment; adds no capture group
+
+			NAMECHAR = '[-\w\d\.:]'	# a single character allowed inside a name
+			NAME = "([\\w:]#{NAMECHAR}*)"	# capturing form of a name
+			NMTOKEN = "(?:#{NAMECHAR})+"	# one or more name characters
+			NMTOKENS = "#{NMTOKEN}(\\s+#{NMTOKEN})*"	# whitespace-separated NMTOKENs
+			REFERENCE = "(?:&#{NAME};|&#\\d+;|&#x[0-9a-fA-F]+;)"	# named, decimal or hex reference; a String, not a Regexp
+
+			DOCTYPE_START = /\A\s*<!DOCTYPE\s/um	# *_START patterns classify the next piece of markup
+			DOCTYPE_PATTERN = /\s*<!DOCTYPE\s+(.*?)(\[|>)/um	# 1: identity text, 2: "[" (internal subset follows) or ">"
+			ATTRIBUTE_PATTERN = /\s*(#{NAME_STR})\s*=\s*(["'])(.*?)\2/um	# 1: name, 2: quote, 3: value
+			COMMENT_START = /\A<!--/u
+			COMMENT_PATTERN = /<!--(.*?)-->/um
+			CDATA_START = /\A<!\[CDATA\[/u
+			CDATA_PATTERN = /<!\[CDATA\[(.*?)\]\]>/um
+			XMLDECL_START = /\A<\?xml\s/u
+			XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>/um	# must end at "?>"; the old "\?>*" stopped at the first "?"
+			INSTRUCTION_START = /\A<\?/u
+			INSTRUCTION_PATTERN = /<\?(.*?)(\s+.*?)?\?>/um	# 1: target, 2: content including its leading whitespace
+			TAG_MATCH = /^<((?>#{NAME_STR}))\s*((?>\s+#{NAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/um	# 1: name, 2: raw attributes, 4: "/" when self-closing
+			CLOSE_MATCH = /^\s*<\/(#{NAME_STR})\s*>/um
+
+			VERSION = /\bversion\s*=\s*["'](.*?)['"]/um	# pseudo-attributes of the XML declaration; 1: the value
+			ENCODING = /\bencoding\s*=\s*["'](.*?)['"]/um	# whitespace around "=" is legal XML, as VERSION already allowed
+			STANDALONE = /\bstandalone\s*=\s*["'](.*?)['"]/um
+
+			ENTITY_START = /^\s*<!ENTITY/
+			IDENTITY = /^([!\*\w-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u	# splits DOCTYPE identity text into up to four parts
+			ELEMENTDECL_START = /^\s*<!ELEMENT/um
+			ELEMENTDECL_PATTERN = /^\s*(<!ELEMENT.*?)>/um
+			ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)"
+			NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)"
+			ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))"
+			ATTTYPE = "(CDATA|ID|IDREF|IDREFS|ENTITY|ENTITIES|NMTOKEN|NMTOKENS|#{ENUMERATEDTYPE})"
+			ATTVALUE = "(?:\"((?:[^<&\"]|#{REFERENCE})*)\")|(?:'((?:[^<&']|#{REFERENCE})*)')"
+			DEFAULTDECL = "(#REQUIRED|#IMPLIED|(?:(#FIXED\\s+)?#{ATTVALUE}))"
+			ATTDEF = "\\s+#{NAME}\\s+#{ATTTYPE}\\s+#{DEFAULTDECL}"	# one attribute definition; a String, not a Regexp
+			ATTLISTDECL_START = /^\s*<!ATTLIST/um
+			ATTLISTDECL_PATTERN = /^\s*<!ATTLIST\s+#{NAME}(?:#{ATTDEF})*\s*>/um	# 1: element name
+			NOTATIONDECL_START = /^\s*<!NOTATION/um
+			PUBLIC = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um	# 1: name, 2: keyword, 3: quoted literal
+			SYSTEM = /^\s*<!NOTATION\s+(\w[-\w]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um
+
+			TEXT_PATTERN = /\A([^<]*)/um	# everything up to the next "<"
+
+			# Pattern sources for <!ENTITY ...> declarations
+			PUBIDCHAR = "\x20\x0D\x0Aa-zA-Z0-9-()+,./:=?;!*@$_%#"
+			SYSTEMLITERAL = %Q{((?:"[^"]*")|(?:'[^']*'))}
+			PUBIDLITERAL = %Q{("[#{PUBIDCHAR}']*"|'[#{PUBIDCHAR}]*')}
+			EXTERNALID = "(?:(?:(SYSTEM)\\s+#{SYSTEMLITERAL})|(?:(PUBLIC)\\s+#{PUBIDLITERAL}\\s+#{SYSTEMLITERAL}))"
+			NDATADECL = "\\s+NDATA\\s+#{NAME}"
+			PEREFERENCE = "%#{NAME};"
+			ENTITYVALUE = %Q{((?:"(?:[^%&"]|#{PEREFERENCE}|#{REFERENCE})*")|(?:'([^%&']|#{PEREFERENCE}|#{REFERENCE})*'))}
+			PEDEF = "(?:#{ENTITYVALUE}|#{EXTERNALID})"
+			ENTITYDEF = "(?:#{ENTITYVALUE}|(?:#{EXTERNALID}(#{NDATADECL})?))"
+			PEDECL = "<!ENTITY\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"	# parameter entity; captures the "%"
+			GEDECL = "<!ENTITY\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"	# general entity
+			ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um	# NOTE(review): the leading \s* binds to the GEDECL alternative only
+
+			EREFERENCE = /&(?!#{NAME};)/	# an "&" that does not start a named reference
+
+			DEFAULT_ENTITIES = { 	# name => [ pattern for the escaped form, escaped form, literal character ]
+				'gt' => [/&gt;/, '&gt;', '>'], 
+				'lt' => [/&lt;/, '&lt;', '<'], 
+				'quot' => [/&quot;/, '&quot;', '"'], 
+				"apos" => [/&apos;/, "&apos;", "'"] 
+			}
+
+			def initialize( source )
+				self.stream = source
+			end
+
+			def stream=( source )
+				if source.kind_of? String
+					@source = Source.new(source)
+				elsif source.kind_of? IO
+					@source = IOSource.new(source)
+				elsif source.kind_of? Source
+					@source = source
+				else
+					raise "#{source.type} is not a valid input stream.  It must be \n"+
+					"either a String, IO, or Source."
+				end
+				@closed = nil
+				@document_status = nil
+				@tags = []
+				@stack = []
+				@entities = []
+			end
+
+			# Returns true if there are no more events
+			def empty?
+				!has_next?
+			end
+
+			# Returns true if there are more events.  Synonymous with !empty?
+			def has_next?
+				@source.read if @source.buffer.size==0 and !@source.empty?
+				(!@source.empty? and @source.buffer.strip.size>0) or @stack.size>0 or @closed
+			end
+
+			# Push an event back on the head of the stream.  This method
+			# has (theoretically) infinite depth.
+			def unshift token
+				@stack.unshift(token)
+			end
+
+			# Peek at the +depth+ event in the stack.  The first element on the stack
+			# is at depth 0.  If +depth+ is -1, will parse to the end of the input
+			# stream and return the last event, which is always :end_document.
+			# Be aware that this causes the stream to be parsed up to the +depth+ 
+			# event, so you can effectively pre-parse the entire document (pull the 
+			# entire thing into memory) using this method.  
+			def peek depth=0
+				raise 'Illegal argument "#{depth}"' if depth < -1
+				temp = []
+				if depth == -1
+					temp.push(pull()) until empty?
+				else
+					while @stack.size+temp.size < depth+1
+						temp.push(pull())
+					end
+				end
+				@stack += temp if temp.size > 0
+				@stack[depth]
+			end
+
+			# Returns the next event.  This is a +PullEvent+ object.
+			def pull
+				return [ :end_document ] if empty?
+				if @closed
+					x, @closed = @closed, nil
+					return [ :end_element, x ]
+				end
+				return @stack.shift if @stack.size > 0
+				@source.read if @source.buffer.size==0
+				if @document_status == nil
+					@source.match( /^\s*/um, true )
+					word = @source.match( /^\s*(<.*?)>/um )
+					word = word[1] unless word.nil?
+					case word
+					when COMMENT_START
+						return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+					when XMLDECL_START
+						results = @source.match( XMLDECL_PATTERN, true )[1]
+						version = VERSION.match( results )
+						version = version[1] unless version.nil?
+						encoding = ENCODING.match(results)
+						encoding = encoding[1] unless encoding.nil?
+						@source.encoding = encoding
+						standalone = STANDALONE.match(results)
+						standalone = standalone[1] unless standalone.nil?
+						return [ :xmldecl, version, encoding, standalone]
+					when INSTRUCTION_START
+						return [ :processing_instruction, *@source.match(INSTRUCTION_PATTERN, true)[1,2] ]
+					when DOCTYPE_START
+						md = @source.match( DOCTYPE_PATTERN, true )
+						identity = md[1]
+						close = md[2]
+						identity =~ IDENTITY
+						name = $1
+						raise "DOCTYPE is missing a name" if name.nil?
+						pub_sys = $2.nil? ? nil : $2.strip
+						long_name = $3.nil? ? nil : $3.strip
+						uri = $4.nil? ? nil : $4.strip
+						args = [ :start_doctype, name, pub_sys, long_name, uri ]
+						if close == ">"
+							@document_status = :after_doctype
+							@source.read if @source.buffer.size==0
+							md = @source.match(/^\s*/um, true)
+							@stack << [ :end_doctype ]
+						else
+							@document_status = :in_doctype
+						end
+						return args
+					else
+						@document_status = :after_doctype
+						@source.read if @source.buffer.size==0
+						md = @source.match(/\s*/um, true)
+					end
+				end
+				if @document_status == :in_doctype
+					md = @source.match(/\s*(.*?>)/um)
+					case md[1]
+					when ELEMENTDECL_START
+						return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ]
+					when ENTITY_START
+						match = @source.match( ENTITYDECL, true ).to_a.compact
+						match[0] = :entitydecl
+						ref = false
+						if match[1] == '%'
+							ref = true
+							match.delete_at 1
+						end
+						# Now we have to sort out what kind of entity reference this is
+						if match[2] == 'SYSTEM'
+							# External reference
+							match[3] = match[3][1..-2] # PUBID
+							match.delete_at(4) if match.size > 4 # Chop out NDATA decl
+							# match is [ :entity, name, SYSTEM, pubid(, ndata)? ]
+						elsif match[2] == 'PUBLIC'
+							# External reference
+							match[3] = match[3][1..-2] # PUBID
+							match[4] = match[4][1..-2] # HREF
+							# match is [ :entity, name, PUBLIC, pubid, href ]
+						else
+							match[2] = match[2][1..-2]
+							match.pop if match.size == 4
+							# match is [ :entity, name, value ]
+						end
+						match << '%' if ref
+						return match
+					when ATTLISTDECL_START
+						md = @source.match( ATTLISTDECL_PATTERN, true )
+						raise REXML::ParseException.new( "Bad ATTLIST declaration!", @source ) if md.nil?
+						element = md[1]
+						contents = md[0]
+
+						pairs = {}
+						values = md[0].scan( ATTDEF )
+						values.each do |attdef|
+							unless attdef[3] == "#IMPLIED"
+								attdef.compact!
+								val = attdef[3]
+								val = attdef[4] if val == "#FIXED "
+								pairs[attdef[0]] = val
+							end
+						end
+						return [ :attlistdecl, element, pairs, contents ]
+					when NOTATIONDECL_START
+						md = nil
+						if @source.match( PUBLIC )
+							md = @source.match( PUBLIC, true )
+						elsif @source.match( SYSTEM )
+							md = @source.match( SYSTEM, true )
+						else
+							raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+						end
+						return [ :notationdecl, md[1], md[2], md[3] ]
+					when /^\s*]\s*>/um
+						@document_status = :after_doctype
+						@source.match( /^\s*]\s*>/um, true )
+						return [ :end_doctype ]
+					end
+				end
+				begin 
+					if @source.buffer[0] == ?<
+						if @source.buffer[1] == ?/
+							last_tag = @tags.pop
+							md = @source.match( CLOSE_MATCH, true )
+							raise REXML::ParseException.new( "Missing end tag for '#{last_tag}' "+
+								"(got \"#{md[1]}\")", @source) unless last_tag == md[1]
+							return [ :end_element, last_tag ]
+						elsif @source.buffer[1] == ?!
+							md = @source.match(/\A(\s*[^>]*>)/um)
+							#puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}"
+							raise REXML::ParseException.new("Malformed node", @source) unless md
+							case md[1]
+							when CDATA_START
+								return [ :cdata, @source.match( CDATA_PATTERN, true )[1] ]
+							when COMMENT_START
+								return [ :comment, @source.match( COMMENT_PATTERN, true )[1] ]
+							else
+								raise REXML::ParseException.new( "Declarations can only occur "+
+								"in the doctype declaration.", @source)
+							end
+						elsif @source.buffer[1] == ??
+							md = @source.match( INSTRUCTION_PATTERN, true )
+							return [ :processing_instruction, md[1], md[2] ]
+						else
+							# Get the next tag
+							md = @source.match(TAG_MATCH, true)
+							raise REXML::ParseException.new("malformed XML: missing tag start", @source) unless md
+							attrs = []
+							if md[2].size > 0
+								attrs = md[2].scan( ATTRIBUTE_PATTERN )
+								raise REXML::ParseException.new( "error parsing attributes: [#{attrs.join ', '}], excess = \"#$'\"", @source) if $' and $'.strip.size > 0
+							end
+				
+							if md[4]
+								@closed = md[1]
+							else
+								@tags.push( md[1] )
+							end
+							attributes = {}
+							attrs.each { |a,b,c| attributes[a] = c }
+							return [ :start_element, md[1], attributes ]
+						end
+					else
+						md = @source.match(TEXT_PATTERN, true)
+						raise "no text to add" if md[0].length == 0
+						# unnormalized = Text::unnormalize( md[1], self )
+						# return PullEvent.new( :text, md[1], unnormalized )
+						return [ :text, md[1] ]
+					end
+        rescue REXML::ParseException
+          raise $!
+				rescue Exception, NameError => error
+					raise REXML::ParseException.new( "Exception parsing",
+						@source, self, error )
+				end
+				return [ :dummy ]
+			end
+
+			def entity( reference, entities )
+				value = nil
+				value = entities[ reference ] if entities
+				if not value
+					value = DEFAULT_ENTITIES[ reference ]
+					value = value[2] if value
+				end
+				unnormalize( value, entities ) if value
+			end
+
+			# Escapes all possible entities
+			def normalize( input, entities=nil, entity_filter=nil )
+				copy = input.clone
+				# Doing it like this rather than in a loop improves the speed
+				copy.gsub!( EREFERENCE, '&amp;' )
+				entities.each do |key, value|
+					copy.gsub!( value, "&#{key};" ) unless entity_filter and 
+																			entity_filter.include?(entity)
+				end if entities
+				copy.gsub!( EREFERENCE, '&amp;' )
+				DEFAULT_ENTITIES.each do |key, value|
+					copy.gsub!( value[2], value[1] )
+				end
+				copy
+			end
+
+			# Unescapes all possible entities
+			def unnormalize( string, entities=nil, filter=nil )
+				rv = string.clone
+				rv.gsub!( /\r\n?/, "\n" )
+				matches = rv.scan( REFERENCE)
+				return rv if matches.size == 0
+				rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {|m|
+					m=$1
+					m = "0#{m}" if m[0] == ?x
+					[Integer(m)].pack('U*')
+				}
+				matches.collect!{|x|x[0]}.compact!
+				if matches.size > 0
+					matches.each do |entity_reference|
+						unless filter and filter.include?(entity_reference)
+							entity_value = entity( entity_reference, entities )
+							if entity_value
+								re = /&#{entity_reference};/
+								rv.gsub!( re, entity_value )
+							end
+						end
+					end
+					matches.each do |entity_reference|
+						unless filter and filter.include?(entity_reference)
+							er = DEFAULT_ENTITIES[entity_reference]
+							rv.gsub!( er[0], er[2] ) if er
+						end
+					end
+					rv.gsub!( /&amp;/, '&' )
+				end
+				rv
+			end
+		end
+	end
+end
diff --git a/lib/rexml/parsers/lightparser.rb b/lib/rexml/parsers/lightparser.rb
new file mode 100644
index 0000000000..e2f083bc8e
--- /dev/null
+++ b/lib/rexml/parsers/lightparser.rb
@@ -0,0 +1,56 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+require 'rexml/light/node'
+
+module REXML
+	module Parsers
+		class LightParser
+			def initialize stream
+				@stream = stream
+				@parser = REXML::Parsers::BaseParser.new( stream )
+			end
+
+      def rewind
+        @stream.rewind
+        @parser.stream = @stream
+      end
+
+			def parse
+				root = context = REXML::Light::Node.new([ :document ])
+				while true
+					event = @parser.pull
+					case event[0]
+					when :end_document
+						break
+					when :end_doctype
+						context = context.parent
+					when :start_element, :start_doctype
+						new_node = REXML::Light::Node.new(event)
+						context << new_node
+						new_node.parent = context
+						context = new_node
+					when :end_element, :end_doctype
+						context = context.parent
+					else
+						new_node = REXML::Light::Node.new(event)
+						context << new_node
+						new_node.parent = context
+					end
+				end
+				root
+			end
+		end
+
+		# An element is an array.  The array contains:
+		#  0			The parent element
+		#  1			The tag name
+		#  2			A hash of attributes
+		#  3..-1	The child elements
+		# An element is an array of size > 3
+		# Text is a String
+		# PIs are [ :processing_instruction, target, data ]
+		# Comments are [ :comment, data ]
+		# DocTypes are DocType structs
+		# The root is an array with XMLDecls, Text, DocType, Array, Text
+	end
+end
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb
new file mode 100644
index 0000000000..aeda6251fe
--- /dev/null
+++ b/lib/rexml/parsers/pullparser.rb
@@ -0,0 +1,143 @@
+require 'rexml/parseexception'
+require 'rexml/parsers/baseparser'
+require 'rexml/xmltokens'
+
+module REXML
+	module Parsers
+		# = Using the Pull Parser
+		# <em>This API is experimental, and subject to change.</em>
+		#  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
+		#  while parser.has_next?
+		#    res = parser.pull
+		#    puts res[1]['att'] if res.start_element? and res[0] == 'b'
+		#  end
+		# See the PullEvent class for information on the content of the results.
+		# The data is identical to the arguments passed for the various events to
+		# the StreamListener API.
+		#
+		# Notice that:
+		#  parser = PullParser.new( "<a>BAD DOCUMENT" )
+		#  while parser.has_next?
+		#    res = parser.pull
+		#    raise res[1] if res.error?
+		#  end
+		#
+		# Nat Price gave me some good ideas for the API.
+		class PullParser < BaseParser
+			include XMLTokens
+
+			def initialize stream
+				super
+				@entities = {}
+			end
+
+			def each
+				while has_next?
+					yield self.pull
+				end
+			end
+
+			def peek depth=0
+				PullEvent.new(super)
+			end
+
+			def pull
+				event = super
+				case event[0]
+				when :entitydecl
+					@entities[ event[1] ] = 
+						event[2] unless event[2] =~ /PUBLIC|SYSTEM/
+				when :text
+					unnormalized = unnormalize( event[1], @entities )
+					event << unnormalized
+				end
+				PullEvent.new( event )
+			end
+		end
+
+		# A parsing event.  The contents of the event are accessed as an +Array+,
+		# and the type is given either by the ...? methods, or by accessing the
+		# +type+ accessor.  The contents of this object vary from event to event,
+		# but are identical to the arguments passed to +StreamListener+s for each
+		# event.
+		class PullEvent
+			# The type of this event.  Will be one of :tag_start, :tag_end, :text,
+			# :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
+			# :notationdecl, :entity, :cdata, :xmldecl, or :error.
+			def initialize(arg)
+				@contents = arg
+			end
+			def []( index )
+				@contents[index+1]
+			end
+			def event_type
+				@contents[0]
+			end
+			# Content: [ String tag_name, Hash attributes ]
+			def start_element?
+				@contents[0] == :start_element
+			end
+			# Content: [ String tag_name ]
+			def end_element?
+				@contents[0] == :end_element
+			end
+			# Content: [ String raw_text, String unnormalized_text ]
+			def text?
+				@contents[0] == :text
+			end
+			# Content: [ String text ]
+			def instruction?
+				@contents[0] == :processing_instruction
+			end
+			# Content: [ String text ]
+			def comment?
+				@contents[0] == :comment
+			end
+			# Content: [ String name, String pub_sys, String long_name, String uri ]
+			def doctype?
+				@contents[0] == :start_doctype
+			end
+			# Content: [ String text ]
+			def attlistdecl?
+				@contents[0] == :attlistdecl
+			end
+			# Content: [ String text ]
+			def elementdecl?
+				@contents[0] == :elementdecl
+			end
+			# Due to the wonders of DTDs, an entity declaration can be just about
+			# anything.  There's no way to normalize it; you'll have to interpret the
+			# content yourself.  However, the following is true:
+			#
+			# * If the entity declaration is an internal entity:
+			#   [ String name, String value ]
+			# Content: [ String text ]
+			def entitydecl?
+				@contents[0] == :entitydecl
+			end
+			# Content: [ String text ]
+			def notationdecl?
+				@contents[0] == :notationdecl
+			end
+			# Content: [ String text ]
+			def entity?
+				@contents[0] == :entity
+			end
+			# Content: [ String text ]
+			def cdata?
+				@contents[0] == :cdata
+			end
+			# Content: [ String version, String encoding, String standalone ]
+			def xmldecl?
+				@contents[0] == :xmldecl
+			end
+			def error?
+				@contents[0] == :error
+			end
+
+			def inspect
+				@contents[0].to_s + ": " + @contents[1..-1].inspect
+			end
+		end
+	end
+end
diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb
new file mode 100644
index 0000000000..8598fd43e9
--- /dev/null
+++ b/lib/rexml/parsers/sax2parser.rb
@@ -0,0 +1,204 @@
+module REXML
+	module Parsers
+		class SAX2Parser
+			def initialize source
+				@parser = BaseParser.new(source)
+				@listeners = []
+				@procs = []
+				@namespace_stack = []
+				@has_listeners = false
+				@tag_stack = []
+			end
+			
+			# Listen arguments:
+			#
+			# Symbol, Array, Block
+			# 	Listen to Symbol events on Array elements
+			# Symbol, Block
+			#   Listen to Symbol events
+			# Array, Listener
+			# 	Listen to all events on Array elements
+			# Array, Block
+			# 	Listen to :start_element events on Array elements
+			# Listener
+			# 	Listen to All events
+			#
+			# Symbol can be one of: :start_element, :end_element,
+			# :start_prefix_mapping, :end_prefix_mapping, :characters,
+			# :processing_instruction, :doctype, :attlistdecl, :elementdecl,
+			# :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
+			#
+			# Array contains regular expressions or strings which will be matched
+			# against fully qualified element names.
+			#
+			# Listener must implement the methods in SAX2Listener
+			#
+			# Block will be passed the same arguments as a SAX2Listener method would
+			# be, where the method name is the same as the matched Symbol.
+			# See the SAX2Listener for more information.
+			def listen( *args, &blok )
+				if args[0].kind_of? Symbol
+					if args.size == 2
+						args[1].each { |match| @procs << [args[0], match, blok] }
+					else
+						add( [args[0], /.*/, blok] )
+					end
+				elsif args[0].kind_of? Array
+					if args.size == 2
+						args[0].each { |match| add( [nil, match, args[1]] ) }
+					else
+						args[0].each { |match| add( [ :start_element, match, blok ] ) }
+					end
+				else
+					add([nil, /.*/, args[0]])
+				end
+			end
+			
+			def deafen( listener=nil, &blok )
+				if listener
+					@listeners.delete_if {|item| item[-1] == listener }
+					@has_listeners = false if @listeners.size == 0
+				else
+					@procs.delete_if {|item| item[-1] == blok }
+				end
+			end
+			
+			def parse
+				@procs.each { |sym,match,block| block.call if sym == :start_document }
+				@listeners.each { |sym,match,block| 
+					block.start_document if sym == :start_document or sym.nil?
+				}
+				root = context = []
+				while true
+					event = @parser.pull
+					case event[0]
+					when :end_document
+						handle( :end_document )
+						break
+					when :end_doctype
+						context = context[1]
+					when :start_element
+						@tag_stack.push(event[1])
+						# find the observers for namespaces
+						procs = get_procs( :start_prefix_mapping, event[1] )
+						listeners = get_listeners( :start_prefix_mapping, event[1] )
+						if procs or listeners
+							# break out the namespace declarations
+							# The attributes live in event[2]
+							nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ }
+							nsdecl.collect! { |n, value| [ n[6..-1], value ] }
+							@namespace_stack.push({})
+							nsdecl.each do |n,v|
+								@namespace_stack[-1][n] = v
+								# notify observers of namespaces
+								procs.each { |ob| ob.call( n, v ) } if procs
+								listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
+							end
+						end
+						event[1] =~ Namespace::NAMESPLIT
+						prefix = $1
+						local = $2
+						uri = get_namespace(prefix)
+						# find the observers for start_element
+						procs = get_procs( :start_element, event[1] )
+						listeners = get_listeners( :start_element, event[1] )
+						# notify observers
+						procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
+						listeners.each { |ob| 
+							ob.start_element( uri, local, event[1], event[2] ) 
+						} if listeners
+					when :end_element
+						@tag_stack.pop
+						event[1] =~ Namespace::NAMESPLIT
+						prefix = $1
+						local = $2
+						uri = get_namespace(prefix)
+						# find the observers for start_element
+						procs = get_procs( :end_element, event[1] )
+						listeners = get_listeners( :end_element, event[1] )
+						# notify observers
+						procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
+						listeners.each { |ob| 
+							ob.end_element( uri, local, event[1] ) 
+						} if listeners
+
+						namespace_mapping = @namespace_stack.pop
+						# find the observers for namespaces
+						procs = get_procs( :end_prefix_mapping, event[1] )
+						listeners = get_listeners( :end_prefix_mapping, event[1] )
+						if procs or listeners
+							namespace_mapping.each do |prefix, uri|
+								# notify observers of namespaces
+								procs.each { |ob| ob.call( prefix ) } if procs
+								listeners.each { |ob| ob.end_prefix_mapping(prefix) } if listeners
+							end
+						end
+					when :text
+						normalized = @parser.normalize( event[1] )
+						handle( :characters, normalized )
+					when :processing_instruction, :comment, :doctype, :attlistdecl, 
+						:elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
+						handle( *event )
+					end
+				end
+			end
+
+			private
+			def handle( symbol, *arguments )
+				tag = @tag_stack[-1]
+				procs = get_procs( symbol, tag )
+				listeners = get_listeners( symbol, tag )
+				# notify observers
+				procs.each { |ob| ob.call( *arguments ) } if procs
+				listeners.each { |l| 
+					l.send( symbol.to_s, *arguments ) 
+				} if listeners
+			end
+
+			# The following methods are duplicates, but it is faster than using
+			# a helper
+			def get_procs( symbol, name )
+				return nil if @procs.size == 0
+				@procs.find_all do |sym, match, block|
+					(
+						(sym.nil? or symbol == sym) and 
+						(name.nil? or (
+							(name == match) or
+							(match.kind_of? Regexp and name =~ match)
+							)
+						)
+					)
+				end.collect{|x| x[-1]}
+			end
+			def get_listeners( symbol, name )
+				return nil if @listeners.size == 0
+				@listeners.find_all do |sym, match, block|
+					(
+						(sym.nil? or symbol == sym) and 
+						(name.nil? or (
+							(name == match) or
+							(match.kind_of? Regexp and name =~ match)
+							)
+						)
+					)
+				end.collect{|x| x[-1]}
+			end
+
+			def add( pair )
+				if pair[-1].kind_of? Proc
+					@procs << pair unless @procs.include? pair
+				else
+					@listeners << pair unless @listeners.include? pair
+					@has_listeners = true
+				end
+			end
+
+			def get_namespace( prefix ) 
+				uri = @namespace_stack.find do |ns|
+					not ns[prefix].nil?
+				end
+				uri[prefix] unless uri.nil?
+			end
+		end
+	end
+end
diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb
new file mode 100644
index 0000000000..51441289d9
--- /dev/null
+++ b/lib/rexml/parsers/streamparser.rb
@@ -0,0 +1,33 @@
+module REXML
+	module Parsers
+		class StreamParser
+			def initialize source, listener
+				@listener = listener
+				@parser = BaseParser.new( source )
+			end
+
+			def parse
+				# entity string
+				while true
+					event = @parser.pull
+					case event[0]
+					when :end_document
+						return
+					when :start_element
+						@listener.tag_start( event[1], event[2] )
+					when :end_element
+						@listener.tag_end( event[1] )
+					when :text
+						normalized = @parser.unnormalize( event[1] )
+						@listener.text( normalized )
+					when :processing_instruction
+						@listener.instruction( *event[1,2] )
+					when :comment, :doctype, :attlistdecl, 
+						:elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl
+						@listener.send( event[0].to_s, *event[1..-1] )
+					end
+				end
+			end
+		end
+	end
+end
diff --git a/lib/rexml/parsers/ultralightparser.rb b/lib/rexml/parsers/ultralightparser.rb
new file mode 100644
index 0000000000..f3b208bf0f
--- /dev/null
+++ b/lib/rexml/parsers/ultralightparser.rb
@@ -0,0 +1,52 @@
+require 'rexml/parsers/streamparser'
+require 'rexml/parsers/baseparser'
+
+module REXML
+	module Parsers
+		class UltraLightParser
+			def initialize stream
+				@stream = stream
+				@parser = REXML::Parsers::BaseParser.new( stream )
+			end
+
+      def rewind
+        @stream.rewind
+        @parser.stream = @stream
+      end
+
+			def parse
+				root = context = []
+				while true
+					event = @parser.pull
+					case event[0]
+					when :end_document
+						break
+					when :end_doctype
+						context = context[1]
+					when :start_element, :doctype
+						context << event
+						event[1,0] = [context]
+						context = event
+					when :end_element
+						context = context[1]
+					else
+						context << event
+					end
+				end
+				root
+			end
+		end
+
+		# An element is an array.  The array contains:
+		#  0			The parent element
+		#  1			The tag name
+		#  2			A hash of attributes
+		#  3..-1	The child elements
+		# An element is an array of size > 3
+		# Text is a String
+		# PIs are [ :processing_instruction, target, data ]
+		# Comments are [ :comment, data ]
+		# DocTypes are DocType structs
+		# The root is an array with XMLDecls, Text, DocType, Array, Text
+	end
+end
diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb
new file mode 100644
index 0000000000..da27e7c705
--- /dev/null
+++ b/lib/rexml/parsers/xpathparser.rb
@@ -0,0 +1,598 @@
+require 'rexml/namespace'
+require 'rexml/xmltokens'
+
+module REXML
+	module Parsers
+		# You don't want to use this class.  Really.  Use XPath, which is a wrapper
+		# for this class.  Believe me.  You don't want to poke around in here.
+		# There is strange, dark magic at work in this code.  Beware.  Go back!  Go
+		# back while you still can!
+		class XPathParser
+			include XMLTokens
+			LITERAL		= /^'([^']*)'|^"([^"]*)"/u
+
+			def namespaces=( namespaces )	# Stores the mapping here and in Functions
+				Functions::namespace_context = namespaces
+				@namespaces = namespaces
+			end
+
+			def parse path
+				path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
+				path.gsub!( /\s+([\]\)])/, '\1' )
+				parsed = []
+				path = LocationPath(path, parsed)
+				parsed
+			end
+
+			def predicate path	# Parses path as the body of a single [...] predicate
+				parsed = []
+				Predicate( "[#{path}]", parsed )
+				parsed
+			end
+
+			def to_string( path )	# Renders a parsed token array back into a path string
+				string = ""
+				while path.size > 0	# every branch shifts the tokens it renders off path
+					case path[0]
+					when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self
+						op = path.shift
+						string << "/" unless string.size == 0
+						string << op.to_s	# symbol name as is, underscores included
+						string << "::"
+					when :any
+						path.shift
+						string << "*"
+					when :qname
+						path.shift
+						prefix = path.shift
+						name = path.shift
+						string << prefix+":" if prefix.size > 0
+						string << name
+					when :predicate
+						path.shift
+						string << '['
+						string << predicate_to_string( path.shift )
+						string << ' ]'
+					else	# any other token is rendered as UNKNOWN(<inspect>)
+						string << "/" unless string.size == 0
+						string << "UNKNOWN("
+						string << path.shift.inspect
+						string << ")"
+					end
+				end
+				return string
+			end
+
+			def predicate_to_string( path )	# Renders one parsed predicate expression as text
+				string = ""
+				case path[0]
+				when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union
+					op = path.shift	# binary operator: [ op, left, right ]
+					left = predicate_to_string( path.shift )
+					right = predicate_to_string( path.shift )
+					string << " "
+					string << left
+					string << " "
+					string << op.to_s
+					string << " "
+					string << right
+					string << " "
+				when :function
+					path.shift
+					name = path.shift
+					string << name
+					string << "( "
+					string << predicate_to_string( path.shift )
+					string << " )"
+				when :literal
+					path.shift
+					string << " "
+					string << path.shift.inspect
+					string << " "
+				else	# anything else is rendered as a location path
+					string << " "
+					string << to_string( path )
+					string << " "
+				end
+				return string.squeeze(" ")	# collapse the runs of spaces added as padding
+			end
+
+			private
+			#LocationPath
+			#	| RelativeLocationPath
+			#	| '/' RelativeLocationPath?
+			#	| '//' RelativeLocationPath
+			def LocationPath path, parsed
+        # Tokenizes an absolute or relative location path into parsed.
+        path = path.strip
+        if path[0] == ?/
+          parsed << :document
+					if path[1] == ?/
+						parsed << :descendant_or_self
+						parsed << :node
+						path = path[2..-1]
+					else
+						path = path[1..-1]
+					end
+        end
+        # With nothing left after the leading slash(es) the method returns nil.
+				return RelativeLocationPath( path, parsed ) if path.size > 0
+			end
+
+			#RelativeLocationPath
+			#	|																										Step
+			#		| (AXIS_NAME '::' | '@' | '') 										AxisSpecifier
+			#			NodeTest
+			#				Predicate
+			#		| '.' | '..'																			AbbreviatedStep
+			#	|	RelativeLocationPath '/' Step
+			#	| RelativeLocationPath '//' Step
+			AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
+			def RelativeLocationPath path, parsed
+				# Tokenizes steps separated by '/' or '//' into parsed; returns the unparsed rest.
+				while path.size > 0
+					# (axis or @ or <child::>) nodetest predicate	>
+					# OR																					>	/ Step
+					# (. or ..)																		>
+					if path[0] == ?.
+						if path[1] == ?.
+							parsed << :parent
+							parsed << :node
+							path = path[2..-1]
+						else
+							parsed << :self
+							parsed << :node
+							path = path[1..-1]
+						end
+					else
+						if path[0] == ?@
+							# '@' selects the attribute axis
+							parsed << :attribute
+							path = path[1..-1]
+							# Goto Nodetest
+						elsif path =~ AXIS
+							parsed << $1.tr('-','_').intern
+							path = $'
+							# Goto Nodetest
+						else
+							parsed << :child
+						end
+
+						# The node test and any predicates are collected in n first
+						n = []
+						path = NodeTest( path, n)
+						# NodeTest hands back whatever follows the node test
+
+						if path[0] == ?[
+							path = Predicate( path, n )
+						end
+
+						parsed.concat(n)
+					end
+					
+					if path.size > 0
+						if path[0] == ?/
+							if path[1] == ?/
+								parsed << :descendant_or_self
+								parsed << :node
+								path = path[2..-1]
+							else
+								path = path[1..-1]
+							end
+						else
+							return path
+						end
+					end
+				end
+				return path
+			end
+
+			# Parses a single node test off the front of the path.
+			# The tokens appended to the parsed array are one of:
+			# 	:any, a node-type symbol, :processing_instruction plus its literal,
+			# 	:namespace plus a prefix, or :qname plus a prefix and a name
+			#NodeTest
+			#	| ('*' | NCNAME ':' '*' | QNAME)								NameTest
+			#	| NODE_TYPE '(' ')'															NodeType
+			#	| PI '(' LITERAL ')'														PI
+			#		| '[' expr ']'																Predicate
+			NCNAMETEST= /^(#{NCNAME_STR}):\*/u
+			QNAME 		= Namespace::NAMESPLIT
+			NODE_TYPE	= /^(comment|text|node)\(\s*\)/m
+			PI				= /^processing-instruction\(/
+			def NodeTest path, parsed
+        #puts "NodeTest with #{path}"
+				res = nil
+				case path
+				when /^\*/
+					path = $'
+					parsed << :any
+				when NODE_TYPE
+					type = $1
+					path = $'
+					parsed << type.tr('-', '_').intern
+				when PI
+					path = $'
+					literal = nil
+					if path !~ /^\s*\)/
+						path =~ LITERAL
+						literal = $1
+						path = $'
+						raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
+						path = path[1..-1]
+					end
+					parsed << :processing_instruction
+					parsed << literal
+				when NCNAMETEST
+					#puts "NCNAMETEST"
+					prefix = $1
+					path = $'
+					parsed << :namespace
+					parsed << prefix
+				when QNAME
+					#puts "QNAME"
+					prefix = $1
+					name = $2
+					path = $'
+					prefix = "" unless prefix
+					parsed << :qname
+					parsed << prefix
+					parsed << name
+				end
+				return path
+			end
+
+			# Parses the leading [...] predicate(s) of the path into :predicate tokens
+			def Predicate path, parsed
+        # Consumes every leading [...] group of path; returns nil if there is none.
+				return nil unless path[0] == ?[
+				predicates = []
+				while path[0] == ?[
+					path, expr = get_group(path)
+					predicates << expr[1..-2] if expr
+				end
+        # Each group becomes a :predicate token followed by its parsed expression.
+				predicates.each{ |expr| 
+					# preds is pushed first; OrExpr then fills it in place
+					preds = []
+					parsed << :predicate
+					parsed << preds
+					OrExpr(expr, preds) 
+				}
+        # The text after the last group is handed back to the caller.
+				path
+			end
+
+			# The following methods each parse one level of the expression grammar:
+			# they add tokens to the array they are given and return the part of
+			# the path they did not consume
+
+			#| OrExpr S 'or' S AndExpr
+			#| AndExpr
+			def OrExpr path, parsed
+				# Parses AndExpr operands joined by ' or '; returns the unparsed rest.
+				n = []
+				rest = AndExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*( or )/
+						n = [ :or, n, [] ]	# [ op, left, right ]; right is filled in below
+						rest = AndExpr( $', n[-1] )
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| AndExpr S 'and' S EqualityExpr
+			#| EqualityExpr
+			def AndExpr path, parsed
+				# Parses EqualityExpr operands joined by ' and '; returns the unparsed rest.
+				n = []
+				rest = EqualityExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*( and )/
+						n = [ :and, n, [] ]
+						# $' is the text after ' and '; the right operand fills n[-1]
+						rest = EqualityExpr( $', n[-1] )
+						# n is now [ :and, left, right ]
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| EqualityExpr ('=' | '!=')  RelationalExpr
+			#| RelationalExpr
+			def EqualityExpr path, parsed
+				# Parses RelationalExpr operands joined by '=' or '!='; returns the unparsed rest.
+				n = []
+				rest = RelationalExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*(!?=)\s*/
+						if $1[0] == ?!
+							n = [ :neq, n, [] ]
+						else
+							n = [ :eq, n, [] ]
+						end
+						rest = RelationalExpr( $', n[-1] )	# right operand fills n[-1]
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
+			#| AdditiveExpr
+			def RelationalExpr path, parsed
+				# Parses AdditiveExpr operands joined by <, >, <= or >=; returns the unparsed rest.
+				n = []
+				rest = AdditiveExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*([<>]=?)\s*/
+						if $1[0] == ?<
+							sym = "lt"
+						else
+							sym = "gt"
+						end
+						sym << "eq" if $1[-1] == ?=	# gives "lteq" / "gteq"
+						n = [ sym.intern, n, [] ]
+						rest = AdditiveExpr( $', n[-1] )	# right operand fills n[-1]
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| AdditiveExpr ('+' | S '-') MultiplicativeExpr
+			#| MultiplicativeExpr
+			def AdditiveExpr path, parsed
+				# Parses MultiplicativeExpr operands joined by '+' or ' -'; returns the unparsed rest.
+				n = []
+				rest = MultiplicativeExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*(\+| -)\s*/	# a minus must be preceded by a space
+						if $1[0] == ?+
+							n = [ :plus, n, [] ]
+						else
+							n = [ :minus, n, [] ]
+						end
+						rest = MultiplicativeExpr( $', n[-1] )	# right operand fills n[-1]
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
+			#| UnaryExpr
+			def MultiplicativeExpr path, parsed
+				# Parses UnaryExpr operands joined by '*', ' div ' or ' mod '; returns the unparsed rest.
+				n = []
+				rest = UnaryExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*(\*| div | mod )\s*/
+						if $1[0] == ?*
+							n = [ :mult, n, [] ]
+						elsif $1.include?( "div" )
+							n = [ :div, n, [] ]
+						else
+							n = [ :mod, n, [] ]
+						end
+						rest = UnaryExpr( $', n[-1] )	# right operand fills n[-1]
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace(n)	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| '-' UnaryExpr
+			#| UnionExpr
+			def UnaryExpr path, parsed	# Parses optional leading '-' signs, then a UnionExpr
+				path =~ /^(\-*)/
+				path = $'
+				if $1 and (($1.size % 2) != 0)	# an odd number of '-' negates
+					mult = -1
+				else
+					mult = 1
+				end
+				parsed << :neg if mult < 0
+
+				# The operand itself is parsed by UnionExpr into n
+				n = []
+				path = UnionExpr( path, n )
+				# Its tokens follow the optional :neg marker; the rest is returned
+				parsed.concat( n )
+				path
+			end
+
+			#| UnionExpr '|' PathExpr
+			#| PathExpr
+			def UnionExpr path, parsed
+				# Parses PathExpr operands joined by '|'; returns the unparsed rest.
+				n = []
+				rest = PathExpr( path, n )
+				# Operators are only looked for if the first operand consumed something.
+				if rest != path
+					while rest =~ /^\s*(\|)\s*/
+						n = [ :union, n, [] ]	# [ op, left, right ]; right is filled in below
+						rest = PathExpr( $', n[-1] )
+					end
+				end
+				if parsed.size == 0 and n.size != 0
+					parsed.replace( n )	# parsed was empty: take over n's contents
+				elsif n.size > 0
+					parsed << n	# otherwise nest n as a single element
+				end
+				rest
+			end
+
+			#| LocationPath
+			#| FilterExpr ('/' | '//') RelativeLocationPath
+			def PathExpr path, parsed
+        path =~ /^\s*/
+        path = $'
+				#puts "PATH >>> #{path}"
+				n = []
+				rest = FilterExpr( path, n )
+				#puts "PATH <<< '#{rest}'"
+				if rest != path
+					if rest and rest[0] == ?/
+						return RelativeLocationPath(rest, n)
+					end
+				end
+				#puts "BEFORE WITH '#{rest}'"
+				rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/
+				parsed.concat(n)
+				return rest
+			end
+
+			#| FilterExpr Predicate
+			#| PrimaryExpr
+			def FilterExpr path, parsed
+				# Parses a primary expression plus any [...] predicates that follow it.
+				n = []
+				path = PrimaryExpr( path, n )
+				# Predicates are only parsed when a path remains and it starts with '['.
+				path = Predicate(path, n) if path and path[0] == ?[
+				# Tokens collected in n are appended to parsed; the rest is returned.
+				parsed.concat(n)
+				path
+			end
+
+			#| VARIABLE_REFERENCE
+			#| '(' expr ')'
+			#| LITERAL
+			#| NUMBER
+			#| FunctionCall
+			VARIABLE_REFERENCE	= /^\$(#{NAME_STR})/u
+			NUMBER							= /^(\d*\.?\d+)/
+			NT				= /^comment|text|processing-instruction|node$/
+			def PrimaryExpr path, parsed
+				arry = []
+				case path
+				when VARIABLE_REFERENCE
+					varname = $1
+					path = $'
+					parsed << :variable
+					parsed << varname
+					#arry << @variables[ varname ]
+				when /^(\w[-\w]*)(?:\()/
+					fname = $1
+					path = $'
+					return nil if fname =~ NT
+					parsed << :function
+					parsed << fname
+					path = FunctionCall(path, parsed)
+				when LITERAL, NUMBER
+					#puts "LITERAL or NUMBER: #$1"
+					varname = $1.nil? ? $2 : $1
+					path = $'
+					parsed << :literal 
+					parsed << varname
+				when /^\(/ 																							#/
+					path, contents = get_group(path)
+					contents = contents[1..-2]
+					n = []
+					OrExpr( contents, n )
+					parsed.concat(n)
+				end
+				path
+			end
+
+			#| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
+			def FunctionCall rest, parsed	# Parses a call's argument list; returns the text after it
+				path, arguments = parse_args(rest)
+        argset = []
+        for argument in arguments
+          args = []
+          OrExpr( argument, args )	# each argument is parsed as an expression
+          argset << args
+        end
+				parsed << argset	# one array holding all parsed arguments
+				path
+			end
+
+			# get_group( '[foo]bar' ) -> ['bar', '[foo]']
+			def get_group string	# Splits off the leading (...) or [...] group
+				ind = 0
+				depth = 0
+				st = string[0,1]
+				en = (st == "(" ? ")" : "]")	# any opener other than "(" is closed by "]"
+				begin
+					case string[ind,1]
+					when st
+						depth += 1
+					when en
+						depth -= 1
+					end
+					ind += 1
+				end while depth > 0 and ind < string.length
+				return nil unless depth==0	# the group was never closed
+				[string[ind..-1], string[0..ind-1]]	# [ rest, group with its delimiters ]
+			end
+			
+			def parse_args( string )
+				arguments = []
+				ind = 0
+				depth = 1
+				begin
+					case string[ind]
+					when ?(
+						depth += 1
+						if depth == 1
+							string = string[1..-1]
+							ind -= 1
+						end
+					when ?)
+						depth -= 1
+						if depth == 0
+							s = string[0,ind].strip
+							arguments << s unless s == ""
+							string = string[ind+1..-1]
+						end
+					when ?,
+						if depth == 1
+							s = string[0,ind].strip
+							arguments << s unless s == ""
+							string = string[ind+1..-1]
+							ind = 0
+						end
+					end
+					ind += 1
+				end while depth > 0 and ind < string.length
+				return nil unless depth==0
+				[string,arguments]
+			end
+		end
+	end
+end
author	ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-06-10 01:31:01 +0000
committer	ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>	2003-06-10 01:31:01 +0000
commit	ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d (patch)
tree	d3e1f95a5acf262a9dd46e9663b7034bb285b406 /lib/rexml/parsers
parent	ca02190d8887ecd852e4e3f18f3a3ea91e9c6f7a (diff)
download	ruby-ea7a527a2ae7024a5cf2885dee8f7a5c21fedd5d.tar.gz