diff options
author | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2005-05-19 02:58:11 +0000 |
---|---|---|
committer | ser <ser@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2005-05-19 02:58:11 +0000 |
commit | 21e8df5c109e4dd4f50bcebdebf8e4c4ce297560 (patch) | |
tree | bc15a6fc484d3df5ebe316d69359fa2d68cf6a5f /lib/rexml/parsers | |
parent | a399253153b1c4e6f09d798973524fa3dc158247 (diff) | |
download | ruby-21e8df5c109e4dd4f50bcebdebf8e4c4ce297560.tar.gz |
Merged in development from the main REXML repository.
* Fixed bug #34, typo in xpath_parser.
* Previous fix, (include? -> includes?) was incorrect.
* Added another test for encoding
* Started AnyName support in RelaxNG
* Added Element#Attributes#to_a, so that it does something intelligent.
This was needed by XPath, for '@*'
* Fixed XPath so that @* works.
* Added xmlgrep to the bin/ directory. A little tool allowing you to grep
for XPaths in an XML document.
* Fixed a CDATA pretty-printing bug. (#39)
* Fixed a buffering bug in Source.rb that affected the SAX parser
This bug was related to how REXML determines the encoding of a file, and
evinced itself by hanging on input when using the SAX parser.
* The unit test for the previous patch. Forgot to commit it.
* Minor pretty printing fix.
* Applied Curt Sampson's optimization improvements
* Issue #9; 3.1.3: The SAX parser was not denormalizing entity references
in incoming text. All declared internal entities, as well as numeric
entities, should now be denormalized. There was a related bug in that the
SAX parser was actually double-encoding entities; this is also fixed.
* bin/* programs should now be executable (set the bin apps to executable).
* Issue 14; 3.1.3: DTD events are now all being passed by StreamParser
Some of the DTD events were not being passed through by the stream parser.
* #26: Element#add_element(nil) now raises an error. Changed XPath searches so
that if a non-Hash is passed, an error is raised. Fixed a spurious undefined
method error in encoding. #29: XPath ordering bug fixed by Mark Williams.
Incidentally, Mark supplied a superlative bug report, including a full unit
test. Then he went ahead and fixed the bug. It doesn't get any better than
this, folks.
* Fixed a broken link. Thanks to Dick Davies for pointing it out. Added
functions courtesy of Michael Neumann <mneumann@xxxx.de>.
Example code to follow.
* Added Michael's sample code. Merged the changes in from branches/xpath_V
* Fixed the preceding:: and following:: axes. Fixed the ordering bug that Martin
Fowler reported.
* Uncommented some code that had been commented out for testing. Applied Nobu's
changes to the Encoding infrastructure, which should fix potential threading issues.
* Added more tests, and the missing syncenumerator class. Fixed the
inheritance bug in the pull parser that James Britt found. Indentation
changes, and changed some exceptions to runtime
exceptions.
* Changes by Matz, mostly of indent -> indent_level, to avoid
function/variable naming conflicts
* Tabs -> spaces (whitespace)
Note the addition of syncenumerator.rb. This is a stopgap, until I can work on
the class enough to get it accepted as a replacement for the SyncEnumerator
that comes with the Generator class. My version is orders of magnitude faster
than the Generator SyncEnumerator, but is currently missing a couple of
features of the original. Eventually, I expect this class to migrate to
another part of the source tree.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@8483 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rexml/parsers')
-rw-r--r-- | lib/rexml/parsers/pullparser.rb | 59 | ||||
-rw-r--r-- | lib/rexml/parsers/sax2parser.rb | 18 | ||||
-rw-r--r-- | lib/rexml/parsers/streamparser.rb | 5 | ||||
-rw-r--r-- | lib/rexml/parsers/xpathparser.rb | 9 |
4 files changed, 73 insertions, 18 deletions
diff --git a/lib/rexml/parsers/pullparser.rb b/lib/rexml/parsers/pullparser.rb index fe4d41c959..0a328ea8fc 100644 --- a/lib/rexml/parsers/pullparser.rb +++ b/lib/rexml/parsers/pullparser.rb @@ -23,13 +23,13 @@ module REXML # end # # Nat Price gave me some good ideas for the API. - class PullParser < BaseParser + class PullParser include XMLTokens def initialize stream - super @entities = {} @listeners = nil + @parser = BaseParser.new( stream ) end def add_listener( listener ) @@ -44,21 +44,38 @@ module REXML end def peek depth=0 - PullEvent.new(super) + PullEvent.new(@parser.peek(depth)) end + def has_next? + @parser.has_next? + end + def pull - event = super + event = @parser.pull case event[0] when :entitydecl @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/ when :text - unnormalized = unnormalize( event[1], @entities ) + unnormalized = @parser.unnormalize( event[1], @entities ) event << unnormalized end PullEvent.new( event ) end + + def unshift token + @parser.unshift token + end + + def entity reference + @parser.entity( reference ) + end + + def empty? + @parser.empty? + end + end # A parsing event. The contents of the event are accessed as an +Array?, @@ -73,44 +90,65 @@ module REXML def initialize(arg) @contents = arg end - def []( index ) - @contents[index+1] + + def []( start, endd=nil) + if start.kind_of? Range + @contents.slice( start.begin+1 .. start.end ) + elsif start.kind_of? Numeric + if endd.nil? + @contents.slice( start+1 ) + else + @contents.slice( start+1, endd ) + end + else + raise "Illegal argument #{start.inspect} (#{start.class})" + end end + def event_type @contents[0] end + # Content: [ String tag_name, Hash attributes ] def start_element? @contents[0] == :start_element end + # Content: [ String tag_name ] def end_element? @contents[0] == :end_element end + # Content: [ String raw_text, String unnormalized_text ] def text? @contents[0] == :text end + # Content: [ String text ] def instruction? 
@contents[0] == :processing_instruction end + # Content: [ String text ] def comment? @contents[0] == :comment end + # Content: [ String name, String pub_sys, String long_name, String uri ] def doctype? @contents[0] == :start_doctype end + # Content: [ String text ] def attlistdecl? @contents[0] == :attlistdecl end + # Content: [ String text ] def elementdecl? @contents[0] == :elementdecl end + # Due to the wonders of DTDs, an entity declaration can be just about # anything. There's no way to normalize it; you'll have to interpret the # content yourself. However, the following is true: @@ -121,28 +159,33 @@ module REXML def entitydecl? @contents[0] == :entitydecl end + # Content: [ String text ] def notationdecl? @contents[0] == :notationdecl end + # Content: [ String text ] def entity? @contents[0] == :entity end + # Content: [ String text ] def cdata? @contents[0] == :cdata end + # Content: [ String version, String encoding, String standalone ] def xmldecl? @contents[0] == :xmldecl end + def error? 
@contents[0] == :error end def inspect - @contents[0].to_s + ": " + @contents[1..-1].inspect + @contents[0].to_s + ": " + @contents[1..-1].inspect end end end diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 96440d17bf..d5ee1bcfcd 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -12,6 +12,7 @@ module REXML @namespace_stack = [] @has_listeners = false @tag_stack = [] + @entities = {} end def add_listener( listener ) @@ -143,10 +144,21 @@ module REXML end end when :text - normalized = @parser.normalize( event[1] ) - handle( :characters, normalized ) + #normalized = @parser.normalize( event[1] ) + #handle( :characters, normalized ) + copy = event[1].clone + @entities.each { |key, value| copy = copy.gsub("&#{key};", value) } + copy.gsub!( Text::NUMERICENTITY ) {|m| + m=$1 + m = "0#{m}" if m[0] == ?x + [Integer(m)].pack('U*') + } + handle( :characters, copy ) + when :entitydecl + @entities[ event[1] ] = event[2] if event.size == 3 + handle( *event ) when :processing_instruction, :comment, :doctype, :attlistdecl, - :elementdecl, :entitydecl, :cdata, :notationdecl, :xmldecl + :elementdecl, :cdata, :notationdecl, :xmldecl handle( *event ) end end diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index 357cc186e6..996d613e15 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -31,9 +31,8 @@ module REXML @listener.instruction( *event[1,2] ) when :start_doctype @listener.doctype( *event[1..-1] ) - when :notationdecl, :entitydecl, :elementdecl - @listener.notationdecl( event[1..-1] ) - when :comment, :attlistdecl, :elementdecl, :cdata, :xmldecl + when :comment, :attlistdecl, :notationdecl, :elementdecl, + :entitydecl, :cdata, :xmldecl, :attlistdecl @listener.send( event[0].to_s, *event[1..-1] ) end end diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index 41b2b8a5c1..6bac852d6b 100644 --- 
a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -20,7 +20,7 @@ module REXML path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces path.gsub!( /\s+([\]\)])/, '\1' ) parsed = [] - path = LocationPath(path, parsed) + path = OrExpr(path, parsed) parsed end @@ -302,7 +302,7 @@ module REXML path = path[1..-1] end parsed << :processing_instruction - parsed << literal + parsed << (literal || '') when NCNAMETEST #puts "NCNAMETEST" prefix = $1 @@ -589,9 +589,10 @@ module REXML when /^(\w[-\w]*)(?:\()/ #puts "PrimaryExpr :: Function >>> #$1 -- '#$''" fname = $1 - path = $' + tmp = $' #puts "#{fname} =~ #{NT.inspect}" - #return nil if fname =~ NT + return path if fname =~ NT + path = tmp parsed << :function parsed << fname path = FunctionCall(path, parsed) |