From e6636fe890ab7cf8f8f1b86ca54a3e10f98d43e6 Mon Sep 17 00:00:00 2001 From: ser Date: Tue, 9 Dec 2003 02:41:33 +0000 Subject: * Added the lower-case Shift-JIS files to the manifest. The upper-case ones should be deprecated, but I need a Shift-JIS encoded XML file to test against, first. * Added support for maintaining external entity occurances in DTDs * Deprecated the use of Document::DECLARATION. The new default declaration can be gotten with XMLDecl::default() * Refactored the encoding support code. It should be more robust now, and fixes a few bugs. * The XPath string() function now deals with Element nodes properly. * Serialization with Output objects now works as would be expected. * Various code cleanups, some reducing the number of warnings that Ruby 1.8.x produces with REXML. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5144 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rexml/doctype.rb | 14 + lib/rexml/document.rb | 27 +- lib/rexml/encoding.rb | 12 +- lib/rexml/encodings/EUC-JP.rb | 49 +- lib/rexml/encodings/ICONV.rb | 18 +- lib/rexml/encodings/ISO-8859-1.rb | 42 +- lib/rexml/encodings/UNILE.rb | 50 +- lib/rexml/encodings/US-ASCII.rb | 42 +- lib/rexml/encodings/UTF-16.rb | 50 +- lib/rexml/encodings/UTF-8.rb | 18 +- lib/rexml/functions.rb | 8 +- lib/rexml/light/node.rb | 4 +- lib/rexml/output.rb | 10 +- lib/rexml/parseexception.rb | 2 - lib/rexml/parsers/baseparser.rb | 6 + lib/rexml/parsers/sax2parser.rb | 13 +- lib/rexml/parsers/xpathparser.rb | 1099 +++++++++++++++++++------------------ lib/rexml/rexml.rb | 6 +- lib/rexml/source.rb | 24 +- lib/rexml/text.rb | 2 +- lib/rexml/xmldecl.rb | 43 +- 21 files changed, 817 insertions(+), 722 deletions(-) (limited to 'lib/rexml') diff --git a/lib/rexml/doctype.rb b/lib/rexml/doctype.rb index d70ea6fd6c..084676afa9 100644 --- a/lib/rexml/doctype.rb +++ b/lib/rexml/doctype.rb @@ -163,6 +163,20 @@ module REXML end end + class ExternalEntity < Child + def initialize( src ) + super() + @entity = src + end + def to_s + @entity + end + def write( output, indent ) + output << @entity + output << "\n" + end + end + class NotationDecl < Child def initialize name, middle, rest @name = name diff --git a/lib/rexml/document.rb b/lib/rexml/document.rb index 8617f904e6..1eefaea92a 100644 --- a/lib/rexml/document.rb +++ b/lib/rexml/document.rb @@ -19,7 +19,9 @@ module REXML class Document < Element # A convenient default XML declaration. If you want an XML declaration, # the easiest way to add one is mydoc << Document::DECLARATION - DECLARATION = XMLDecl.new( "1.0", "UTF-8" ) + # +DEPRECATED+ + # Use: mydoc << XMLDecl.default + DECLARATION = XMLDecl.default # Constructor # @param source if supplied, must be a Document, String, or IO. @@ -102,30 +104,27 @@ module REXML # @return the XMLDecl of this document; if no XMLDecl has been # set, the default declaration is returned. def xml_decl - rv = @children.find { |item| item.kind_of? XMLDecl } - rv = DECLARATION if rv.nil? - rv + rv = @children[0] + return rv if rv.kind_of? XMLDecl + rv = @children.unshift(XMLDecl.default)[0] end # @return the XMLDecl version of this document as a String. # If no XMLDecl has been set, returns the default version. def version - decl = xml_decl() - decl.nil? ? XMLDecl.DEFAULT_VERSION : decl.version + xml_decl().version end # @return the XMLDecl encoding of this document as a String. # If no XMLDecl has been set, returns the default encoding. def encoding - decl = xml_decl() - decl.nil? or decl.encoding.nil? ? XMLDecl.DEFAULT_ENCODING : decl.encoding + xml_decl().encoding end # @return the XMLDecl standalone value of this document as a String. # If no XMLDecl has been set, returns the default setting. def stand_alone? - decl = xml_decl() - decl.nil? ? XMLDecl.DEFAULT_STANDALONE : decl.stand_alone? + xml_decl().stand_alone? end # Write the XML tree out, optionally with indent. This writes out the @@ -154,8 +153,9 @@ module REXML # that IE's limited abilities can handle. This hack inserts a space # before the /> on empty tags. Defaults to false def write( output=$stdout, indent=-1, transitive=false, ie_hack=false ) - output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" + output = Output.new( output, xml_decl.encoding ) if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output) @children.each { |node| + indent( output, indent) if node.node_type == :element node.write( output, indent, transitive, ie_hack ) output << "\n" unless indent<0 or node == @children[-1] } @@ -193,7 +193,7 @@ module REXML build_context.add( Text.new( event[1], true, nil, true ) ) unless ( - event[1].strip.size == 0 and + event[1].strip.size==0 and build_context.ignore_whitespace_nodes ) end @@ -218,6 +218,9 @@ module REXML when :attlistdecl n = AttlistDecl.new( event[1..-1] ) build_context.add( n ) + when :externalentity + n = ExternalEntity.new( event[1] ) + build_context.add( n ) when :elementdecl n = ElementDecl.new( event[1] ) build_context.add(n) diff --git a/lib/rexml/encoding.rb b/lib/rexml/encoding.rb index ad8ba7e342..92ae1e8c21 100644 --- a/lib/rexml/encoding.rb +++ b/lib/rexml/encoding.rb @@ -18,20 +18,26 @@ module REXML if enc and enc != UTF_8 @encoding = enc.upcase begin - load 'rexml/encodings/ICONV.rb' + load 'rexml/encodings/ICONV.rb' + instance_eval @@__REXML_encoding_methods Iconv::iconv( UTF_8, @encoding, "" ) rescue LoadError, Exception => err + raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ + @encoding.untaint enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) begin - load enc_file + load enc_file + instance_eval @@__REXML_encoding_methods rescue LoadError + puts $!.message raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." ) end end else enc = UTF_8 @encoding = enc.upcase - load 'rexml/encodings/UTF-8.rb' + load 'rexml/encodings/UTF-8.rb' + instance_eval @@__REXML_encoding_methods end ensure $VERBOSE = old_verbosity diff --git a/lib/rexml/encodings/EUC-JP.rb b/lib/rexml/encodings/EUC-JP.rb index 8b146e5f0a..a1314d0856 100644 --- a/lib/rexml/encodings/EUC-JP.rb +++ b/lib/rexml/encodings/EUC-JP.rb @@ -1,31 +1,36 @@ begin - require 'uconv' + require 'iconv' - module REXML - module Encoding - def decode(str) - return Uconv::euctou8(str) - end + module REXML + module Encoding + @@__REXML_encoding_methods =<<-EOL + def decode(str) + return Iconv::iconv("utf-8", "euc-jp", str)[0] + end - def encode content - return Uconv::u8toeuc(content) - end - end - end + def encode content + return Iconv::iconv("euc-jp", "utf-8", content)[0] + end + EOL + end + end rescue LoadError begin - require 'iconv' - module REXML - module Encoding - def decode(str) - return Iconv::iconv("utf-8", "euc-jp", str)[0] - end + require 'uconv' + + module REXML + module Encoding + @@__REXML_encoding_methods =<<-EOL + def decode(str) + return Uconv::euctou8(str) + end - def encode content - return Iconv::iconv("euc-jp", "utf-8", content)[0] - end - end - end + def encode content + return Uconv::u8toeuc(content) + end + EOL + end + end rescue LoadError raise "uconv or iconv is required for Japanese encoding support." end diff --git a/lib/rexml/encodings/ICONV.rb b/lib/rexml/encodings/ICONV.rb index f1b5c80b87..384758d7b2 100644 --- a/lib/rexml/encodings/ICONV.rb +++ b/lib/rexml/encodings/ICONV.rb @@ -2,13 +2,15 @@ require "iconv" raise LoadError unless defined? Iconv module REXML - module Encoding - def decode( str ) - return Iconv::iconv(UTF_8, @encoding, str)[0] - end + module Encoding + @@__REXML_encoding_methods =<<-EOL + def decode( str ) + return Iconv::iconv("utf-8", @encoding, str)[0] + end - def encode( content ) - return Iconv::iconv(@encoding, UTF_8, content)[0] - end - end + def encode( content ) + return Iconv::iconv(@encoding, "utf-8", content)[0] + end + EOL + end end diff --git a/lib/rexml/encodings/ISO-8859-1.rb b/lib/rexml/encodings/ISO-8859-1.rb index 0cb9afd147..5beefbd408 100644 --- a/lib/rexml/encodings/ISO-8859-1.rb +++ b/lib/rexml/encodings/ISO-8859-1.rb @@ -1,23 +1,25 @@ module REXML - module Encoding - # Convert from UTF-8 - def encode content - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - array_enc.pack('C*') - end + module Encoding + @@__REXML_encoding_methods =<<-EOL + # Convert from UTF-8 + def encode content + array_utf8 = content.unpack('U*') + array_enc = [] + array_utf8.each do |num| + if num <= 0xFF + array_enc << num + else + # Numeric entity (&#nnnn;); shard by Stefan Scholl + array_enc.concat "&\##{num};".unpack('C*') + end + end + array_enc.pack('C*') + end - # Convert to UTF-8 - def decode(str) - str.unpack('C*').pack('U*') - end - end + # Convert to UTF-8 + def decode(str) + str.unpack('C*').pack('U*') + end + EOL + end end diff --git a/lib/rexml/encodings/UNILE.rb b/lib/rexml/encodings/UNILE.rb index e57a784061..3ff9b2b53c 100644 --- a/lib/rexml/encodings/UNILE.rb +++ b/lib/rexml/encodings/UNILE.rb @@ -1,27 +1,29 @@ module REXML - module Encoding - def encode content - array_utf8 = content.unpack("U*") - array_enc = [] - array_utf8.each do |num| - if ((num>>16) > 0) - array_enc << ?? - array_enc << 0 - else - array_enc << (num & 0xFF) - array_enc << (num >> 8) - end - end - array_enc.pack('C*') - end + module Encoding + @@__REXML_encoding_string =<<-EOL + def encode content + array_utf8 = content.unpack("U*") + array_enc = [] + array_utf8.each do |num| + if ((num>>16) > 0) + array_enc << ?? + array_enc << 0 + else + array_enc << (num & 0xFF) + array_enc << (num >> 8) + end + end + array_enc.pack('C*') + end - def decode(str) - array_enc=str.unpack('C*') - array_utf8 = [] - 2.step(array_enc.size-1, 2){|i| - array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100) - } - array_utf8.pack('U*') - end - end + def decode(str) + array_enc=str.unpack('C*') + array_utf8 = [] + 2.step(array_enc.size-1, 2){|i| + array_utf8 << (array_enc.at(i) + array_enc.at(i+1)*0x100) + } + array_utf8.pack('U*') + end + EOL + end end diff --git a/lib/rexml/encodings/US-ASCII.rb b/lib/rexml/encodings/US-ASCII.rb index 0cb9afd147..618e0ad613 100644 --- a/lib/rexml/encodings/US-ASCII.rb +++ b/lib/rexml/encodings/US-ASCII.rb @@ -1,23 +1,25 @@ module REXML - module Encoding - # Convert from UTF-8 - def encode content - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - array_enc.pack('C*') - end + module Encoding + @@__REXML_encoding_string =<<-EOL + # Convert from UTF-8 + def encode content + array_utf8 = content.unpack('U*') + array_enc = [] + array_utf8.each do |num| + if num <= 0xFF + array_enc << num + else + # Numeric entity (&#nnnn;); shard by Stefan Scholl + array_enc.concat "&\##{num};".unpack('C*') + end + end + array_enc.pack('C*') + end - # Convert to UTF-8 - def decode(str) - str.unpack('C*').pack('U*') - end - end + # Convert to UTF-8 + def decode(str) + str.unpack('C*').pack('U*') + end + EOL + end end diff --git a/lib/rexml/encodings/UTF-16.rb b/lib/rexml/encodings/UTF-16.rb index 31f2d81a5b..ecfc4f9945 100644 --- a/lib/rexml/encodings/UTF-16.rb +++ b/lib/rexml/encodings/UTF-16.rb @@ -1,27 +1,29 @@ module REXML - module Encoding - def encode content - array_utf8 = content.unpack("U*") - array_enc = [] - array_utf8.each do |num| - if ((num>>16) > 0) - array_enc << 0 - array_enc << ?? - else - array_enc << (num >> 8) - array_enc << (num & 0xFF) - end - end - array_enc.pack('C*') - end + module Encoding + @@__REXML_encoding_string =<<-EOL + def encode content + array_utf8 = content.unpack("U*") + array_enc = [] + array_utf8.each do |num| + if ((num>>16) > 0) + array_enc << 0 + array_enc << ?? + else + array_enc << (num >> 8) + array_enc << (num & 0xFF) + end + end + array_enc.pack('C*') + end - def decode(str) - array_enc=str.unpack('C*') - array_utf8 = [] - 2.step(arrayEnc.size-1, 2){|i| - array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100) - } - array_utf8.pack('U*') - end - end + def decode(str) + array_enc=str.unpack('C*') + array_utf8 = [] + 2.step(array_enc.size-1, 2){|i| + array_utf8 << (array_enc.at(i+1) + array_enc.at(i)*0x100) + } + array_utf8.pack('U*') + end + EOL + end end diff --git a/lib/rexml/encodings/UTF-8.rb b/lib/rexml/encodings/UTF-8.rb index 96fee4c4c0..33a7e490c4 100644 --- a/lib/rexml/encodings/UTF-8.rb +++ b/lib/rexml/encodings/UTF-8.rb @@ -1,11 +1,13 @@ module REXML - module Encoding - def encode content - content - end + module Encoding + @@__REXML_encoding_methods =<<-EOL + def encode content + content + end - def decode(str) - str - end - end + def decode(str) + str + end + EOL + end end diff --git a/lib/rexml/functions.rb b/lib/rexml/functions.rb index d2d078640b..d64ba7e378 100644 --- a/lib/rexml/functions.rb +++ b/lib/rexml/functions.rb @@ -27,7 +27,13 @@ module REXML def Functions::namespace_context; @@namespace_context; end def Functions::text( ) - return true if @@node.node_type == :text + if @@node.node_type == :element + return @@node.text + elsif @@node.node_type == :text + return @@node.value + else + return false + end end def Functions::last( ) diff --git a/lib/rexml/light/node.rb b/lib/rexml/light/node.rb index ff8cb987f0..943ec5f1a0 100644 --- a/lib/rexml/light/node.rb +++ b/lib/rexml/light/node.rb @@ -135,8 +135,8 @@ module REXML end def text=( foo ) - replace = at(4).kind_of?(String) ? 1 : 0 - _old_put(4, replace, normalizefoo) + replace = at(4).kind_of?(String)? 1 : 0 + self._old_put(4,replace, normalizefoo) end def root diff --git a/lib/rexml/output.rb b/lib/rexml/output.rb index c4a7473bfb..be4d23d42d 100644 --- a/lib/rexml/output.rb +++ b/lib/rexml/output.rb @@ -3,7 +3,9 @@ require 'rexml/encoding' module REXML class Output include Encoding - attr_reader :encoding + + attr_reader :encoding + def initialize real_IO, encd="iso-8859-1" @output = real_IO self.encoding = encd @@ -12,7 +14,11 @@ module REXML end def <<( content ) - @output << (@to_utf ? encode(content) : content) + @output << (@to_utf ? self.encode(content) : content) end + + def to_s + "Output[#{encoding}]" + end end end diff --git a/lib/rexml/parseexception.rb b/lib/rexml/parseexception.rb index 0fee3ae620..66a4214548 100644 --- a/lib/rexml/parseexception.rb +++ b/lib/rexml/parseexception.rb @@ -29,8 +29,6 @@ module REXML err << "Position: #{position}\n" err << "Last 80 unconsumed characters:\n" err << @source.buffer[0..80].gsub(/\n/, ' ') - err << "\n" - err << @source.buffer[0..80].unpack("U*").inspect end err diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 276af03389..057617d6e8 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -56,6 +56,7 @@ module REXML IDENTITY = /^([!\*\w\-]+)(\s+#{NCNAME_STR})?(\s+["'].*?['"])?(\s+['"].*?["'])?/u ELEMENTDECL_START = /^\s*/um + SYSTEMENTITY = /^\s*(%.*?;)\s*$/um ENUMERATION = "\\(\\s*#{NMTOKEN}(?:\\s*\\|\\s*#{NMTOKEN})*\\s*\\)" NOTATIONTYPE = "NOTATION\\s+\\(\\s*#{NAME}(?:\\s*\\|\\s*#{NAME})*\\s*\\)" ENUMERATEDTYPE = "(?:(?:#{NOTATIONTYPE})|(?:#{ENUMERATION}))" @@ -214,8 +215,13 @@ module REXML if @document_status == :in_doctype md = @source.match(/\s*(.*?>)/um) case md[1] + when SYSTEMENTITY + match = @source.match( SYSTEMENTITY, true )[1] + return [ :externalentity, match ] + when ELEMENTDECL_START return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] + when ENTITY_START match = @source.match( ENTITYDECL, true ).to_a.compact match[0] = :entitydecl diff --git a/lib/rexml/parsers/sax2parser.rb b/lib/rexml/parsers/sax2parser.rb index 2280b983a3..aab87caae4 100644 --- a/lib/rexml/parsers/sax2parser.rb +++ b/lib/rexml/parsers/sax2parser.rb @@ -1,3 +1,7 @@ +require 'rexml/parsers/baseparser' +require 'rexml/parseexception' +require 'rexml/namespace' + module REXML module Parsers class SAX2Parser @@ -85,7 +89,7 @@ module REXML if procs or listeners # break out the namespace declarations # The attributes live in event[2] - nsdecl = event[2].find_all { |n, value| n =~ /^xmlns:/ } + nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ } nsdecl.collect! { |n, value| [ n[6..-1], value ] } @namespace_stack.push({}) nsdecl.each do |n,v| @@ -194,10 +198,9 @@ module REXML end def get_namespace( prefix ) - uri = @namespace_stack.find do |ns| - not ns[prefix].nil? - end - uri[prefix] unless uri.nil? + uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) || + (@namespace_stack.find { |ns| not ns[nil].nil? }) + uris[-1][prefix] unless uris.nil? or 0 == uris.size end end end diff --git a/lib/rexml/parsers/xpathparser.rb b/lib/rexml/parsers/xpathparser.rb index da27e7c705..082f8255ad 100644 --- a/lib/rexml/parsers/xpathparser.rb +++ b/lib/rexml/parsers/xpathparser.rb @@ -2,597 +2,600 @@ require 'rexml/namespace' require 'rexml/xmltokens' module REXML - module Parsers - # You don't want to use this class. Really. Use XPath, which is a wrapper - # for this class. Believe me. You don't want to poke around in here. - # There is strange, dark magic at work in this code. Beware. Go back! Go - # back while you still can! - class XPathParser - include XMLTokens - LITERAL = /^'([^']*)'|^"([^"]*)"/u + module Parsers + # You don't want to use this class. Really. Use XPath, which is a wrapper + # for this class. Believe me. You don't want to poke around in here. + # There is strange, dark magic at work in this code. Beware. Go back! Go + # back while you still can! + class XPathParser + include XMLTokens + LITERAL = /^'([^']*)'|^"([^"]*)"/u - def namespaces=( namespaces ) - Functions::namespace_context = namespaces - @namespaces = namespaces - end + def namespaces=( namespaces ) + Functions::namespace_context = namespaces + @namespaces = namespaces + end - def parse path - path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces - path.gsub!( /\s+([\]\)])/, '\1' ) - parsed = [] - path = LocationPath(path, parsed) - parsed - end + def parse path + path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces + path.gsub!( /\s+([\]\)])/, '\1' ) + parsed = [] + path = LocationPath(path, parsed) + parsed + end - def predicate path - parsed = [] - Predicate( "[#{path}]", parsed ) - parsed - end + def predicate path + parsed = [] + Predicate( "[#{path}]", parsed ) + parsed + end - def to_string( path ) - string = "" - while path.size > 0 - case path[0] - when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self - op = path.shift - string << "/" unless string.size == 0 - string << op.to_s - string << "::" - when :any - path.shift - string << "*" - when :qname - path.shift - prefix = path.shift - name = path.shift - string << prefix+":" if prefix.size > 0 - string << name - when :predicate - path.shift - string << '[' - string << predicate_to_string( path.shift ) - string << ' ]' - else - string << "/" unless string.size == 0 - string << "UNKNOWN(" - string << path.shift.inspect - string << ")" - end - end - return string - end + def to_string( path ) + string = "" + while path.size > 0 + case path[0] + when :ancestor, :ancestor_or_self, :attribute, :child, :descendant, :descendant_or_self, :following, :following_sibling, :namespace, :parent, :preceding, :preceding_sibling, :self + op = path.shift + string << "/" unless string.size == 0 + string << op.to_s + string << "::" + when :any + path.shift + string << "*" + when :qname + path.shift + prefix = path.shift + name = path.shift + string << prefix+":" if prefix.size > 0 + string << name + when :predicate + path.shift + string << '[' + string << predicate_to_string( path.shift ) + string << ' ]' + else + string << "/" unless string.size == 0 + string << "UNKNOWN(" + string << path.shift.inspect + string << ")" + end + end + return string + end - def predicate_to_string( path ) - string = "" - case path[0] - when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union - op = path.shift - left = predicate_to_string( path.shift ) - right = predicate_to_string( path.shift ) - string << " " - string << left - string << " " - string << op.to_s - string << " " - string << right - string << " " - when :function - path.shift - name = path.shift - string << name - string << "( " - string << predicate_to_string( path.shift ) - string << " )" - when :literal - path.shift - string << " " - string << path.shift.inspect - string << " " - else - string << " " - string << to_string( path ) - string << " " - end - return string.squeeze(" ") - end + def predicate_to_string( path ) + string = "" + case path[0] + when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :neq, :union + op = path.shift + left = predicate_to_string( path.shift ) + right = predicate_to_string( path.shift ) + string << " " + string << left + string << " " + string << op.to_s + string << " " + string << right + string << " " + when :function + path.shift + name = path.shift + string << name + string << "( " + string << predicate_to_string( path.shift ) + string << " )" + when :literal + path.shift + string << " " + string << path.shift.inspect + string << " " + else + string << " " + string << to_string( path ) + string << " " + end + return string.squeeze(" ") + end - private - #LocationPath - # | RelativeLocationPath - # | '/' RelativeLocationPath? - # | '//' RelativeLocationPath - def LocationPath path, parsed + private + #LocationPath + # | RelativeLocationPath + # | '/' RelativeLocationPath? + # | '//' RelativeLocationPath + def LocationPath path, parsed #puts "LocationPath '#{path}'" path = path.strip if path[0] == ?/ parsed << :document - if path[1] == ?/ - parsed << :descendant_or_self - parsed << :node - path = path[2..-1] - else - path = path[1..-1] - end + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] + end end #puts parsed.inspect - return RelativeLocationPath( path, parsed ) if path.size > 0 - end + return RelativeLocationPath( path, parsed ) if path.size > 0 + end - #RelativeLocationPath - # | Step - # | (AXIS_NAME '::' | '@' | '') AxisSpecifier - # NodeTest - # Predicate - # | '.' | '..' AbbreviatedStep - # | RelativeLocationPath '/' Step - # | RelativeLocationPath '//' Step - AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ - def RelativeLocationPath path, parsed - #puts "RelativeLocationPath #{path}" - while path.size > 0 - # (axis or @ or ) nodetest predicate > - # OR > / Step - # (. or ..) > - if path[0] == ?. - if path[1] == ?. - parsed << :parent - parsed << :node - path = path[2..-1] - else - parsed << :self - parsed << :node - path = path[1..-1] - end - else - if path[0] == ?@ - #puts "ATTRIBUTE" - parsed << :attribute - path = path[1..-1] - # Goto Nodetest - elsif path =~ AXIS - parsed << $1.tr('-','_').intern - path = $' - # Goto Nodetest - else - parsed << :child - end + #RelativeLocationPath + # | Step + # | (AXIS_NAME '::' | '@' | '') AxisSpecifier + # NodeTest + # Predicate + # | '.' | '..' AbbreviatedStep + # | RelativeLocationPath '/' Step + # | RelativeLocationPath '//' Step + AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/ + def RelativeLocationPath path, parsed + #puts "RelativeLocationPath #{path}" + while path.size > 0 + # (axis or @ or ) nodetest predicate > + # OR > / Step + # (. or ..) > + if path[0] == ?. + if path[1] == ?. + parsed << :parent + parsed << :node + path = path[2..-1] + else + parsed << :self + parsed << :node + path = path[1..-1] + end + else + if path[0] == ?@ + #puts "ATTRIBUTE" + parsed << :attribute + path = path[1..-1] + # Goto Nodetest + elsif path =~ AXIS + parsed << $1.tr('-','_').intern + path = $' + # Goto Nodetest + else + parsed << :child + end - #puts "NODETESTING '#{path}'" - n = [] - path = NodeTest( path, n) - #puts "NODETEST RETURNED '#{path}'" + #puts "NODETESTING '#{path}'" + n = [] + path = NodeTest( path, n) + #puts "NODETEST RETURNED '#{path}'" - if path[0] == ?[ - path = Predicate( path, n ) - end + if path[0] == ?[ + path = Predicate( path, n ) + end - parsed.concat(n) - end - - if path.size > 0 - if path[0] == ?/ - if path[1] == ?/ - parsed << :descendant_or_self - parsed << :node - path = path[2..-1] - else - path = path[1..-1] - end - else - return path - end - end - end - return path - end + parsed.concat(n) + end + + if path.size > 0 + if path[0] == ?/ + if path[1] == ?/ + parsed << :descendant_or_self + parsed << :node + path = path[2..-1] + else + path = path[1..-1] + end + else + return path + end + end + end + return path + end - # Returns a 1-1 map of the nodeset - # The contents of the resulting array are either: - # true/false, if a positive match - # String, if a name match - #NodeTest - # | ('*' | NCNAME ':' '*' | QNAME) NameTest - # | NODE_TYPE '(' ')' NodeType - # | PI '(' LITERAL ')' PI - # | '[' expr ']' Predicate - NCNAMETEST= /^(#{NCNAME_STR}):\*/u - QNAME = Namespace::NAMESPLIT - NODE_TYPE = /^(comment|text|node)\(\s*\)/m - PI = /^processing-instruction\(/ - def NodeTest path, parsed + # Returns a 1-1 map of the nodeset + # The contents of the resulting array are either: + # true/false, if a positive match + # String, if a name match + #NodeTest + # | ('*' | NCNAME ':' '*' | QNAME) NameTest + # | NODE_TYPE '(' ')' NodeType + # | PI '(' LITERAL ')' PI + # | '[' expr ']' Predicate + NCNAMETEST= /^(#{NCNAME_STR}):\*/u + QNAME = Namespace::NAMESPLIT + NODE_TYPE = /^(comment|text|node)\(\s*\)/m + PI = /^processing-instruction\(/ + def NodeTest path, parsed #puts "NodeTest with #{path}" - res = nil - case path - when /^\*/ - path = $' - parsed << :any - when NODE_TYPE - type = $1 - path = $' - parsed << type.tr('-', '_').intern - when PI - path = $' - literal = nil - if path !~ /^\s*\)/ - path =~ LITERAL - literal = $1 - path = $' - raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?) - path = path[1..-1] - end - parsed << :processing_instruction - parsed << literal - when NCNAMETEST - #puts "NCNAMETEST" - prefix = $1 - path = $' - parsed << :namespace - parsed << prefix - when QNAME - #puts "QNAME" - prefix = $1 - name = $2 - path = $' - prefix = "" unless prefix - parsed << :qname - parsed << prefix - parsed << name - end - return path - end + res = nil + case path + when /^\*/ + path = $' + parsed << :any + when NODE_TYPE + type = $1 + path = $' + parsed << type.tr('-', '_').intern + when PI + path = $' + literal = nil + if path !~ /^\s*\)/ + path =~ LITERAL + literal = $1 + path = $' + raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?) + path = path[1..-1] + end + parsed << :processing_instruction + parsed << literal + when NCNAMETEST + #puts "NCNAMETEST" + prefix = $1 + path = $' + parsed << :namespace + parsed << prefix + when QNAME + #puts "QNAME" + prefix = $1 + name = $2 + path = $' + prefix = "" unless prefix + parsed << :qname + parsed << prefix + parsed << name + end + return path + end - # Filters the supplied nodeset on the predicate(s) - def Predicate path, parsed + # Filters the supplied nodeset on the predicate(s) + def Predicate path, parsed #puts "PREDICATE with #{path}" - return nil unless path[0] == ?[ - predicates = [] - while path[0] == ?[ - path, expr = get_group(path) - predicates << expr[1..-2] if expr - end + return nil unless path[0] == ?[ + predicates = [] + while path[0] == ?[ + path, expr = get_group(path) + predicates << expr[1..-2] if expr + end #puts "PREDICATES = #{predicates.inspect}" - predicates.each{ |expr| - #puts "ORING #{expr}" - preds = [] - parsed << :predicate - parsed << preds - OrExpr(expr, preds) - } + predicates.each{ |expr| + #puts "ORING #{expr}" + preds = [] + parsed << :predicate + parsed << preds + OrExpr(expr, preds) + } #puts "PREDICATES = #{predicates.inspect}" - path - end + path + end - # The following return arrays of true/false, a 1-1 mapping of the - # supplied nodeset, except for axe(), which returns a filtered - # nodeset + # The following return arrays of true/false, a 1-1 mapping of the + # supplied nodeset, except for axe(), which returns a filtered + # nodeset - #| OrExpr S 'or' S AndExpr - #| AndExpr - def OrExpr path, parsed - #puts "OR >>> #{path}" - n = [] - rest = AndExpr( path, n ) - #puts "OR <<< #{rest}" - if rest != path - while rest =~ /^\s*( or )/ - n = [ :or, n, [] ] - rest = AndExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| OrExpr S 'or' S AndExpr + #| AndExpr + def OrExpr path, parsed + #puts "OR >>> #{path}" + n = [] + rest = AndExpr( path, n ) + #puts "OR <<< #{rest}" + if rest != path + while rest =~ /^\s*( or )/ + n = [ :or, n, [] ] + rest = AndExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| AndExpr S 'and' S EqualityExpr - #| EqualityExpr - def AndExpr path, parsed - #puts "AND >>> #{path}" - n = [] - rest = EqualityExpr( path, n ) - #puts "AND <<< #{rest}" - if rest != path - while rest =~ /^\s*( and )/ - n = [ :and, n, [] ] - #puts "AND >>> #{rest}" - rest = EqualityExpr( $', n[-1] ) - #puts "AND <<< #{rest}" - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| AndExpr S 'and' S EqualityExpr + #| EqualityExpr + def AndExpr path, parsed + #puts "AND >>> #{path}" + n = [] + rest = EqualityExpr( path, n ) + #puts "AND <<< #{rest}" + if rest != path + while rest =~ /^\s*( and )/ + n = [ :and, n, [] ] + #puts "AND >>> #{rest}" + rest = EqualityExpr( $', n[-1] ) + #puts "AND <<< #{rest}" + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| EqualityExpr ('=' | '!=') RelationalExpr - #| RelationalExpr - def EqualityExpr path, parsed - #puts "EQUALITY >>> #{path}" - n = [] - rest = RelationalExpr( path, n ) - #puts "EQUALITY <<< #{rest}" - if rest != path - while rest =~ /^\s*(!?=)\s*/ - if $1[0] == ?! - n = [ :neq, n, [] ] - else - n = [ :eq, n, [] ] - end - rest = RelationalExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| EqualityExpr ('=' | '!=') RelationalExpr + #| RelationalExpr + def EqualityExpr path, parsed + #puts "EQUALITY >>> #{path}" + n = [] + rest = RelationalExpr( path, n ) + #puts "EQUALITY <<< #{rest}" + if rest != path + while rest =~ /^\s*(!?=)\s*/ + if $1[0] == ?! + n = [ :neq, n, [] ] + else + n = [ :eq, n, [] ] + end + rest = RelationalExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr - #| AdditiveExpr - def RelationalExpr path, parsed - #puts "RELATION >>> #{path}" - n = [] - rest = AdditiveExpr( path, n ) - #puts "RELATION <<< #{rest}" - if rest != path - while rest =~ /^\s*([<>]=?)\s*/ - if $1[0] == ?< - sym = "lt" - else - sym = "gt" - end - sym << "eq" if $1[-1] == ?= - n = [ sym.intern, n, [] ] - rest = AdditiveExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr + #| AdditiveExpr + def RelationalExpr path, parsed + #puts "RELATION >>> #{path}" + n = [] + rest = AdditiveExpr( path, n ) + #puts "RELATION <<< #{rest}" + if rest != path + while rest =~ /^\s*([<>]=?)\s*/ + if $1[0] == ?< + sym = "lt" + else + sym = "gt" + end + sym << "eq" if $1[-1] == ?= + n = [ sym.intern, n, [] ] + rest = AdditiveExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| AdditiveExpr ('+' | S '-') MultiplicativeExpr - #| MultiplicativeExpr - def AdditiveExpr path, parsed - #puts "ADDITIVE >>> #{path}" - n = [] - rest = MultiplicativeExpr( path, n ) - #puts "ADDITIVE <<< #{rest}" - if rest != path - while rest =~ /^\s*(\+| -)\s*/ - if $1[0] == ?+ - n = [ :plus, n, [] ] - else - n = [ :minus, n, [] ] - end - rest = MultiplicativeExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| AdditiveExpr ('+' | S '-') MultiplicativeExpr + #| MultiplicativeExpr + def AdditiveExpr path, parsed + #puts "ADDITIVE >>> #{path}" + n = [] + rest = MultiplicativeExpr( path, n ) + #puts "ADDITIVE <<< #{rest}" + if rest != path + while rest =~ /^\s*(\+| -)\s*/ + if $1[0] == ?+ + n = [ :plus, n, [] ] + else + n = [ :minus, n, [] ] + end + rest = MultiplicativeExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr - #| UnaryExpr - def MultiplicativeExpr path, parsed - #puts "MULT >>> #{path}" - n = [] - rest = UnaryExpr( path, n ) - #puts "MULT <<< #{rest}" - if rest != path - while rest =~ /^\s*(\*| div | mod )\s*/ - if $1[0] == ?* - n = [ :mult, n, [] ] - elsif $1.include?( "div" ) - n = [ :div, n, [] ] - else - n = [ :mod, n, [] ] - end - rest = UnaryExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace(n) - elsif n.size > 0 - parsed << n - end - rest - end + #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr + #| UnaryExpr + def MultiplicativeExpr path, parsed + #puts "MULT >>> #{path}" + n = [] + rest = UnaryExpr( path, n ) + #puts "MULT <<< #{rest}" + if rest != path + while rest =~ /^\s*(\*| div | mod )\s*/ + if $1[0] == ?* + n = [ :mult, n, [] ] + elsif $1.include?( "div" ) + n = [ :div, n, [] ] + else + n = [ :mod, n, [] ] + end + rest = UnaryExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace(n) + elsif n.size > 0 + parsed << n + end + rest + end - #| '-' UnaryExpr - #| UnionExpr - def UnaryExpr path, parsed - path =~ /^(\-*)/ - path = $' - if $1 and (($1.size % 2) != 0) - mult = -1 - else - mult = 1 - end - parsed << :neg if mult < 0 + #| '-' UnaryExpr + #| UnionExpr + def UnaryExpr path, parsed + path =~ /^(\-*)/ + path = $' + if $1 and (($1.size % 2) != 0) + mult = -1 + else + mult = 1 + end + parsed << :neg if mult < 0 - #puts "UNARY >>> #{path}" - n = [] - path = UnionExpr( path, n ) - #puts "UNARY <<< #{path}" - parsed.concat( n ) - path - end + #puts "UNARY >>> #{path}" + n = [] + path = UnionExpr( path, n ) + #puts "UNARY <<< #{path}" + parsed.concat( n ) + path + end - #| UnionExpr '|' PathExpr - #| PathExpr - def UnionExpr path, parsed - #puts "UNION >>> #{path}" - n = [] - rest = PathExpr( path, n ) - #puts "UNION <<< #{rest}" - if rest != path - while rest =~ /^\s*(\|)\s*/ - n = [ :union, n, [] ] - rest = PathExpr( $', n[-1] ) - end - end - if parsed.size == 0 and n.size != 0 - parsed.replace( n ) - elsif n.size > 0 - parsed << n - end - rest - end + #| UnionExpr '|' PathExpr + #| PathExpr + def UnionExpr path, parsed + #puts "UNION >>> #{path}" + n = [] + rest = PathExpr( path, n ) + #puts "UNION <<< #{rest}" + if rest != path + while rest =~ /^\s*(\|)\s*/ + n = [ :union, n, [] ] + rest = PathExpr( $', n[-1] ) + end + end + if parsed.size == 0 and n.size != 0 + parsed.replace( n ) + elsif n.size > 0 + parsed << n + end + rest + end - #| LocationPath - #| FilterExpr ('/' | '//') RelativeLocationPath - def PathExpr path, parsed + #| LocationPath + #| FilterExpr ('/' | '//') RelativeLocationPath + def PathExpr path, parsed path =~ /^\s*/ path = $' - #puts "PATH >>> #{path}" - n = [] - rest = FilterExpr( path, n ) - #puts "PATH <<< '#{rest}'" - if rest != path - if rest and rest[0] == ?/ - return RelativeLocationPath(rest, n) - end - end - #puts "BEFORE WITH '#{rest}'" - rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/ - parsed.concat(n) - return rest - end + #puts "PATH >>> #{path}" + n = [] + rest = FilterExpr( path, n ) + #puts "PATH <<< '#{rest}'" + if rest != path + if rest and rest[0] == ?/ + return RelativeLocationPath(rest, n) + end + end + #puts "BEFORE WITH '#{rest}'" + rest = LocationPath(rest, n) if rest =~ /^[\/\.\@\[\w_*]/ + parsed.concat(n) + return rest + end - #| FilterExpr Predicate - #| PrimaryExpr - def FilterExpr path, parsed - #puts "FILTER >>> #{path}" - n = [] - path = PrimaryExpr( path, n ) - #puts "FILTER <<< #{path}" - path = Predicate(path, n) if path and path[0] == ?[ - #puts "FILTER <<< #{path}" - parsed.concat(n) - path - end + #| FilterExpr Predicate + #| PrimaryExpr + def FilterExpr path, parsed + #puts "FILTER >>> #{path}" + n = [] + path = PrimaryExpr( path, n ) + #puts "FILTER <<< #{path}" + path = Predicate(path, n) if path and path[0] == ?[ + #puts "FILTER <<< #{path}" + parsed.concat(n) + path + end - #| VARIABLE_REFERENCE - #| '(' expr ')' - #| LITERAL - #| NUMBER - #| FunctionCall - VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u - NUMBER = /^(\d*\.?\d+)/ - NT = /^comment|text|processing-instruction|node$/ - def PrimaryExpr path, parsed - arry = [] - case path - when VARIABLE_REFERENCE - varname = $1 - path = $' - parsed << :variable - parsed << varname - #arry << @variables[ varname ] - when /^(\w[-\w]*)(?:\()/ - fname = $1 - path = $' - return nil if fname =~ NT - parsed << :function - parsed << fname - path = FunctionCall(path, parsed) - when LITERAL, NUMBER - #puts "LITERAL or NUMBER: #$1" - varname = $1.nil? ? $2 : $1 - path = $' - parsed << :literal - parsed << varname - when /^\(/ #/ - path, contents = get_group(path) - contents = contents[1..-2] - n = [] - OrExpr( contents, n ) - parsed.concat(n) - end - path - end + #| VARIABLE_REFERENCE + #| '(' expr ')' + #| LITERAL + #| NUMBER + #| FunctionCall + VARIABLE_REFERENCE = /^\$(#{NAME_STR})/u + NUMBER = /^(\d*\.?\d+)/ + NT = /^comment|text|processing-instruction|node$/ + def PrimaryExpr path, parsed + arry = [] + case path + when VARIABLE_REFERENCE + varname = $1 + path = $' + parsed << :variable + parsed << varname + #arry << @variables[ varname ] + when /^(\w[-\w]*)(?:\()/ + #puts "PrimaryExpr :: Function >>> #$1 -- '#$''" + fname = $1 + path = $' + #puts "#{fname} =~ #{NT.inspect}" + #return nil if fname =~ NT + parsed << :function + parsed << fname + path = FunctionCall(path, parsed) + when LITERAL, NUMBER + #puts "LITERAL or NUMBER: #$1" + varname = $1.nil? ? $2 : $1 + path = $' + parsed << :literal + parsed << varname + when /^\(/ #/ + path, contents = get_group(path) + contents = contents[1..-2] + n = [] + OrExpr( contents, n ) + parsed.concat(n) + end + path + end - #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' - def FunctionCall rest, parsed - path, arguments = parse_args(rest) + #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')' + def FunctionCall rest, parsed + path, arguments = parse_args(rest) + #puts "Function call >>> (#{arguments.inspect})" argset = [] for argument in arguments args = [] OrExpr( argument, args ) argset << args end - parsed << argset - path - end + parsed << argset + path + end - # get_group( '[foo]bar' ) -> ['bar', '[foo]'] - def get_group string - ind = 0 - depth = 0 - st = string[0,1] - en = (st == "(" ? ")" : "]") - begin - case string[ind,1] - when st - depth += 1 - when en - depth -= 1 - end - ind += 1 - end while depth > 0 and ind < string.length - return nil unless depth==0 - [string[ind..-1], string[0..ind-1]] - end - - def parse_args( string ) - arguments = [] - ind = 0 - depth = 1 - begin - case string[ind] - when ?( - depth += 1 - if depth == 1 - string = string[1..-1] - ind -= 1 - end - when ?) - depth -= 1 - if depth == 0 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - end - when ?, - if depth == 1 - s = string[0,ind].strip - arguments << s unless s == "" - string = string[ind+1..-1] - ind = 0 - end - end - ind += 1 - end while depth > 0 and ind < string.length - return nil unless depth==0 - [string,arguments] - end - end - end + # get_group( '[foo]bar' ) -> ['bar', '[foo]'] + def get_group string + ind = 0 + depth = 0 + st = string[0,1] + en = (st == "(" ? ")" : "]") + begin + case string[ind,1] + when st + depth += 1 + when en + depth -= 1 + end + ind += 1 + end while depth > 0 and ind < string.length + return nil unless depth==0 + [string[ind..-1], string[0..ind-1]] + end + + def parse_args( string ) + arguments = [] + ind = 0 + depth = 1 + begin + case string[ind] + when ?( + depth += 1 + if depth == 1 + string = string[1..-1] + ind -= 1 + end + when ?) + depth -= 1 + if depth == 0 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + end + when ?, + if depth == 1 + s = string[0,ind].strip + arguments << s unless s == "" + string = string[ind+1..-1] + ind = 0 + end + end + ind += 1 + end while depth > 0 and ind < string.length + return nil unless depth==0 + [string,arguments] + end + end + end end diff --git a/lib/rexml/rexml.rb b/lib/rexml/rexml.rb index 8c402dd23f..779c0638ea 100644 --- a/lib/rexml/rexml.rb +++ b/lib/rexml/rexml.rb @@ -2,8 +2,8 @@ # # URL: http://www.germane-software.com/software/rexml # Author: Sean Russell -# Version: 2.5.6 -# Date: +2003/054 +# Version: 2.7.2 +# Date: +2003/343 @@ -21,6 +21,6 @@ # A tutorial is available in docs/tutorial.html module REXML Copyright = "Copyright #{Time.now.year} Sean Russell " - Date = "+2003/283" + Date = "+2003/343" Version = "2.7.2" end diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 8161750694..2110e6db66 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -28,7 +28,7 @@ module REXML # Constructor # @param arg must be a String, and should be a valid XML document - def initialize arg + def initialize(arg) @orig = @buffer = arg self.encoding = check_encoding( @buffer ) #@buffer = decode(@buffer) unless @encoding == UTF_8 @@ -64,10 +64,10 @@ module REXML # everything after it in the Source. # @return the pattern, if found, or nil if the Source is empty or the # pattern is not found. - def scan pattern, consume=false + def scan(pattern, cons=false) return nil if @buffer.nil? rv = @buffer.scan(pattern) - @buffer = $' if consume and rv.size>0 + @buffer = $' if cons and rv.size>0 rv end @@ -88,21 +88,21 @@ module REXML return md end - def match pattern, consume=false + def match(pattern, cons=false) md = pattern.match(@buffer) - @buffer = $' if consume and md + @buffer = $' if cons and md return md end # @return true if the Source is exhausted def empty? - @buffer.nil? or @buffer.strip.nil? + @buffer.nil? end # @return the current line in the source def current_line lines = @orig.split - res = lines.grep(@buffer[0..30]) + res = lines.grep @buffer[0..30] res = res[-1] if res.kind_of? Array lines.index( res ) if res end @@ -113,7 +113,7 @@ module REXML class IOSource < Source #attr_reader :block_size - def initialize arg, block_size=500 + def initialize(arg, block_size=500) @er_source = @source = arg @to_utf = false # READLINE OPT @@ -127,7 +127,7 @@ module REXML @line_break = encode( '>' ) end - def scan pattern, consume=false + def scan(pattern, cons=false) rv = super # You'll notice that this next section is very similar to the same # section in match(), but just a liiittle different. This is @@ -166,16 +166,16 @@ module REXML match( pattern, true ) end - def match pattern, consume=false + def match( pattern, cons=false ) rv = pattern.match(@buffer) - @buffer = $' if consume and rv + @buffer = $' if cons and rv while !rv and @source begin str = @source.readline('>') str = decode(str) if @to_utf and str @buffer << str rv = pattern.match(@buffer) - @buffer = $' if consume and rv + @buffer = $' if cons and rv rescue @source = nil end diff --git a/lib/rexml/text.rb b/lib/rexml/text.rb index 0614e51d47..2494ad9e8a 100644 --- a/lib/rexml/text.rb +++ b/lib/rexml/text.rb @@ -245,7 +245,7 @@ module REXML def Text::unnormalize( string, doctype=nil, filter=nil, illegal=nil ) rv = string.clone rv.gsub!( /\r\n?/, "\n" ) - matches = rv.scan(REFERENCE) + matches = rv.scan( REFERENCE ) return rv if matches.size == 0 rv.gsub!( NUMERICENTITY ) {|m| m=$1 diff --git a/lib/rexml/xmldecl.rb b/lib/rexml/xmldecl.rb index 6a6cc31a53..1c0bde8f4d 100644 --- a/lib/rexml/xmldecl.rb +++ b/lib/rexml/xmldecl.rb @@ -13,13 +13,16 @@ module REXML STOP = '\?>'; attr_accessor :version, :standalone + attr_reader :writeencoding def initialize(version=DEFAULT_VERSION, encoding=nil, standalone=nil) - @encoding_set = !encoding.nil? + @writethis = true + @writeencoding = !encoding.nil? if version.kind_of? XMLDecl super() @version = version.version self.encoding = version.encoding + @writeencoding = version.writeencoding @standalone = version.standalone else super() @@ -35,9 +38,14 @@ module REXML end def write writer, indent=-1, transitive=false, ie_hack=false + return "" unless @writethis or writer.kind_of? Output indent( writer, indent ) writer << START.sub(/\\/u, '') - writer << " #{content}" + if writer.kind_of? Output + writer << " #{content writer.encoding}" + else + writer << " #{content encoding}" + end writer << STOP.sub(/\\/u, '') end @@ -50,7 +58,6 @@ module REXML def xmldecl version, encoding, standalone @version = version - @encoding_set = !encoding.nil? self.encoding = encoding @standalone = standalone end @@ -60,11 +67,37 @@ module REXML end alias :stand_alone? :standalone + alias :old_enc= :encoding= + + def encoding=( enc ) + if enc.nil? + self.old_enc = "UTF-8" + @writeencoding = false + else + self.old_enc = enc + @writeencoding = true + end + self.dowrite + end + + def XMLDecl.default + rv = XMLDecl.new( "1.0" ) + rv.nowrite + rv + end + + def nowrite + @writethis = false + end + + def dowrite + @writethis = true + end private - def content + def content(enc) rv = "version='#@version'" - rv << " encoding='#{encoding}'" if @encoding_set + rv << " encoding='#{enc}'" if @writeencoding || enc !~ /utf-8/i rv << " standalone='#@standalone'" if @standalone rv end -- cgit v1.2.3