diff options
author | kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-03-17 10:13:25 +0000 |
---|---|---|
committer | kou <kou@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-03-17 10:13:25 +0000 |
commit | 966a25465aab5c2972e6c453f631a15fc2223256 (patch) | |
tree | 847090e856c9901ab2cc19251c179b9b0985e65b /lib/rss/parser.rb | |
parent | 53cbab048452742b537ac8bccf494630d1c184c8 (diff) | |
download | ruby-966a25465aab5c2972e6c453f631a15fc2223256.tar.gz |
* lib/rss, test/rss:
- supported Atom.
- bumped version from 0.1.6 to 0.1.7.
* sample/rss/convert.rb: added new sample.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12087 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rss/parser.rb')
-rw-r--r-- | lib/rss/parser.rb | 172 |
1 file changed, 117 insertions, 55 deletions
diff --git a/lib/rss/parser.rb b/lib/rss/parser.rb index babf15f52c..f5ea2bbc03 100644 --- a/lib/rss/parser.rb +++ b/lib/rss/parser.rb @@ -2,6 +2,7 @@ require "forwardable" require "open-uri" require "rss/rss" +require "rss/xml" module RSS @@ -63,7 +64,8 @@ module RSS end end - def parse(rss, do_validate=true, ignore_unknown_element=true, parser_class=default_parser) + def parse(rss, do_validate=true, ignore_unknown_element=true, + parser_class=default_parser) parser = new(rss, parser_class) parser.do_validate = do_validate parser.ignore_unknown_element = ignore_unknown_element @@ -103,7 +105,7 @@ module RSS return rss if rss.is_a?(::URI::Generic) begin - URI(rss) + ::URI.parse(rss) rescue ::URI::Error rss end @@ -158,26 +160,26 @@ module RSS class << self - @@setters = {} + @@accessor_bases = {} @@registered_uris = {} @@class_names = {} - def install_setter(uri, tag_name, setter) - @@setters[uri] ||= {} - @@setters[uri][tag_name] = setter - end - def setter(uri, tag_name) - begin - @@setters[uri][tag_name] - rescue NameError + _getter = getter(uri, tag_name) + if _getter + "#{_getter}=" + else nil end end + def getter(uri, tag_name) + (@@accessor_bases[uri] || {})[tag_name] + end + def available_tags(uri) begin - @@setters[uri].keys + @@accessor_bases[uri].keys rescue NameError [] end @@ -205,8 +207,8 @@ module RSS end end - def install_get_text_element(uri, name, setter) - install_setter(uri, name, setter) + def install_get_text_element(uri, name, accessor_base) + install_accessor_base(uri, name, accessor_base) def_get_text_element(uri, name, *get_file_and_line_from_caller(1)) end @@ -215,20 +217,18 @@ module RSS end private + def install_accessor_base(uri, tag_name, accessor_base) + @@accessor_bases[uri] ||= {} + @@accessor_bases[uri][tag_name] = accessor_base.chomp("=") + end def def_get_text_element(uri, name, file, line) register_uri(uri, name) unless private_instance_methods(false).include?("start_#{name}".to_sym) module_eval(<<-EOT, file, line) def 
start_#{name}(name, prefix, attrs, ns) - uri = ns[prefix] + uri = _ns(ns, prefix) if self.class.uri_registered?(uri, #{name.inspect}) - if @do_validate - tags = self.class.available_tags(uri) - unless tags.include?(name) - raise UnknownTagError.new(name, uri) - end - end start_get_text_element(name, prefix, ns, uri) else start_else_element(name, prefix, attrs, ns) @@ -244,7 +244,6 @@ module RSS end module ListenerMixin - attr_reader :rss attr_accessor :ignore_unknown_element @@ -254,13 +253,16 @@ module RSS @rss = nil @ignore_unknown_element = true @do_validate = true - @ns_stack = [{}] + @ns_stack = [{"xml" => :xml}] @tag_stack = [[]] @text_stack = [''] @proc_stack = [] @last_element = nil @version = @encoding = @standalone = nil @xml_stylesheets = [] + @xml_child_mode = false + @xml_element = nil + @last_xml_element = nil end def xmldecl(version, encoding, standalone) @@ -271,7 +273,7 @@ module RSS if name == "xml-stylesheet" params = parse_pi_content(content) if params.has_key?("href") - @xml_stylesheets << XMLStyleSheet.new(*params) + @xml_stylesheets << XMLStyleSheet.new(params) end end end @@ -291,12 +293,41 @@ module RSS @ns_stack.push(ns) prefix, local = split_name(name) - @tag_stack.last.push([ns[prefix], local]) + @tag_stack.last.push([_ns(ns, prefix), local]) @tag_stack.push([]) - if respond_to?("start_#{local}", true) - __send__("start_#{local}", local, prefix, attrs, ns.dup) + if @xml_child_mode + previous = @last_xml_element + element_attrs = attributes.dup + unless previous + ns.each do |ns_prefix, value| + next if ns_prefix == "xml" + key = ns_prefix.empty? ? "xmlns" : "xmlns:#{ns_prefix}" + element_attrs[key] ||= value + end + end + next_element = XML::Element.new(local, + prefix.empty? ? 
nil : prefix, + _ns(ns, prefix), + element_attrs) + previous << next_element if previous + @last_xml_element = next_element + pr = Proc.new do |text, tags| + if previous + @last_xml_element = previous + else + @xml_element = @last_xml_element + @last_xml_element = nil + end + end + @proc_stack.push(pr) else - start_else_element(local, prefix, attrs, ns.dup) + if @rss.nil? and respond_to?("initial_start_#{local}", true) + __send__("initial_start_#{local}", local, prefix, attrs, ns.dup) + elsif respond_to?("start_#{local}", true) + __send__("start_#{local}", local, prefix, attrs, ns.dup) + else + start_else_element(local, prefix, attrs, ns.dup) + end end end @@ -313,10 +344,17 @@ module RSS end def text(data) - @text_stack.last << data + if @xml_child_mode + @last_xml_element << data if @last_xml_element + else + @text_stack.last << data + end end private + def _ns(ns, prefix) + ns.fetch(prefix, "") + end CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/ def parse_pi_content(content) @@ -328,20 +366,20 @@ module RSS end def start_else_element(local, prefix, attrs, ns) - class_name = self.class.class_name(ns[prefix], local) + class_name = self.class.class_name(_ns(ns, prefix), local) current_class = @last_element.class if current_class.const_defined?(class_name) next_class = current_class.const_get(class_name) start_have_something_element(local, prefix, attrs, ns, next_class) else - if @ignore_unknown_element + if !@do_validate or @ignore_unknown_element @proc_stack.push(nil) else parent = "ROOT ELEMENT???" 
if current_class.tag_name parent = current_class.tag_name end - raise NotExceptedTagError.new(local, parent) + raise NotExpectedTagError.new(local, _ns(ns, prefix), parent) end end end @@ -353,41 +391,48 @@ module RSS end def check_ns(tag_name, prefix, ns, require_uri) - if @do_validate - if ns[prefix] == require_uri - #ns.delete(prefix) - else + unless _ns(ns, prefix) == require_uri + if @do_validate raise NSError.new(tag_name, prefix, require_uri) + else + # Force bind required URI with prefix + @ns_stack.last[prefix] = require_uri end end end def start_get_text_element(tag_name, prefix, ns, required_uri) - @proc_stack.push Proc.new {|text, tags| + pr = Proc.new do |text, tags| setter = self.class.setter(required_uri, tag_name) - setter ||= "#{tag_name}=" if @last_element.respond_to?(setter) + if @do_validate + getter = self.class.getter(required_uri, tag_name) + if @last_element.__send__(getter) + raise TooMuchTagError.new(tag_name, @last_element.tag_name) + end + end @last_element.__send__(setter, text.to_s) else - if @do_validate and not @ignore_unknown_element - raise NotExceptedTagError.new(tag_name, @last_element.tag_name) + if @do_validate and !@ignore_unknown_element + raise NotExpectedTagError.new(tag_name, _ns(ns, prefix), + @last_element.tag_name) end end - } + end + @proc_stack.push(pr) end def start_have_something_element(tag_name, prefix, attrs, ns, klass) check_ns(tag_name, prefix, ns, klass.required_uri) - args = [] - - klass.get_attributes.each do |a_name, a_uri, required| + attributes = {} + klass.get_attributes.each do |a_name, a_uri, required, element_name| if a_uri.is_a?(String) or !a_uri.respond_to?(:include?) a_uri = [a_uri] end - unless a_uri == [nil] + unless a_uri == [""] for prefix, uri in ns if a_uri.include?(uri) val = attrs["#{prefix}:#{a_name}"] @@ -395,12 +440,12 @@ module RSS end end end - if val.nil? and a_uri.include?(nil) + if val.nil? and a_uri.include?("") val = attrs[a_name] end if @do_validate and required and val.nil? 
- unless a_uri.include?(nil) + unless a_uri.include?("") for prefix, uri in ns if a_uri.include?(uri) a_name = "#{prefix}:#{a_name}" @@ -410,20 +455,37 @@ module RSS raise MissingAttributeError.new(tag_name, a_name) end - args << val + attributes[a_name] = val end previous = @last_element - next_element = klass.new(*args) - next_element.do_validate = @do_validate - previous.funcall(:set_next_element, tag_name, next_element) + next_element = klass.new(@do_validate, attributes) + previous.__send!(:set_next_element, tag_name, next_element) @last_element = next_element - @proc_stack.push Proc.new { |text, tags| + @last_element.parent = previous if klass.need_parent? + @xml_child_mode = @last_element.have_xml_content? + pr = Proc.new do |text, tags| p(@last_element.class) if DEBUG - @last_element.content = text if klass.have_content? - @last_element.validate_for_stream(tags) if @do_validate + if @xml_child_mode + @last_element.content = @xml_element.to_s + xml_setter = @last_element.class.xml_setter + @last_element.__send__(xml_setter, @xml_element) + @xml_element = nil + @xml_child_mode = false + else + if klass.have_content? + if @last_element.need_base64_encode? + text = Base64.decode64(text.lstrip) + end + @last_element.content = text + end + end + if @do_validate + @last_element.validate_for_stream(tags, @ignore_unknown_element) + end @last_element = previous - } + end + @proc_stack.push(pr) end end |