From 84035542b7176081506dc06f90eb15e7f5b8fd00 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> Date: Sat, 1 May 2004 16:09:55 +0000 Subject: This commit was manufactured by cvs2svn to create branch 'ruby_1_8'. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/branches/ruby_1_8@6238 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rss/1.0.rb | 638 ++++++++++++++++++++++++++++++++++++++++++++++++++ lib/rss/dublincore.rb | 62 +++++ lib/rss/parser.rb | 380 ++++++++++++++++++++++++++++++ lib/rss/rss.rb | 634 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/rss/xmlparser.rb | 91 +++++++ 5 files changed, 1805 insertions(+) create mode 100644 lib/rss/1.0.rb create mode 100644 lib/rss/dublincore.rb create mode 100644 lib/rss/parser.rb create mode 100644 lib/rss/rss.rb create mode 100644 lib/rss/xmlparser.rb (limited to 'lib') diff --git a/lib/rss/1.0.rb b/lib/rss/1.0.rb new file mode 100644 index 0000000000..696b492ada --- /dev/null +++ b/lib/rss/1.0.rb @@ -0,0 +1,638 @@ +require "rss/parser" + +module RSS + + module RSS10 + NSPOOL = {} + ELEMENTS = [] + end + + class RDF < Element + + include RSS10 + include RootElementMixin + include XMLStyleSheetMixin + + class << self + + def required_uri + URI + end + + end + + TAG_NAME.replace('RDF') + + PREFIX = 'rdf' + URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + + install_ns('', ::RSS::URI) + install_ns(PREFIX, URI) + + [ + ["channel", nil], + ["image", "?"], + ["item", "+"], + ["textinput", "?"], + ].each do |tag, occurs| + install_model(tag, occurs) + end + + %w(channel image textinput).each do |x| + install_have_child_element(x) + end + + install_have_children_element("item") + + attr_accessor :rss_version, :version, :encoding, :standalone + + def initialize(version=nil, encoding=nil, standalone=nil) + super('1.0', version, encoding, standalone) + end + + def to_s(convert=true) + rv = <<-EORDF +#{xmldecl} +#{xml_stylesheet_pi}<#{PREFIX}:RDF#{ns_declaration}> +#{channel_element(false)} +#{image_element(false)} +#{item_elements(false)} +#{textinput_element(false)} +#{other_element(false, "\t")} + +EORDF + rv = @converter.convert(rv) if convert and @converter + rv + end + + private + def rdf_validate(tags) + _validate(tags, []) + end + + def children + [@channel, @image, @textinput, *@item] + end + + def _tags + rv = [ + [::RSS::URI, "channel"], + [::RSS::URI, "image"], + ].delete_if {|x| send(x[1]).nil?} + @item.each do |x| + rv << [::RSS::URI, "item"] + end + rv << [::RSS::URI, "textinput"] if @textinput + rv + end + + class Seq < Element + + include RSS10 + + class << self + + def required_uri + URI + end + + end + + TAG_NAME.replace('Seq') + + install_have_children_element("li") + + install_must_call_validator('rdf', ::RSS::RDF::URI) + + def initialize(li=[]) + super() + @li = li + end + + def to_s(convert=true) + <<-EOT + <#{PREFIX}:Seq> +#{li_elements(convert, "\t\t\t\t")} +#{other_element(convert, "\t\t\t\t")} + +EOT + end + + private + def children + @li + end + + def rdf_validate(tags) + _validate(tags, [["li", '*']]) + end + + def _tags + rv = [] + @li.each do |x| + rv << [URI, "li"] + end + rv + end + + end + + class Li < Element + + include RSS10 + + class << self + + def required_uri + URI + end + + end + + [ + ["resource", [URI, nil], true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + def initialize(resource=nil) + super() + @resource = resource + end + + def to_s(convert=true) + if @resource + rv = %Q!<#{PREFIX}:li resource="#{h @resource}" />\n! + rv = @converter.convert(rv) if convert and @converter + rv + else + '' + end + end + + private + def _attrs + [ + ["resource", true] + ] + end + + end + + class Channel < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["about", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + %w(title link description).each do |x| + install_text_element(x) + end + + %w(image items textinput).each do |x| + install_have_child_element(x) + end + + [ + ['title', nil], + ['link', nil], + ['description', nil], + ['image', '?'], + ['items', nil], + ['textinput', '?'], + ].each do |tag, occurs| + install_model(tag, occurs) + end + + def initialize(about=nil) + super() + @about = about + end + + def to_s(convert=true) + about = '' + about << %Q!#{PREFIX}:about="#{h @about}"! if @about + rv = <<-EOT + + #{title_element(false)} + #{link_element(false)} + #{description_element(false)} + #{image_element(false)} +#{items_element(false)} + #{textinput_element(false)} +#{other_element(false, "\t\t")} + +EOT + rv = @converter.convert(rv) if convert and @converter + rv + end + + private + def children + [@image, @items, @textinput] + end + + def _tags + [ + [::RSS::URI, 'title'], + [::RSS::URI, 'link'], + [::RSS::URI, 'description'], + [::RSS::URI, 'image'], + [::RSS::URI, 'items'], + [::RSS::URI, 'textinput'], + ].delete_if do |x| + send(x[1]).nil? + end + end + + def _attrs + [ + ["about", true] + ] + end + + class Image < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["resource", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + def initialize(resource=nil) + super() + @resource = resource + end + + def to_s(convert=true) + if @resource + rv = %Q!! + rv = @converter.convert(rv) if convert and @converter + rv + else + '' + end + end + + private + def _attrs + [ + ["resource", true] + ] + end + + end + + class Textinput < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["resource", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + def initialize(resource=nil) + super() + @resource = resource + end + + def to_s(convert=true) + if @resource + rv = %Q|| + rv = @converter.convert(rv) if convert and @converter + rv + else + '' + end + end + + private + def _attrs + [ + ["resource", true], + ] + end + + end + + class Items < Element + + include RSS10 + + Seq = ::RSS::RDF::Seq + class Seq + unless const_defined?(:Li) + Li = ::RSS::RDF::Li + end + end + + class << self + + def required_uri + ::RSS::URI + end + + end + + install_have_child_element("Seq") + + install_must_call_validator('rdf', ::RSS::RDF::URI) + + def initialize(seq=Seq.new) + super() + @Seq = seq + end + + def to_s(convert=true) + <<-EOT + +#{Seq_element(convert)} +#{other_element(convert, "\t\t\t")} + +EOT + end + + private + def children + [@Seq] + end + + private + def _tags + rv = [] + rv << [URI, 'Seq'] unless @Seq.nil? + rv + end + + def rdf_validate(tags) + _validate(tags, [["Seq", nil]]) + end + + end + + end + + class Image < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["about", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + %w(title url link).each do |x| + install_text_element(x) + end + + [ + ['title', nil], + ['url', nil], + ['link', nil], + ].each do |tag, occurs| + install_model(tag, occurs) + end + + def initialize(about=nil) + super() + @about = about + end + + def to_s(convert=true) + about = '' + about << %Q!#{PREFIX}:about="#{h @about}"! if @about + rv = <<-EOT + + #{title_element(false)} + #{url_element(false)} + #{link_element(false)} +#{other_element(false, "\t\t")} + +EOT + rv = @converter.convert(rv) if convert and @converter + rv + end + + private + def _tags + [ + [::RSS::URI, 'title'], + [::RSS::URI, 'url'], + [::RSS::URI, 'link'], + ].delete_if do |x| + send(x[1]).nil? + end + end + + def _attrs + [ + ["about", true], + ] + end + + end + + class Item < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["about", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + %w(title link description).each do |x| + install_text_element(x) + end + + [ + ["title", nil], + ["link", nil], + ["description", "?"], + ].each do |tag, occurs| + install_model(tag, occurs) + end + + def initialize(about=nil) + super() + @about = about + end + + def to_s(convert=true) + about = '' + about << %Q!#{PREFIX}:about="#{h @about}"! if @about + rv = <<-EOT + + #{title_element(false)} + #{link_element(false)} + #{description_element(false)} +#{other_element(false, "\t\t")} + +EOT + rv = @converter.convert(rv) if convert and @converter + rv + end + + private + def _tags + [ + [::RSS::URI, 'title'], + [::RSS::URI, 'link'], + [::RSS::URI, 'description'], + ].delete_if do |x| + send(x[1]).nil? + end + end + + def _attrs + [ + ["about", true], + ] + end + + end + + class Textinput < Element + + include RSS10 + + class << self + + def required_uri + ::RSS::URI + end + + end + + [ + ["about", URI, true] + ].each do |name, uri, required| + install_get_attribute(name, uri, required) + end + + %w(title description name link).each do |x| + install_text_element(x) + end + + [ + ["title", nil], + ["description", nil], + ["name", nil], + ["link", nil], + ].each do |tag, occurs| + install_model(tag, occurs) + end + + def initialize(about=nil) + super() + @about = about + end + + def to_s(convert=true) + about = '' + about << %Q!#{PREFIX}:about="#{h @about}"! if @about + rv = <<-EOT + + #{title_element(false)} + #{description_element(false)} + #{name_element(false)} + #{link_element(false)} +#{other_element(false, "\t\t")} + +EOT + rv = @converter.convert(rv) if convert and @converter + rv + end + + private + def _tags + [ + [::RSS::URI, 'title'], + [::RSS::URI, 'description'], + [::RSS::URI, 'name'], + [::RSS::URI, 'link'], + ].delete_if do |x| + send(x[1]).nil? + end + end + + def _attrs + [ + ["about", true], + ] + end + + end + + end + + RSS10::ELEMENTS.each do |x| + BaseListener.install_get_text_element(x, URI, "#{x}=") + end + + module ListenerMixin + private + def start_RDF(tag_name, prefix, attrs, ns) + check_ns(tag_name, prefix, ns, RDF::URI) + + @rss = RDF.new(@version, @encoding, @standalone) + @rss.do_validate = @do_validate + @rss.xml_stylesheets = @xml_stylesheets + @last_element = @rss + @proc_stack.push Proc.new { |text, tags| + @rss.validate_for_stream(tags) if @do_validate + } + end + end + +end diff --git a/lib/rss/dublincore.rb b/lib/rss/dublincore.rb new file mode 100644 index 0000000000..dcf8e46c65 --- /dev/null +++ b/lib/rss/dublincore.rb @@ -0,0 +1,62 @@ +require "rss/1.0" + +module RSS + + DC_PREFIX = 'dc' + DC_URI = "http://purl.org/dc/elements/1.1/" + + RDF.install_ns(DC_PREFIX, DC_URI) + + module DublinCoreModel + + extend BaseModel + + ELEMENTS = [] + + def self.included(mod) + mod.module_eval(<<-EOC) + %w(title description creator subject publisher + contributor type format identifier source + language relation coverage rights).each do |x| + install_text_element("\#{DC_PREFIX}_\#{x}") + end + + %w(date).each do |x| + install_date_element("\#{DC_PREFIX}_\#{x}", 'w3cdtf', x) + end + EOC + end + + def dc_validate(tags) + counter = {} + ELEMENTS.each do |x| + counter[x] = 0 + end + + tags.each do |tag| + key = "#{DC_PREFIX}_#{tag}" + raise UnknownTagError.new(tag, DC_URI) unless counter.has_key?(key) + counter[key] += 1 + raise TooMuchTagError.new(tag, tag_name) if counter[key] > 1 + end + end + + end + + # For backward compatibility + DublincoreModel = DublinCoreModel + + class RDF + class Channel; include DublinCoreModel; end + class Image; include DublinCoreModel; end + class Item; include DublinCoreModel; end + class Textinput; include DublinCoreModel; end + end + + prefix_size = DC_PREFIX.size + 1 + DublinCoreModel::ELEMENTS.uniq! + DublinCoreModel::ELEMENTS.each do |x| + BaseListener.install_get_text_element(x[prefix_size..-1], DC_URI, "#{x}=") + end + +end diff --git a/lib/rss/parser.rb b/lib/rss/parser.rb new file mode 100644 index 0000000000..e1c61f46ae --- /dev/null +++ b/lib/rss/parser.rb @@ -0,0 +1,380 @@ +require "forwardable" + +require "rss/rss" + +module RSS + + class NotWellFormedError < Error + attr_reader :line, :element + def initialize(line=nil, element=nil) + message = "This is not well formed XML" + if element or line + message << "\nerror occurred" + message << " in #{element}" if element + message << " at about #{line} line" if line + end + message << "\n#{yield}" if block_given? + super(message) + end + end + + class XMLParserNotFound < Error + def initialize + super("available XML parser does not found in " << + "#{AVAILABLE_PARSER_LIBRARIES.inspect}.") + end + end + + class NotValidXMLParser < Error + def initialize(parser) + super("#{parser} is not available XML parser. " << + "available XML parser is " << + "#{AVAILABLE_PARSERS.inspect}.") + end + end + + class NSError < InvalidRSSError + attr_reader :tag, :prefix, :uri + def initialize(tag, prefix, require_uri) + @tag, @prefix, @uri = tag, prefix, require_uri + super("prefix <#{prefix}> doesn't associate uri " << + "<#{require_uri}> in tag <#{tag}>") + end + end + + class Parser + + extend Forwardable + + class << self + + @@default_parser = nil + + def default_parser + @@default_parser || AVAILABLE_PARSERS.first + end + + def default_parser=(new_value) + if AVAILABLE_PARSERS.include?(new_value) + @@default_parser = new_value + else + raise NotValidXMLParser.new(new_value) + end + end + + def parse(rss, do_validate=true, ignore_unknown_element=true, parser_class=default_parser) + parser = new(rss, parser_class) + parser.do_validate = do_validate + parser.ignore_unknown_element = ignore_unknown_element + parser.parse + end + + end + + def_delegators(:@parser, :parse, :rss, + :ignore_unknown_element, + :ignore_unknown_element=, :do_validate, + :do_validate=) + + def initialize(rss, parser_class=self.class.default_parser) + @parser = parser_class.new(rss) + end + end + + class BaseParser + + def initialize(rss) + @listener = listener.new + @rss = rss + end + + def rss + @listener.rss + end + + def ignore_unknown_element + @listener.ignore_unknown_element + end + + def ignore_unknown_element=(new_value) + @listener.ignore_unknown_element = new_value + end + + def do_validate + @listener.do_validate + end + + def do_validate=(new_value) + @listener.do_validate = new_value + end + + def parse + if @listener.rss.nil? + _parse + end + @listener.rss + end + + end + + class BaseListener + + extend Utils + + class << self + + @@setter = {} + def install_setter(uri, tag_name, setter) + @@setter[uri] = {} unless @@setter.has_key?(uri) + @@setter[uri][tag_name] = setter + end + + def setter(uri, tag_name) + begin + @@setter[uri][tag_name] + rescue NameError + nil + end + end + + def available_tags(uri) + begin + @@setter[uri].keys + rescue NameError + [] + end + end + + def install_get_text_element(name, uri, setter) + install_setter(uri, name, setter) + def_get_text_element(name, *get_file_and_line_from_caller(1)) + end + + private + + def def_get_text_element(name, file, line) + unless private_instance_methods(false).include?("start_#{name}") + module_eval(<<-EOT, file, line) + def start_#{name}(name, prefix, attrs, ns) + uri = ns[prefix] + if @do_validate + tags = self.class.available_tags(uri) + unless tags.include?(name) + raise UnknownTagError.new(name, uri) + end + end + start_get_text_element(name, prefix, ns, uri) + end + EOT + end + send("private", "start_#{name}") + end + + end + + end + + module ListenerMixin + + attr_reader :rss + + attr_accessor :ignore_unknown_element + attr_accessor :do_validate + + def initialize + @rss = nil + @ignore_unknown_element = true + @do_validate = true + @ns_stack = [{}] + @tag_stack = [[]] + @text_stack = [''] + @proc_stack = [] + @last_element = nil + @version = @encoding = @standalone = nil + @xml_stylesheets = [] + end + + def xmldecl(version, encoding, standalone) + @version, @encoding, @standalone = version, encoding, standalone + end + + def instruction(name, content) + if name == "xml-stylesheet" + params = parse_pi_content(content) + if params.has_key?("href") + @xml_stylesheets << XMLStyleSheet.new(*params) + end + end + end + + def tag_start(name, attributes) + @text_stack.push('') + + ns = @ns_stack.last.dup + attrs = {} + attributes.each do |n, v| + if n =~ /\Axmlns:?/ + ns[$POSTMATCH] = v + else + attrs[n] = v + end + end + @ns_stack.push(ns) + + prefix, local = split_name(name) + @tag_stack.last.push([ns[prefix], local]) + @tag_stack.push([]) + if respond_to?("start_#{local}", true) + send("start_#{local}", local, prefix, attrs, ns.dup) + else + start_else_element(local, prefix, attrs, ns.dup) + end + end + + def tag_end(name) + if DEBUG + p "end tag #{name}" + p @tag_stack + end + text = @text_stack.pop + tags = @tag_stack.pop + pr = @proc_stack.pop + pr.call(text, tags) unless pr.nil? + end + + def text(data) + @text_stack.last << data + end + + private + + CONTENT_PATTERN = /\s*([^=]+)=(["'])([^\2]+?)\2/ + def parse_pi_content(content) + params = {} + content.scan(CONTENT_PATTERN) do |name, quote, value| + params[name] = value + end + params + end + + def start_else_element(local, prefix, attrs, ns) + class_name = local[0,1].upcase << local[1..-1] + current_class = @last_element.class +# begin + if current_class.constants.include?(class_name) + next_class = current_class.const_get(class_name) + start_have_something_element(local, prefix, attrs, ns, next_class) +# rescue NameError + else + if @ignore_unknown_element + @proc_stack.push(nil) + else + parent = "ROOT ELEMENT???" + if current_class.const_defined?("TAG_NAME") + parent = current_class.const_get("TAG_NAME") + end + raise NotExceptedTagError.new(local, parent) + end + end + end + + NAMESPLIT = /^(?:([\w:][-\w\d.]*):)?([\w:][-\w\d.]*)/ + def split_name(name) + name =~ NAMESPLIT + [$1 || '', $2] + end + + def check_ns(tag_name, prefix, ns, require_uri) + if @do_validate + if ns[prefix] == require_uri + #ns.delete(prefix) + else + raise NSError.new(tag_name, prefix, require_uri) + end + end + end + + def start_get_text_element(tag_name, prefix, ns, required_uri) + @proc_stack.push Proc.new {|text, tags| + setter = self.class.setter(required_uri, tag_name) + setter ||= "#{tag_name}=" + if @last_element.respond_to?(setter) + @last_element.send(setter, text.to_s) + else + if @do_validate and not @ignore_unknown_element + raise NotExceptedTagError.new(tag_name, @last_element.tag_name) + end + end + } + end + + def start_have_something_element(tag_name, prefix, attrs, ns, klass) + + check_ns(tag_name, prefix, ns, klass.required_uri) + + args = [] + + klass.get_attributes.each do |a_name, a_uri, required| + + if a_uri.is_a?(String) or !a_uri.respond_to?(:include?) + a_uri = [a_uri] + end + unless a_uri == [nil] + for prefix, uri in ns + if a_uri.include?(uri) + val = attrs["#{prefix}:#{a_name}"] + break if val + end + end + end + if val.nil? and a_uri.include?(nil) + val = attrs[a_name] + end + + if @do_validate and required and val.nil? + raise MissingAttributeError.new(tag_name, a_name) + end + + args << val + end + + previous = @last_element + next_element = klass.send(:new, *args) + next_element.do_validate = @do_validate + setter = "" + setter << "#{klass.required_prefix}_" if klass.required_prefix + setter << "#{tag_name}=" + @last_element.send(setter, next_element) + @last_element = next_element + @proc_stack.push Proc.new { |text, tags| + p(@last_element.class) if DEBUG + @last_element.content = text if klass.have_content? + @last_element.validate_for_stream(tags) if @do_validate + @last_element = previous + } + end + + end + + unless const_defined? :AVAILABLE_PARSER_LIBRARIES + AVAILABLE_PARSER_LIBRARIES = [ + ["rss/xmlparser", :XMLParserParser], + ["rss/xmlscanner", :XMLScanParser], + ["rss/rexmlparser", :REXMLParser], + ] + end + + AVAILABLE_PARSERS = [] + + AVAILABLE_PARSER_LIBRARIES.each do |lib, parser| + begin + require lib + AVAILABLE_PARSERS.push(const_get(parser)) + rescue LoadError + end + end + + if AVAILABLE_PARSERS.empty? + raise XMLParserNotFound + end +end diff --git a/lib/rss/rss.rb b/lib/rss/rss.rb new file mode 100644 index 0000000000..fbd134c9c9 --- /dev/null +++ b/lib/rss/rss.rb @@ -0,0 +1,634 @@ +require "time" + +class Time + class << self + unless respond_to?(:w3cdtf) + def w3cdtf(date) + if /\A\s* + (-?\d+)-(\d\d)-(\d\d) + (?:T + (\d\d):(\d\d)(?::(\d\d))? + (\.\d+)? + (Z|[+-]\d\d:\d\d)?)? + \s*\z/ix =~ date and (($5 and $8) or (!$5 and !$8)) + datetime = [$1.to_i, $2.to_i, $3.to_i, $4.to_i, $5.to_i, $6.to_i] + datetime << $7.to_f * 1000000 if $7 + if $8 + Time.utc(*datetime) - zone_offset($8) + else + Time.local(*datetime) + end + else + raise ArgumentError.new("invalid date: #{date.inspect}") + end + end + end + end + + unless instance_methods.include?("w3cdtf") + alias w3cdtf iso8601 + end +end + +require "English" +require "rss/utils" +require "rss/converter" +require "rss/xml-stylesheet" + +module RSS + + VERSION = "0.0.8" + + DEBUG = false + + class Error < StandardError; end + + class OverlappedPrefixError < Error + attr_reader :prefix + def initialize(prefix) + @prefix = prefix + end + end + + class InvalidRSSError < Error; end + + class MissingTagError < InvalidRSSError + attr_reader :tag, :parent + def initialize(tag, parent) + @tag, @parent = tag, parent + super("tag <#{tag}> is missing in tag <#{parent}>") + end + end + + class TooMuchTagError < InvalidRSSError + attr_reader :tag, :parent + def initialize(tag, parent) + @tag, @parent = tag, parent + super("tag <#{tag}> is too much in tag <#{parent}>") + end + end + + class MissingAttributeError < InvalidRSSError + attr_reader :tag, :attribute + def initialize(tag, attribute) + @tag, @attribute = tag, attribute + super("attribute <#{attribute}> is missing in tag <#{tag}>") + end + end + + class UnknownTagError < InvalidRSSError + attr_reader :tag, :uri + def initialize(tag, uri) + @tag, @uri = tag, uri + super("tag <#{tag}> is unknown in namespace specified by uri <#{uri}>") + end + end + + class NotExceptedTagError < InvalidRSSError + attr_reader :tag, :parent + def initialize(tag, parent) + @tag, @parent = tag, parent + super("tag <#{tag}> is not expected in tag <#{parent}>") + end + end + + class NotAvailableValueError < InvalidRSSError + attr_reader :tag, :value + def initialize(tag, value) + @tag, @value = tag, value + super("value <#{value}> of tag <#{tag}> is not available.") + end + end + + class UnknownConversionMethodError < Error + attr_reader :to, :from + def initialize(to, from) + @to = to + @from = from + super("can't convert to #{to} from #{from}.") + end + end + # for backward compatibility + UnknownConvertMethod = UnknownConversionMethodError + + class ConversionError < Error + attr_reader :string, :to, :from + def initialize(string, to, from) + @string = string + @to = to + @from = from + super("can't convert #{@string} to #{to} from #{from}.") + end + end + + module BaseModel + + include Utils + + def install_have_child_element(name) + add_need_initialize_variable(name) + + attr_accessor name + install_element(name) do |n, elem_name| + <<-EOC + if @#{n} + "\#{indent}\#{@#{n}.to_s(convert)}" + else + '' + end +EOC + end + end + alias_method(:install_have_attribute_element, :install_have_child_element) + + def install_have_children_element(name, postfix="s") + add_have_children_element(name) + + def_children_accessor(name, postfix) + install_element(name, postfix) do |n, elem_name| + <<-EOC + rv = '' + @#{n}.each do |x| + rv << "\#{indent}\#{x.to_s(convert)}" + end + rv +EOC + end + end + + def install_text_element(name) + self::ELEMENTS << name + add_need_initialize_variable(name) + + attr_writer name + convert_attr_reader name + install_element(name) do |n, elem_name| + <<-EOC + if @#{n} + rv = "\#{indent}<#{elem_name}>" + value = html_escape(@#{n}) + if convert and @converter + rv << @converter.convert(value) + else + rv << value + end + rv << "" + rv + else + '' + end +EOC + end + end + + def install_date_element(name, type, disp_name=name) + self::ELEMENTS << name + add_need_initialize_variable(name) + + # accessor + convert_attr_reader name + module_eval(<<-EOC, *get_file_and_line_from_caller(2)) + def #{name}=(new_value) + if new_value.kind_of?(Time) + @#{name} = new_value + else + if @do_validate + begin + @#{name} = Time.send('#{type}', new_value) + rescue ArgumentError + raise NotAvailableValueError.new('#{disp_name}', new_value) + end + else + @#{name} = nil + if /\\A\\s*\\z/ !~ new_value.to_s + begin + @#{name} = Time.parse(new_value) + rescue ArgumentError + end + end + end + end + + # Is it need? + if @#{name} + class << @#{name} + alias_method(:_to_s, :to_s) unless respond_to?(:_to_s) + alias_method(:to_s, :#{type}) + end + end + + end +EOC + + install_element(name) do |n, elem_name| + <<-EOC + if @#{n} + rv = "\#{indent}<#{elem_name}>" + value = html_escape(@#{n}.#{type}) + if convert and @converter + rv << @converter.convert(value) + else + rv << value + end + rv << "" + rv + else + '' + end +EOC + end + + end + + private + def install_element(name, postfix="") + elem_name = name.sub('_', ':') + module_eval(<<-EOC, *get_file_and_line_from_caller(2)) + def #{name}_element#{postfix}(convert=true, indent='') + #{yield(name, elem_name)} + end + private :#{name}_element#{postfix} +EOC + end + + def convert_attr_reader(*attrs) + attrs.each do |attr| + attr = attr.id2name if attr.kind_of?(Integer) + module_eval(<<-EOC, *get_file_and_line_from_caller(2)) + def #{attr} + if @converter + @converter.convert(@#{attr}) + else + @#{attr} + end + end +EOC + end + end + + def def_children_accessor(accessor_name, postfix="s") + module_eval(<<-EOC, *get_file_and_line_from_caller(2)) + def #{accessor_name}#{postfix} + @#{accessor_name} + end + + def #{accessor_name}(*args) + if args.empty? + @#{accessor_name}.first + else + @#{accessor_name}.send("[]", *args) + end + end + + def #{accessor_name}=(*args) + if args.size == 1 + @#{accessor_name}.push(args[0]) + else + @#{accessor_name}.send("[]=", *args) + end + end + alias_method(:set_#{accessor_name}, :#{accessor_name}=) +EOC + end + + end + + URI = "http://purl.org/rss/1.0/" + + class Element + + extend BaseModel + include Utils + + class << self + + def inherited(klass) + klass.module_eval(<<-EOC) + public + + TAG_NAME = name.split('::').last.downcase + + + @@must_call_validators = {::RSS::URI => ''} + + def self.must_call_validators + @@must_call_validators + end + + def self.install_must_call_validator(prefix, uri) + @@must_call_validators[uri] = prefix + end + + @@model = [] + + def self.model + @@model + end + + def self.install_model(tag, occurs=nil) + if m = @@model.find {|t, o| t == tag} + m[1] = occurs + else + @@model << [tag, occurs] + end + end + + @@get_attributes = [] + + def self.get_attributes() + @@get_attributes + end + + def self.install_get_attribute(name, uri, required=true) + attr_writer name + convert_attr_reader name + @@get_attributes << [name, uri, required] + end + + @@have_content = false + + def self.content_setup + attr_writer :content + convert_attr_reader :content + @@have_content = true + end + + def self.have_content? + @@have_content + end + + @@have_children_elements = [] + + def self.have_children_elements + @@have_children_elements + end + + def self.add_have_children_element(variable_name) + @@have_children_elements << variable_name + end + + @@need_initialize_variables = [] + + def self.add_need_initialize_variable(variable_name) + @@need_initialize_variables << variable_name + end + + def self.need_initialize_variables + @@need_initialize_variables + end + + EOC + end + + def required_prefix + nil + end + + def required_uri + nil + end + + def install_ns(prefix, uri) + if self::NSPOOL.has_key?(prefix) + raise OverlappedPrefixError.new(prefix) + end + self::NSPOOL[prefix] = uri + end + + end + + attr_accessor :do_validate + + def initialize(do_validate=true) + @converter = nil + @do_validate = do_validate + initialize_variables + end + + def tag_name + self.class::TAG_NAME + end + + def converter=(converter) + @converter = converter + children.each do |child| + child.converter = converter unless child.nil? + end + end + + def validate + validate_attribute + __validate + end + + def validate_for_stream(tags) + __validate(tags, false) + end + + private + def initialize_variables + self.class.need_initialize_variables.each do |variable_name| + instance_eval("@#{variable_name} = nil") + end + initialize_have_children_elements + @content = "" if self.class.have_content? + end + + def initialize_have_children_elements + self.class.have_children_elements.each do |variable_name| + instance_eval("@#{variable_name} = []") + end + end + + # not String class children. + def children + [] + end + + # default #validate() argument. + def _tags + [] + end + + def _attrs + [] + end + + def __validate(tags=_tags, recursive=true) + if recursive + children.compact.each do |child| + child.validate + end + end + must_call_validators = self.class::must_call_validators + tags = tag_filter(tags.dup) + p tags if DEBUG + self.class::NSPOOL.each do |prefix, uri| + if tags.has_key?(uri) and !must_call_validators.has_key?(uri) + meth = "#{prefix}_validate" + send(meth, tags[uri]) if respond_to?(meth, true) + end + end + must_call_validators.each do |uri, prefix| + send("#{prefix}_validate", tags[uri]) + end + end + + def validate_attribute + _attrs.each do |a_name, required| + if required and send(a_name).nil? + raise MissingAttributeError.new(self.class::TAG_NAME, a_name) + end + end + end + + def other_element(convert, indent='') + rv = '' + private_methods.each do |meth| + if /\A([^_]+)_[^_]+_elements?\z/ =~ meth and + self.class::NSPOOL.has_key?($1) + res = send(meth, convert) + rv << "#{indent}#{res}\n" if /\A\s*\z/ !~ res + end + end + rv + end + + def _validate(tags, model=self.class.model) + count = 1 + do_redo = false + not_shift = false + tag = nil + + model.each_with_index do |elem, i| + + if DEBUG + p "before" + p tags + p elem + end + + if not_shift + not_shift = false + elsif tags + tag = tags.shift + end + + if DEBUG + p "mid" + p count + end + + case elem[1] + when '?' + if count > 2 + raise TooMuchTagError.new(elem[0], tag_name) + else + if elem[0] == tag + do_redo = true + else + not_shift = true + end + end + when '*' + if elem[0] == tag + do_redo = true + else + not_shift = true + end + when '+' + if elem[0] == tag + do_redo = true + else + if count > 1 + not_shift = true + else + raise MissingTagError.new(elem[0], tag_name) + end + end + else + if elem[0] == tag + if model[i+1] and model[i+1][0] != elem[0] and + tags and tags.first == elem[0] + raise TooMuchTagError.new(elem[0], tag_name) + end + else + raise MissingTagError.new(elem[0], tag_name) + end + end + + if DEBUG + p "after" + p not_shift + p do_redo + p tag + end + + if do_redo + do_redo = false + count += 1 + redo + else + count = 1 + end + + end + + if !tags.nil? and !tags.empty? + raise NotExceptedTagError.new(tag, tag_name) + end + + end + + def tag_filter(tags) + rv = {} + tags.each do |tag| + rv[tag[0]] = [] unless rv.has_key?(tag[0]) + rv[tag[0]].push(tag[1]) + end + rv + end + + end + + module RootElementMixin + + attr_reader :output_encoding + + def initialize(rss_version, version=nil, encoding=nil, standalone=nil) + super() + @rss_version = rss_version + @version = version || '1.0' + @encoding = encoding + @standalone = standalone + @output_encoding = nil + end + + def output_encoding=(enc) + @output_encoding = enc + self.converter = Converter.new(@output_encoding, @encoding) + end + + private + def xmldecl + rv = %Q[' + rv + end + + def ns_declaration + rv = '' + self.class::NSPOOL.each do |prefix, uri| + prefix = ":#{prefix}" unless prefix.empty? + rv << %Q|\n\txmlns#{prefix}="#{html_escape(uri)}"| + end + rv + end + + end + +end diff --git a/lib/rss/xmlparser.rb b/lib/rss/xmlparser.rb new file mode 100644 index 0000000000..7be0dc9a11 --- /dev/null +++ b/lib/rss/xmlparser.rb @@ -0,0 +1,91 @@ +begin + require "xml/parser" +rescue LoadError + require "xmlparser" +end + +begin + require "xml/encoding-ja" +rescue LoadError + require "xmlencoding-ja" + if defined?(Kconv) + module XMLEncoding_ja + class SJISHandler + include Kconv + end + end + end +end + +module XML + class Parser + unless defined?(Error) + Error = ::XMLParserError + end + end +end + +module RSS + + class REXMLLikeXMLParser < ::XML::Parser + + include ::XML::Encoding_ja + + def listener=(listener) + @listener = listener + end + + def startElement(name, attrs) + @listener.tag_start(name, attrs) + end + + def endElement(name) + @listener.tag_end(name) + end + + def character(data) + @listener.text(data) + end + + def xmlDecl(version, encoding, standalone) + @listener.xmldecl(version, encoding, standalone == 1) + end + + def processingInstruction(target, content) + @listener.instruction(target, content) + end + + end + + class XMLParserParser < BaseParser + + private + def listener + XMLParserListener + end + + def _parse + begin + parser = REXMLLikeXMLParser.new + parser.listener = @listener + parser.parse(@rss) + rescue ::XML::Parser::Error => e + raise NotWellFormedError.new(parser.line){e.message} + end + end + + end + + class XMLParserListener < BaseListener + + include ListenerMixin + + def xmldecl(version, encoding, standalone) + super + # Encoding is converted to UTF-8 when XMLParser parses XML. + @encoding = 'UTF-8' + end + + end + +end -- cgit v1.2.3