aboutsummaryrefslogtreecommitdiffstats
path: root/lib/xsd/xmlparser
diff options
context:
space:
mode:
Diffstat (limited to 'lib/xsd/xmlparser')
-rw-r--r--lib/xsd/xmlparser/parser.rb107
-rw-r--r--lib/xsd/xmlparser/rexmlparser.rb65
-rw-r--r--lib/xsd/xmlparser/xmlparser.rb61
-rw-r--r--lib/xsd/xmlparser/xmlscanner.rb158
4 files changed, 391 insertions, 0 deletions
diff --git a/lib/xsd/xmlparser/parser.rb b/lib/xsd/xmlparser/parser.rb
new file mode 100644
index 0000000000..0c7fd48084
--- /dev/null
+++ b/lib/xsd/xmlparser/parser.rb
@@ -0,0 +1,107 @@
+=begin
+XSD4R - XML Instance parser library.
+Copyright (C) 2002, 2003 NAKAMURA, Hiroshi.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+Ave, Cambridge, MA 02139, USA.
+=end
+
+
+require 'xsd/qname'
+require 'xsd/ns'
+require 'xsd/charset'
+
+
+module XSD
+module XMLParser
+
+
+# Base class of the XML parser back ends; events are forwarded to the host.
+class Parser
+  class ParseError < Error; end
+  class FormatDecodeError < ParseError; end
+  class UnknownElementError < FormatDecodeError; end
+  class UnknownAttributeError < FormatDecodeError; end
+  class UnexpectedElementError < FormatDecodeError; end
+  class ElementConstraintError < FormatDecodeError; end
+
+  # Back end class used by create_parser; set through add_factory.
+  @@parser_factory = nil
+
+  def self.factory
+    @@parser_factory
+  end
+
+  # Instantiates the registered back end with the given host and options.
+  def self.create_parser(host, opt = {})
+    @@parser_factory.new(host, opt)
+  end
+
+  # Registers factory as the back end, replacing any earlier registration.
+  def self.add_factory(factory)
+    puts "Set #{ factory } as XML processor." if $DEBUG
+    @@parser_factory = factory
+  end
+
+  attr_accessor :charset
+
+  # host receives the start_element, characters and end_element calls;
+  # opt[:charset] replaces the default charset 'us-ascii'.
+  def initialize(host, opt = {})
+    @host = host
+    @charset = opt[:charset] || 'us-ascii'
+  end
+
+  # Runs one parse: prologue, the back end's do_parse, then epilogue.
+  def parse(string_or_readable)
+    @textbuf = ''
+    prologue
+    do_parse(string_or_readable)
+    epilogue
+  end
+
+private
+
+  # Back end hook; raises NotImplementedError unless a subclass overrides it.
+  def do_parse(string_or_readable)
+    raise NotImplementedError,
+      'Method do_parse must be defined in derived class.'
+  end
+
+  def start_element(name, attrs)
+    @host.start_element(name, attrs)
+  end
+
+  def characters(text)
+    @host.characters(text)
+  end
+
+  def end_element(name)
+    @host.end_element(name)
+  end
+
+  def prologue; end
+
+  def epilogue; end
+
+  # Adopts the XML declaration's encoding only while @charset is nil.
+  def xmldecl_encoding=(charset)
+    return @charset = charset if @charset.nil?
+    # Definition in a stream (like HTTP) has a priority.
+    p "encoding definition: #{ charset } is ignored." if $DEBUG
+  end
+end
+
+
+end
+end
diff --git a/lib/xsd/xmlparser/rexmlparser.rb b/lib/xsd/xmlparser/rexmlparser.rb
new file mode 100644
index 0000000000..2500d432d8
--- /dev/null
+++ b/lib/xsd/xmlparser/rexmlparser.rb
@@ -0,0 +1,65 @@
+=begin
+XSD4R - REXMLParser XML parser library.
+Copyright (C) 2002, 2003 NAKAMURA, Hiroshi.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+Ave, Cambridge, MA 02139, USA.
+=end
+
+
+require 'xsd/xmlparser'
+require 'rexml/streamlistener'
+require 'rexml/document'
+
+
+module XSD
+module XMLParser
+
+
+# Parser back end that streams REXML listener events to the base class.
+class REXMLParser < XSD::XMLParser::Parser
+  include REXML::StreamListener
+
+  # Feeds the input to REXML's stream parser with self as the listener.
+  def do_parse(string_or_readable)
+    input = REXML::SourceFactory.create_from(string_or_readable)
+    input.encoding = charset if charset
+    # Listener passes a String in utf-8.
+    @charset = 'utf-8'
+    REXML::Document.parse_stream(input, self)
+  end
+
+  def epilogue; end
+
+  def tag_start(name, attrs)
+    start_element(name, attrs)
+  end
+
+  def tag_end(name)
+    end_element(name)
+  end
+
+  def text(text)
+    characters(text)
+  end
+
+  def xmldecl(version, encoding, standalone)
+    # Version should be checked.
+  end
+
+  add_factory(self)
+end
+
+
+end
+end
diff --git a/lib/xsd/xmlparser/xmlparser.rb b/lib/xsd/xmlparser/xmlparser.rb
new file mode 100644
index 0000000000..f555b99b26
--- /dev/null
+++ b/lib/xsd/xmlparser/xmlparser.rb
@@ -0,0 +1,61 @@
+=begin
+XSD4R - XMLParser XML parser library.
+Copyright (C) 2001, 2003 NAKAMURA, Hiroshi.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+Ave, Cambridge, MA 02139, USA.
+=end
+
+
+require 'xsd/xmlparser'
+require 'xml/parser'
+
+
+module XSD
+module XMLParser
+
+
+# Parser back end built on the XML::Parser class from 'xml/parser'.
+class XMLParser < XSD::XMLParser::Parser
+  class Listener < XML::Parser
+    begin
+      require 'xml/encoding-ja'
+      include XML::Encoding_ja
+    rescue LoadError
+      # uconv may not be installed.
+    end
+  end
+
+  # Dispatches each parse event to the matching handler of the base class.
+  def do_parse(string_or_readable)
+    # XMLParser passes a String in utf-8.
+    @charset = 'utf-8'
+    @parser = Listener.new
+    @parser.parse(string_or_readable) do |type, name, data|
+      case type
+      when XML::Parser::START_ELEM then start_element(name, data)
+      when XML::Parser::END_ELEM   then end_element(name)
+      when XML::Parser::CDATA      then characters(data)
+      else
+        # Any other event type is rejected.
+        raise FormatDecodeError.new("Unexpected XML: #{ type }/#{ name }/#{ data }.")
+      end
+    end
+  end
+
+  add_factory(self)
+end
+
+
+end
+end
diff --git a/lib/xsd/xmlparser/xmlscanner.rb b/lib/xsd/xmlparser/xmlscanner.rb
new file mode 100644
index 0000000000..c10e275b9e
--- /dev/null
+++ b/lib/xsd/xmlparser/xmlscanner.rb
@@ -0,0 +1,158 @@
+=begin
+XSD4R - XMLScan XML parser library.
+Copyright (C) 2002, 2003 NAKAMURA, Hiroshi.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 675 Mass
+Ave, Cambridge, MA 02139, USA.
+=end
+
+
+require 'xsd/xmlparser'
+require 'xmlscan/scanner'
+
+
+module XSD
+module XMLParser
+
+
+# Parser back end that drives XMLScan::XMLScanner with self as the visitor.
+class XMLScanner < XSD::XMLParser::Parser
+  include XMLScan::Visitor
+
+  # Resets the attribute state, then scans the input with a new scanner.
+  def do_parse(string_or_readable)
+    @attrs = {}
+    @curattr = nil
+    @scanner = XMLScan::XMLScanner.new(self)
+    @scanner.kcode = ::XSD::Charset.charset_str(charset) if charset
+    @scanner.parse(string_or_readable)
+  end
+
+  # Updates the scanner's kcode, then reports the encoding to the base class.
+  def scanner_kcode=(charset)
+    @scanner.kcode = ::XSD::Charset.charset_str(charset) if charset
+    self.xmldecl_encoding = charset
+  end
+
+  # Replacement text for the entity references handled by this class.
+  ENTITY_REF_MAP = {
+    'lt' => '<', 'gt' => '>', 'amp' => '&', 'quot' => '"', 'apos' => '\''
+  }
+
+  def parse_error(msg)
+    raise ParseError.new(msg)
+  end
+
+  # NOTE(review): NotWellFormedError is not defined in this patch; confirm.
+  def wellformed_error(msg)
+    raise NotWellFormedError.new(msg)
+  end
+
+  # NOTE(review): NotValidError is not defined in this patch; confirm.
+  def valid_error(msg)
+    raise NotValidError.new(msg)
+  end
+
+  def warning(msg)
+    p msg if $DEBUG
+  end
+
+  # def on_xmldecl; end
+
+  def on_xmldecl_version(str)
+    # 1.0 expected.
+  end
+
+  def on_xmldecl_encoding(str)
+    self.scanner_kcode = str
+  end
+
+  # def on_xmldecl_standalone(str); end
+  # def on_xmldecl_other(name, value); end
+  # def on_xmldecl_end; end
+  # def on_doctype(root, pubid, sysid); end
+  # def on_prolog_space(str); end
+  # def on_comment(str); end
+  # def on_pi(target, pi); end
+
+  def on_chardata(str)
+    characters(str)
+  end
+
+  # def on_cdata(str); end
+
+  def on_etag(name)
+    end_element(name)
+  end
+
+  # Names missing from ENTITY_REF_MAP are passed on as nil.
+  def on_entityref(ref)
+    characters(ENTITY_REF_MAP[ref])
+  end
+
+  def on_charref(code)
+    characters([code].pack('U'))
+  end
+
+  def on_charref_hex(code)
+    on_charref(code)
+  end
+
+  # def on_start_document; end
+  # def on_end_document; end
+
+  def on_stag(name)
+    @attrs = {}
+  end
+
+  # The stored value is the same String the on_attr_* callbacks append to.
+  def on_attribute(name)
+    @curattr = ''
+    @attrs[name] = @curattr
+  end
+
+  def on_attr_value(str)
+    @curattr << str
+  end
+
+  def on_attr_entityref(ref)
+    @curattr << ENTITY_REF_MAP[ref]
+  end
+
+  def on_attr_charref(code)
+    @curattr << [code].pack('U')
+  end
+
+  def on_attr_charref_hex(code)
+    on_attr_charref(code)
+  end
+
+  # def on_attribute_end(name); end
+
+  # An empty-element tag is reported as a start followed by an end.
+  def on_stag_end_empty(name)
+    on_stag_end(name)
+    on_etag(name)
+  end
+
+  # Emits the start event together with the attributes gathered so far.
+  def on_stag_end(name)
+    start_element(name, @attrs)
+  end
+
+  add_factory(self)
+end
+
+
+end
+end