about | summary | refs | log | tree | commit | diff | stats
path: root/lib/rdoc/parser.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rdoc/parser.rb')
-rw-r--r-- lib/rdoc/parser.rb 64
1 file changed, 42 insertions, 22 deletions
diff --git a/lib/rdoc/parser.rb b/lib/rdoc/parser.rb
index 3798734d5e..d29599c879 100644
--- a/lib/rdoc/parser.rb
+++ b/lib/rdoc/parser.rb
@@ -22,14 +22,14 @@ require 'rdoc/stats'
# following incantation
#
# require "rdoc/parser"
-#
+#
# class RDoc::Parser::Xyz < RDoc::Parser
# parse_files_matching /\.xyz$/ # <<<<
-#
+#
# def initialize(file_name, body, options)
# ...
# end
-#
+#
# def scan
# ...
# end
@@ -63,13 +63,37 @@ class RDoc::Parser
end
##
- # Return _true_ if the +file+ seems like binary.
+ # Determines if the file is a "binary" file which basically means it has
+ # content that an RDoc parser shouldn't try to consume.
def self.binary?(file)
- s = File.read(file, 1024) or return false
- s.count("^ -~\t\r\n").fdiv(s.size) > 0.3 || s.index("\x00")
+ s = File.read(file, File.stat(file).blksize) || ""
+
+ if s[0, 2] == Marshal.dump('')[0, 2] then
+ true
+ elsif file =~ /erb\.rb$/ then
+ false
+ elsif s.scan(/<%|%>/).length >= 4 then
+ true
+ else
+ # From ptools under the Artistic License 2.0, (c) Daniel Berger.
+ s = s.split(//)
+
+ ((s.size - s.grep(" ".."~").size) / s.size.to_f) > 0.30
+ end
+ end
+
+ ##
+ # Checks if +file+ is a zip file in disguise. Signatures from
+ # http://www.garykessler.net/library/file_sigs.html
+
+ def self.zip? file
+ zip_signature = File.read file, 4
+
+ zip_signature == "PK\x03\x04" or
+ zip_signature == "PK\x05\x06" or
+ zip_signature == "PK\x07\x08"
end
- private_class_method :binary?
##
# Return a parser that can handle a particular extension
@@ -77,16 +101,13 @@ class RDoc::Parser
def self.can_parse(file_name)
parser = RDoc::Parser.parsers.find { |regexp,| regexp =~ file_name }.last
- #
- # The default parser should *NOT* parse binary files.
- #
- if parser == RDoc::Parser::Simple then
- if binary? file_name then
- return nil
- end
- end
+ # HACK Selenium hides a jar file using a .txt extension
+ return if parser == RDoc::Parser::Simple and zip? file_name
- return parser
+ # The default parser must not parse binary files
+ return if parser == RDoc::Parser::Simple and file_name !~ /\.(txt|rdoc)$/
+
+ parser
end
##
@@ -94,6 +115,8 @@ class RDoc::Parser
# for ones that we don't know
def self.for(top_level, file_name, body, options, stats)
+ return if binary? file_name
+
# If no extension, look for shebang
if file_name !~ /\.\w+$/ && body =~ %r{\A#!(.+)} then
shebang = $1
@@ -105,18 +128,15 @@ class RDoc::Parser
parser = can_parse file_name
- #
- # This method must return a parser.
- #
- if !parser then
- parser = RDoc::Parser::Simple
- end
+ return unless parser
parser.new top_level, file_name, body, options, stats
end
##
# Record which file types this parser can understand.
+ #
+ # It is ok to call this multiple times.
def self.parse_files_matching(regexp)
RDoc::Parser.parsers.unshift [regexp, self]