aboutsummaryrefslogtreecommitdiffstats
path: root/lib/rdoc/encoding.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/rdoc/encoding.rb')
-rw-r--r--lib/rdoc/encoding.rb48
1 files changed, 27 insertions, 21 deletions
diff --git a/lib/rdoc/encoding.rb b/lib/rdoc/encoding.rb
index 54ecd89816..cf60badd24 100644
--- a/lib/rdoc/encoding.rb
+++ b/lib/rdoc/encoding.rb
@@ -7,6 +7,18 @@
module RDoc::Encoding
+ HEADER_REGEXP = /^
+ (?:
+ \A\#!.*\n
+ |
+ ^\#\s+frozen[-_]string[-_]literal[=:].+\n
+ |
+ ^\#[^\n]+\b(?:en)?coding[=:]\s*(?<name>[^\s;]+).*\n
+ |
+ <\?xml[^?]*encoding=(?<quote>["'])(?<name>.*?)\k<quote>.*\n
+ )+
+ /xi # :nodoc:
+
##
# Reads the contents of +filename+ and handles any encoding directives in
# the file.
@@ -18,12 +30,13 @@ module RDoc::Encoding
# unknown character in the target encoding will be replaced with '?'
def self.read_file filename, encoding, force_transcode = false
- content = open filename, "rb" do |f| f.read end
+ content = File.open filename, "rb" do |f| f.read end
content.gsub!("\r\n", "\n") if RUBY_PLATFORM =~ /mswin|mingw/
utf8 = content.sub!(/\A\xef\xbb\xbf/, '')
- content = RDoc::Encoding.set_encoding content
+ enc = RDoc::Encoding.detect_encoding content
+ content = RDoc::Encoding.change_encoding content, enc if enc
begin
encoding ||= Encoding.default_external
@@ -85,29 +98,22 @@ module RDoc::Encoding
end
##
- # Sets the encoding of +string+ based on the magic comment
-
- def self.set_encoding string
- string = remove_frozen_string_literal string
-
- string =~ /\A(?:#!.*\n)?(.*\n)/
-
- first_line = $1
+ # Detects the encoding of +string+ based on the magic comment
- name = case first_line
- when /^<\?xml[^?]*encoding=(["'])(.*?)\1/ then $2
- when /\b(?:en)?coding[=:]\s*([^\s;]+)/i then $1
- else return string
- end
+ def self.detect_encoding string
+ result = HEADER_REGEXP.match string
+ name = result && result[:name]
- string = string.sub first_line, ''
-
- string = remove_frozen_string_literal string
+ name ? Encoding.find(name) : nil
+ end
- enc = Encoding.find name
- string = RDoc::Encoding.change_encoding string, enc if enc
+ ##
+ # Removes magic comments and shebang
- string
+ def self.remove_magic_comment string
+ string.sub HEADER_REGEXP do |s|
+ s.gsub(/[^\n]/, '')
+ end
end
##