diff options
author | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-12-20 03:22:49 +0000 |
---|---|---|
committer | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2010-12-20 03:22:49 +0000 |
commit | 2ef9c50c6e405717d06362787c4549ca4f1c6485 (patch) | |
tree | ee99486567461dd5796f3d6edcc9e204187f2666 /lib/rdoc/text.rb | |
parent | d7effd506f5b91a636f2e6452ef1946b923007c7 (diff) | |
download | ruby-2ef9c50c6e405717d06362787c4549ca4f1c6485.tar.gz |
Import RDoc 3
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30249 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rdoc/text.rb')
-rw-r--r-- | lib/rdoc/text.rb | 137 |
1 files changed, 133 insertions, 4 deletions
diff --git a/lib/rdoc/text.rb b/lib/rdoc/text.rb index 5280aa0fd2..20cd8a258a 100644 --- a/lib/rdoc/text.rb +++ b/lib/rdoc/text.rb @@ -1,9 +1,44 @@ +# coding: utf-8 + +## +# For RDoc::Text#to_html + +require 'strscan' + ## # Methods for manipulating comment text module RDoc::Text ## + # Maps an encoding to a Hash of characters properly transcoded for that + # encoding. + # + # See also encode_fallback. + + TO_HTML_CHARACTERS = Hash.new do |h, encoding| + h[encoding] = { + :close_dquote => encode_fallback('”', encoding, '"'), + :close_squote => encode_fallback('’', encoding, '\''), + :copyright => encode_fallback('©', encoding, '(c)'), + :ellipsis => encode_fallback('…', encoding, '...'), + :em_dash => encode_fallback('—', encoding, '---'), + :en_dash => encode_fallback('–', encoding, '--'), + :open_dquote => encode_fallback('“', encoding, '"'), + :open_squote => encode_fallback('‘', encoding, '\''), + :trademark => encode_fallback('®', encoding, '(r)'), + } + end if Object.const_defined? :Encoding + + ## + # Transcodes +character+ to +encoding+ with a +fallback+ character. + + def self.encode_fallback character, encoding, fallback + character.encode(encoding, :fallback => { character => fallback }, + :undef => :replace, :replace => fallback) + end + + ## # Expands tab characters in +text+ to eight spaces def expand_tabs text @@ -43,8 +78,7 @@ module RDoc::Text end ## - # Convert a string in markup format into HTML. Removes the first paragraph - # tags if +remove_para+ is true. + # Convert a string in markup format into HTML. # # Requires the including class to implement #formatter @@ -105,7 +139,7 @@ http://rubyforge.org/tracker/?atid=2472&group_id=627&func=browse def strip_hashes text return text if text =~ /^(?>\s*)[^\#]/ - text.gsub(/^\s*(#+)/) { $1.tr '#',' ' } + text.gsub(/^\s*(#+)/) { $1.tr '#',' ' }.gsub(/^\s+$/, '') end ## @@ -123,7 +157,102 @@ http://rubyforge.org/tracker/?atid=2472&group_id=627&func=browse text.sub! %r%/\*+% do " " * $&.length end text.sub! %r%\*+/% do " " * $&.length end text.gsub! %r%^[ \t]*\*%m do " " * $&.length end - text + text.gsub(/^\s+$/, '') + end + + ## + # Converts ampersand, dashes, ellipsis, quotes, copyright and registered + # trademark symbols in +text+ to properly encoded characters. + + def to_html text + if Object.const_defined? :Encoding then + html = ''.encode text.encoding + + encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding] + else + html = '' + encoded = { + :close_dquote => '”', + :close_squote => '’', + :copyright => '©', + :ellipsis => '…', + :em_dash => '—', + :en_dash => '–', + :open_dquote => '“', + :open_squote => '‘', + :trademark => '®', + } + end + + s = StringScanner.new text + insquotes = false + indquotes = false + after_word = nil + + until s.eos? do + case + when s.scan(/<tt>.*?<\/tt>/) then # skip contents of tt + html << s.matched.gsub('\\\\', '\\') + when s.scan(/<tt>.*?/) then + warn 'mismatched <tt> tag' # TODO signal file/line + html << s.matched + when s.scan(/<[^>]+\/?s*>/) then # skip HTML tags + html << s.matched + when s.scan(/\\(\S)/) then # unhandled suppressed crossref + html << s[1] + after_word = nil + when s.scan(/\.\.\.(\.?)/) then + html << s[1] << encoded[:ellipsis] + after_word = nil + when s.scan(/\(c\)/) then + html << encoded[:copyright] + after_word = nil + when s.scan(/\(r\)/) then + html << encoded[:trademark] + after_word = nil + when s.scan(/---/) then + html << encoded[:em_dash] + after_word = nil + when s.scan(/--/) then + html << encoded[:en_dash] + after_word = nil + when s.scan(/"|"/) then + html << encoded[indquotes ? :close_dquote : :open_dquote] + indquotes = !indquotes + after_word = nil + when s.scan(/``/) then # backtick double quote + html << encoded[:open_dquote] + after_word = nil + when s.scan(/''/) then # tick double quote + html << encoded[:close_dquote] + after_word = nil + when s.scan(/'/) then # single quote + if insquotes + html << encoded[:close_squote] + insquotes = false + elsif after_word + # Mary's dog, my parents' house: do not start paired quotes + html << encoded[:close_squote] + else + html << encoded[:open_squote] + insquotes = true + end + + after_word = nil + else # advance to the next potentially significant character + match = s.scan(/.+?(?=[<\\.("'`&-])/) #" + + if match then + html << match + after_word = match =~ /\w$/ + else + html << s.rest + break + end + end + end + + html end end |