aboutsummaryrefslogtreecommitdiffstats
path: root/lib/rdoc/ruby_lex.rb
diff options
context:
space:
mode:
authordrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-11-27 04:28:14 +0000
committerdrbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2012-11-27 04:28:14 +0000
commit1c279a7d2753949c725754e1302f791b76358114 (patch)
tree36aa3bdde250e564445eba5f2e25fcb96bcb6cef /lib/rdoc/ruby_lex.rb
parentc72f0daa877808e4fa5018b3191ca09d4b97c03d (diff)
downloadruby-1c279a7d2753949c725754e1302f791b76358114.tar.gz
* lib/rdoc*: Updated to RDoc 4.0 (pre-release)
* bin/rdoc: ditto * test/rdoc: ditto * NEWS: Updated with RDoc 4.0 information git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@37889 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/rdoc/ruby_lex.rb')
-rw-r--r--lib/rdoc/ruby_lex.rb125
1 files changed, 82 insertions, 43 deletions
diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb
index 4392cea9cf..845569b0bc 100644
--- a/lib/rdoc/ruby_lex.rb
+++ b/lib/rdoc/ruby_lex.rb
@@ -11,7 +11,6 @@
require "e2mmap"
require "irb/slex"
-require "rdoc/ruby_token"
require "stringio"
##
@@ -21,6 +20,12 @@ require "stringio"
class RDoc::RubyLex
+ ##
+ # Raised upon invalid input
+
+ class Error < RDoc::Error
+ end
+
# :stopdoc:
extend Exception2MessageMapper
@@ -51,6 +56,29 @@ class RDoc::RubyLex
self.debug_level = 0
+ # :startdoc:
+
+ ##
+ # Returns an Array of +ruby+ tokens. See ::new for a description of
+ # +options+.
+
+ def self.tokenize ruby, options
+ tokens = []
+
+ scanner = RDoc::RubyLex.new ruby, options
+ scanner.exception_on_syntax_error = true
+
+ while token = scanner.token do
+ tokens << token
+ end
+
+ tokens
+ end
+
+ ##
+ # Creates a new lexer for +content+. +options+ is an RDoc::Options, only
+ # +tab_width is used.
+
def initialize(content, options)
lex_init
@@ -91,8 +119,11 @@ class RDoc::RubyLex
@prompt = nil
@prev_seek = nil
+ @ltype = nil
end
+ # :stopdoc:
+
def inspect # :nodoc:
"#<%s:0x%x pos %d lex_state %p space_seen %p>" % [
self.class, object_id,
@@ -122,8 +153,8 @@ class RDoc::RubyLex
end
def get_readed
- if idx = @readed.reverse.index("\n")
- @base_char_no = idx
+ if idx = @readed.rindex("\n")
+ @base_char_no = @readed.size - (idx + 1)
else
@base_char_no += @readed.size
end
@@ -188,8 +219,8 @@ class RDoc::RubyLex
@seek -= 1
if c == "\n"
@line_no -= 1
- if idx = @readed.reverse.index("\n")
- @char_no = @readed.size - idx
+ if idx = @readed.rindex("\n")
+ @char_no = idx + 1
else
@char_no = @base_char_no + @readed.size
end
@@ -323,7 +354,7 @@ class RDoc::RubyLex
tk = @OP.match(self)
@space_seen = tk.kind_of?(TkSPACE)
rescue SyntaxError => e
- raise RDoc::Error, "syntax error: #{e.message}" if
+ raise Error, "syntax error: #{e.message}" if
@exception_on_syntax_error
tk = TkError.new(@seek, @line_no, @char_no)
@@ -381,7 +412,7 @@ class RDoc::RubyLex
def lex_init()
@OP = IRB::SLex.new
@OP.def_rules("\0", "\004", "\032") do |op, io|
- Token(TkEND_OF_SCRIPT)
+ Token(TkEND_OF_SCRIPT, '')
end
@OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
@@ -413,7 +444,7 @@ class RDoc::RubyLex
gets # consume =end
@ltype = nil
- Token(TkCOMMENT, res)
+ Token(TkRD_COMMENT, res)
end
@OP.def_rule("\n") do |op, io|
@@ -460,7 +491,7 @@ class RDoc::RubyLex
|op, io|
tk = nil
if @lex_state != EXPR_END && @lex_state != EXPR_CLASS &&
- (@lex_state != EXPR_ARG || @space_seen)
+ (@lex_state != EXPR_ARG || @space_seen)
c = peek(0)
if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
tk = identify_here_document
@@ -505,13 +536,8 @@ class RDoc::RubyLex
@lex_state = EXPR_BEG;
Token(TkQUESTION)
else
- str = ch
- if ch == '\\'
- str << read_escape
- end
@lex_state = EXPR_END
- str << (ch.respond_to?(:ord) ? ch.ord : ch[0])
- Token(TkINTEGER, str)
+ Token(TkSTRING, ch)
end
end
end
@@ -782,7 +808,8 @@ class RDoc::RubyLex
@OP.def_rule("_") do
if peek_match?(/_END__/) and @lex_state == EXPR_BEG then
- Token(TkEND_OF_SCRIPT)
+ 6.times { getc }
+ Token(TkEND_OF_SCRIPT, '__END__')
else
ungetc
identify_identifier
@@ -909,6 +936,8 @@ class RDoc::RubyLex
@indent += 1
@indent_stack.push token_c
end
+ else
+ token_c = TkIDENTIFIER
end
elsif DEINDENT_CLAUSE.include?(token)
@@ -929,7 +958,8 @@ class RDoc::RubyLex
if peek(0) == '='
token.concat getc
end
- elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT
+ elsif @lex_state == EXPR_BEG || @lex_state == EXPR_DOT ||
+ @lex_state == EXPR_ARG
@lex_state = EXPR_ARG
else
@lex_state = EXPR_END
@@ -978,14 +1008,15 @@ class RDoc::RubyLex
end
@here_header = false
- doc = ''
+ doc = '"'
while l = gets
- l = l.sub(/(:?\r)?\n\z/, '')
- if (indent ? l.strip : l) == quoted
+ l = l.sub(/(:?\r)?\n\z/, "\n")
+ if (indent ? l.strip : l.chomp) == quoted
break
end
doc << l
end
+ doc << '"'
@here_header = true
@here_readed.concat reserve
@@ -999,13 +1030,14 @@ class RDoc::RubyLex
end
def identify_quotation
- ch = getc
- if lt = PERCENT_LTYPE[ch]
+ type = ch = getc
+ if lt = PERCENT_LTYPE[type]
ch = getc
- elsif ch =~ /\W/
+ elsif type =~ /\W/
+ type = nil
lt = "\""
else
- raise RDoc::Error, "unknown type of %string #{ch.inspect}"
+ return Token(TkMOD, '%')
end
# if ch !~ /\W/
# ungetc
@@ -1013,7 +1045,7 @@ class RDoc::RubyLex
# end
#@ltype = lt
@quoted = ch unless @quoted = PERCENT_PAREN[ch]
- identify_string(lt, @quoted)
+ identify_string(lt, @quoted, type)
end
def identify_number(op = "")
@@ -1040,7 +1072,7 @@ class RDoc::RubyLex
when /[0-7]/
match = /[0-7_]/
when /[89]/
- raise RDoc::Error, "Illegal octal digit"
+ raise Error, "Illegal octal digit"
else
return Token(TkINTEGER, num)
end
@@ -1054,7 +1086,7 @@ class RDoc::RubyLex
if match =~ ch
if ch == "_"
if non_digit
- raise RDoc::Error, "trailing `#{ch}' in number"
+ raise Error, "trailing `#{ch}' in number"
else
non_digit = ch
end
@@ -1066,10 +1098,10 @@ class RDoc::RubyLex
ungetc
num[-1, 1] = ''
if len0
- raise RDoc::Error, "numeric literal without digits"
+ raise Error, "numeric literal without digits"
end
if non_digit
- raise RDoc::Error, "trailing `#{non_digit}' in number"
+ raise Error, "trailing `#{non_digit}' in number"
end
break
end
@@ -1090,7 +1122,7 @@ class RDoc::RubyLex
non_digit = ch
when allow_point && "."
if non_digit
- raise RDoc::Error, "trailing `#{non_digit}' in number"
+ raise Error, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
if peek(0) !~ /[0-9]/
@@ -1102,7 +1134,7 @@ class RDoc::RubyLex
allow_point = false
when allow_e && "e", allow_e && "E"
if non_digit
- raise RDoc::Error, "trailing `#{non_digit}' in number"
+ raise Error, "trailing `#{non_digit}' in number"
end
type = TkFLOAT
if peek(0) =~ /[+-]/
@@ -1113,7 +1145,7 @@ class RDoc::RubyLex
non_digit = ch
else
if non_digit
- raise RDoc::Error, "trailing `#{non_digit}' in number"
+ raise Error, "trailing `#{non_digit}' in number"
end
ungetc
num[-1, 1] = ''
@@ -1124,14 +1156,17 @@ class RDoc::RubyLex
Token(type, num)
end
- def identify_string(ltype, quoted = ltype)
+ def identify_string(ltype, quoted = ltype, type = nil)
+ close = PERCENT_PAREN.values.include?(quoted)
@ltype = ltype
@quoted = quoted
- str = if ltype == quoted then
+ str = if ltype == quoted and %w[" ' /].include? ltype then
ltype.dup
+ elsif RUBY_VERSION > '1.9' then
+ "%#{type or PERCENT_LTYPE.key ltype}#{PERCENT_PAREN_REV[quoted]}"
else
- "%#{PERCENT_PAREN_REV[quoted]}"
+ "%#{type or PERCENT_LTYPE.index ltype}#{PERCENT_PAREN_REV[quoted]}"
end
subtype = nil
@@ -1141,7 +1176,7 @@ class RDoc::RubyLex
while ch = getc
str << ch
- if @quoted == ch and nest == 0
+ if @quoted == ch and nest <= 0
break
elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
ch = getc
@@ -1152,17 +1187,21 @@ class RDoc::RubyLex
else
ungetc
end
- elsif ch == '\\' and @ltype == "'" #'
- case ch = getc
- when "\\", "\n", "'"
+ elsif ch == '\\'
+ if %w[' /].include? @ltype then
+ case ch = getc
+ when "\\", "\n", "'"
+ when @ltype
+ str << ch
+ else
+ ungetc
+ end
else
- ungetc
+ str << read_escape
end
- elsif ch == '\\' #'
- str << read_escape
end
- if PERCENT_PAREN.values.include?(@quoted)
+ if close then
if PERCENT_PAREN[ch] == @quoted
nest += 1
elsif ch == @quoted