From 214a7f8d49c7b59d06f5e2e3e1a8a3567ab7c570 Mon Sep 17 00:00:00 2001 From: hsbt Date: Tue, 12 Sep 2017 03:42:54 +0000 Subject: Merge rdoc-6.0.0.beta2 from upstream. * This version changed lexer used Ripper from lexer based IRB. see details: https://github.com/ruby/rdoc/pull/512 git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59845 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/rdoc/parser/c.rb | 6 +- lib/rdoc/parser/ripper_state_lex.rb | 587 ++++++++++++++++++++++++++ lib/rdoc/parser/ruby.rb | 796 ++++++++++++++++++++---------------- lib/rdoc/parser/ruby_tools.rb | 32 +- 4 files changed, 1052 insertions(+), 369 deletions(-) create mode 100644 lib/rdoc/parser/ripper_state_lex.rb (limited to 'lib/rdoc/parser') diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index 0e72df2ecd..5c940ab28e 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -666,8 +666,7 @@ class RDoc::Parser::C < RDoc::Parser #meth_obj.params = params meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk meth_obj.comment = comment meth_obj.line = file_content[0, offset].count("\n") + 1 @@ -684,8 +683,7 @@ class RDoc::Parser::C < RDoc::Parser find_modifiers comment, meth_obj meth_obj.start_collecting_tokens - tk = RDoc::RubyToken::Token.new nil, 1, 1 - tk.set_text body + tk = { :line_no => 1, :char_no => 1, :text => body } meth_obj.add_token tk meth_obj.comment = comment meth_obj.line = file_content[0, offset].count("\n") + 1 diff --git a/lib/rdoc/parser/ripper_state_lex.rb b/lib/rdoc/parser/ripper_state_lex.rb new file mode 100644 index 0000000000..c9a0f5a21e --- /dev/null +++ b/lib/rdoc/parser/ripper_state_lex.rb @@ -0,0 +1,587 @@ +require 'ripper' + +class RDoc::RipperStateLex + EXPR_NONE = 0 + EXPR_BEG = 1 + EXPR_END = 2 + EXPR_ENDARG = 4 + EXPR_ENDFN = 8 + EXPR_ARG = 16 + EXPR_CMDARG = 32 + EXPR_MID = 64 + EXPR_FNAME = 128 + EXPR_DOT = 256 + EXPR_CLASS = 512 + EXPR_LABEL = 1024 + EXPR_LABELED = 2048 + EXPR_FITEM = 4096 + EXPR_VALUE = EXPR_BEG + EXPR_BEG_ANY = (EXPR_BEG | EXPR_MID | EXPR_CLASS) + EXPR_ARG_ANY = (EXPR_ARG | EXPR_CMDARG) + EXPR_END_ANY = (EXPR_END | EXPR_ENDARG | EXPR_ENDFN) + + class InnerStateLex < Ripper::Filter + attr_accessor :lex_state + + def initialize(code) + @lex_state = EXPR_BEG + @in_fname = false + @continue = false + reset + super(code) + end + + def reset + @command_start = false + @cmd_state = @command_start + end + + def on_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_ignored_nl(tok, data) + case @lex_state + when EXPR_FNAME, EXPR_DOT + @continue = true + else + @continue = false + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_op(tok, data) + case tok + when '&', '|', '!', '!=', '!~' + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '<<' + # TODO next token? + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + when '?' + @lex_state = EXPR_BEG + when '&&', '||', '+=', '-=', '*=', '**=', + '&=', '|=', '^=', '<<=', '>>=', '||=', '&&=' + @lex_state = EXPR_BEG + else + case @lex_state + when EXPR_FNAME, EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_BEG + end + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_kw(tok, data) + case tok + when 'class' + @lex_state = EXPR_CLASS + @in_fname = true + when 'def' + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + when 'if', 'unless', 'while', 'until' + if ((EXPR_END | EXPR_ENDARG | EXPR_ENDFN | EXPR_ARG | EXPR_CMDARG) & @lex_state) != 0 # postfix if + @lex_state = EXPR_BEG | EXPR_LABEL + else + @lex_state = EXPR_BEG + end + when 'begin' + @lex_state = EXPR_BEG + else + if @lex_state == EXPR_FNAME + @lex_state = EXPR_END + else + @lex_state = EXPR_END + end + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_tstring_beg(tok, data) + @lex_state = EXPR_BEG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_tstring_end(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_CHAR(tok, data) + @lex_state = EXPR_END + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_period(tok, data) + @lex_state = EXPR_DOT + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_int(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_float(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_rational(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_imaginary(tok, data) + @lex_state = EXPR_END | EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_symbeg(tok, data) + @lex_state = EXPR_FNAME + @continue = true + @in_fname = true + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + private def on_variables(event, tok, data) + if @in_fname + @lex_state = EXPR_ENDFN + @in_fname = false + @continue = false + elsif @continue + case @lex_state + when EXPR_DOT + @lex_state = EXPR_ARG + else + @lex_state = EXPR_ENDFN + @continue = false + end + else + @lex_state = EXPR_CMDARG + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => event, :text => tok, :state => @lex_state}) + end + + def on_ident(tok, data) + on_variables(__method__, tok, data) + end + + def on_ivar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_cvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_gvar(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_backref(tok, data) + @lex_state = EXPR_END + on_variables(__method__, tok, data) + end + + def on_lparen(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_rparen(tok, data) + @lex_state = EXPR_ENDFN + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_lbrace(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_rbrace(tok, data) + @lex_state = EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_lbracket(tok, data) + @lex_state = EXPR_LABEL | EXPR_BEG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_rbracket(tok, data) + @lex_state = EXPR_ENDARG + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_const(tok, data) + case @lex_state + when EXPR_FNAME + @lex_state = EXPR_ENDFN + when EXPR_CLASS + @lex_state = EXPR_ARG + else + @lex_state = EXPR_CMDARG + end + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_sp(tok, data) + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_comma(tok, data) + @lex_state = EXPR_BEG | EXPR_LABEL if (EXPR_ARG_ANY & @lex_state) != 0 + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_comment(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_ignored_sp(tok, data) + @lex_state = EXPR_BEG unless (EXPR_LABEL & @lex_state) != 0 + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + end + + def on_heredoc_end(tok, data) + @callback.call({ :line_no => lineno, :char_no => column, :kind => __method__, :text => tok, :state => @lex_state}) + @lex_state = EXPR_BEG + end + + def on_default(event, tok, data) + reset + @callback.call({ :line_no => lineno, :char_no => column, :kind => event, :text => tok, :state => @lex_state}) + end + + def each(&block) + @callback = block + parse + end + end + + def get_squashed_tk + if @buf.empty? + tk = @inner_lex_enumerator.next + else + tk = @buf.shift + end + case tk[:kind] + when :on_symbeg then + tk = get_symbol_tk(tk) + when :on_tstring_beg then + tk = get_string_tk(tk) + when :on_backtick then + if (EXPR_FNAME & tk[:state]) != 0 + @inner_lex.lex_state = EXPR_ARG + tk[:kind] = :on_ident + tk[:state] = @inner_lex.lex_state + else + tk = get_string_tk(tk) + end + when :on_regexp_beg then + tk = get_regexp_tk(tk) + when :on_embdoc_beg then + tk = get_embdoc_tk(tk) + when :on_heredoc_beg then + @heredoc_queue << retrieve_heredoc_info(tk) + @inner_lex.lex_state = EXPR_END + when :on_nl, :on_ignored_nl, :on_comment, :on_heredoc_end then + unless @heredoc_queue.empty? + get_heredoc_tk(*@heredoc_queue.shift) + end + when :on_words_beg then + tk = get_words_tk(tk) + when :on_qwords_beg then + tk = get_words_tk(tk) + when :on_symbols_beg then + tk = get_words_tk(tk) + when :on_qsymbols_beg then + tk = get_words_tk(tk) + when :on_op then + if '&.' == tk[:text] + tk[:kind] = :on_period + else + tk = get_op_tk(tk) + end + end + tk + end + + private def get_symbol_tk(tk) + is_symbol = true + symbol_tk = { :line_no => tk[:line_no], :char_no => tk[:char_no], :kind => :on_symbol } + if ":'" == tk[:text] or ':"' == tk[:text] + tk1 = get_string_tk(tk) + symbol_tk[:text] = tk1[:text] + symbol_tk[:state] = tk1[:state] + else + case (tk1 = get_squashed_tk)[:kind] + when :on_ident + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_tstring_content + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = get_squashed_tk[:state] # skip :on_tstring_end + when :on_tstring_end + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_op + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_ivar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_cvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_gvar + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_const + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + when :on_kw + symbol_tk[:text] = ":#{tk1[:text]}" + symbol_tk[:state] = tk1[:state] + else + is_symbol = false + tk = tk1 + end + end + if is_symbol + tk = symbol_tk + end + tk + end + + private def get_string_tk(tk) + string = tk[:text] + state = nil + kind = :on_tstring + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_tstring_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + elsif :on_label_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + kind = :on_symbol + break + else + string = string + inner_str_tk[:text] + if :on_embexpr_beg == inner_str_tk[:kind] then + kind = :on_dstring if :on_tstring == kind + end + end + end + { + :line_no => tk[:line_no], + :char_no => tk[:char_no], + :kind => kind, + :text => string, + :state => state + } + end + + private def get_regexp_tk(tk) + string = tk[:text] + state = nil + loop do + inner_str_tk = get_squashed_tk + if inner_str_tk.nil? + break + elsif :on_regexp_end == inner_str_tk[:kind] + string = string + inner_str_tk[:text] + state = inner_str_tk[:state] + break + else + string = string + inner_str_tk[:text] + end + end + { + :line_no => tk[:line_no], + :char_no => tk[:char_no], + :kind => :on_regexp, + :text => string, + :state => state + } + end + + private def get_embdoc_tk(tk) + string = tk[:text] + until :on_embdoc_end == (embdoc_tk = get_squashed_tk)[:kind] do + string = string + embdoc_tk[:text] + end + string = string + embdoc_tk[:text] + { + :line_no => tk[:line_no], + :char_no => tk[:char_no], + :kind => :on_embdoc, + :text => string, + :state => embdoc_tk[:state] + } + end + + private def get_heredoc_tk(heredoc_name, indent) + string = '' + start_tk = nil + prev_tk = nil + until heredoc_end?(heredoc_name, indent, tk = @inner_lex_enumerator.next) do + start_tk = tk unless start_tk + if (prev_tk.nil? or "\n" == prev_tk[:text][-1]) and 0 != tk[:char_no] + string = string + (' ' * tk[:char_no]) + end + string = string + tk[:text] + prev_tk = tk + end + start_tk = tk unless start_tk + prev_tk = tk unless prev_tk + @buf.unshift tk # closing heredoc + heredoc_tk = { + :line_no => start_tk[:line_no], + :char_no => start_tk[:char_no], + :kind => :on_heredoc, + :text => string, + :state => prev_tk[:state] + } + @buf.unshift heredoc_tk + end + + private def retrieve_heredoc_info(tk) + name = tk[:text].gsub(/\A<<[-~]?(['"`]?)(.+)\1\z/, '\2') + indent = tk[:text] =~ /\A<<[-~]/ + [name, indent] + end + + private def heredoc_end?(name, indent, tk) + result = false + if :on_heredoc_end == tk[:kind] then + tk_name = (indent ? tk[:text].gsub(/^ *(.+)\n?$/, '\1') : tk[:text].gsub(/\n\z/, '')) + if name == tk_name + result = true + end + end + result + end + + private def get_words_tk(tk) + string = '' + start_token = tk[:text] + start_quote = tk[:text].rstrip[-1] + line_no = tk[:line_no] + char_no = tk[:char_no] + state = tk[:state] + end_quote = + case start_quote + when ?( then ?) + when ?[ then ?] + when ?{ then ?} + when ?< then ?> + else start_quote + end + end_token = nil + loop do + tk = get_squashed_tk + if tk.nil? + end_token = end_quote + break + elsif :on_tstring_content == tk[:kind] then + string += tk[:text] + elsif :on_words_sep == tk[:kind] or :on_tstring_end == tk[:kind] then + if end_quote == tk[:text].strip then + end_token = tk[:text] + break + else + string += tk[:text] + end + else + string += tk[:text] + end + end + text = "#{start_token}#{string}#{end_token}" + { + :line_no => line_no, + :char_no => char_no, + :kind => :on_dstring, + :text => text, + :state => state + } + end + + private def get_op_tk(tk) + redefinable_operators = %w[! != !~ % & * ** + +@ - -@ / < << <= <=> == === =~ > >= >> [] []= ^ ` | ~] + if redefinable_operators.include?(tk[:text]) and EXPR_ARG == tk[:state] then + @inner_lex.lex_state = EXPR_ARG + tk[:kind] = :on_ident + tk[:state] = @inner_lex.lex_state + elsif tk[:text] =~ /^[-+]$/ then + tk_ahead = get_squashed_tk + case tk_ahead[:kind] + when :on_int, :on_float, :on_rational, :on_imaginary then + tk[:text] += tk_ahead[:text] + tk[:kind] = tk_ahead[:kind] + tk[:state] = tk_ahead[:state] + else + @buf.unshift tk_ahead + end + end + tk + end + + def initialize(code) + @buf = [] + @heredoc_queue = [] + @inner_lex = InnerStateLex.new(code) + @inner_lex_enumerator = Enumerator.new do |y| + @inner_lex.each do |tk| + y << tk + end + end + end + + def self.parse(code) + lex = self.new(code) + tokens = [] + begin + while tk = lex.get_squashed_tk + tokens.push tk + end + rescue StopIteration + end + tokens + end + + def self.end?(token) + (token[:state] & EXPR_END) + end +end diff --git a/lib/rdoc/parser/ruby.rb b/lib/rdoc/parser/ruby.rb index e7900c0807..130eca89c7 100644 --- a/lib/rdoc/parser/ruby.rb +++ b/lib/rdoc/parser/ruby.rb @@ -24,6 +24,7 @@ $TOKEN_DEBUG ||= nil # * aliases # * private, public, protected # * private_class_function, public_class_function +# * private_constant, public_constant # * module_function # * attr, attr_reader, attr_writer, attr_accessor # * extra accessors given on the command line @@ -139,11 +140,12 @@ $TOKEN_DEBUG ||= nil # Note that by default, the :method: directive will be ignored if there is a # standard rdocable item following it. +require 'ripper' + class RDoc::Parser::Ruby < RDoc::Parser parse_files_matching(/\.rbw?$/) - include RDoc::RubyToken include RDoc::TokenStream include RDoc::Parser::RubyTools @@ -163,10 +165,20 @@ class RDoc::Parser::Ruby < RDoc::Parser def initialize(top_level, file_name, content, options, stats) super + if /\t/ =~ content then + tab_width = @options.tab_width + content = content.split(/\n/).map do |line| + 1 while line.gsub!(/\t+/) { + ' ' * (tab_width*$&.length - $`.length % tab_width) + } && $~ + line + end.join("\n") + end + @size = 0 @token_listeners = nil - @scanner = RDoc::RubyLex.new content, @options - @scanner.exception_on_syntax_error = false + @scanner = RDoc::RipperStateLex.parse(content) + @scanner_point = 0 @prev_seek = nil @markup = @options.markup @track_visibility = :nodoc != @options.visibility @@ -175,6 +187,10 @@ class RDoc::Parser::Ruby < RDoc::Parser reset end + def tk_nl?(tk) + :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + end + ## # Retrieves the read token stream and replaces +pattern+ with +replacement+ # using gsub. If the result is only a ";" returns an empty string. @@ -194,7 +210,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # methods. def get_visibility_information tk, single # :nodoc: - vis_type = tk.name + vis_type = tk[:text] singleton = single == SINGLE vis = @@ -226,27 +242,27 @@ class RDoc::Parser::Ruby < RDoc::Parser comment = '' comment.force_encoding @encoding if @encoding first_line = true - first_comment_tk_class = nil + first_comment_tk_kind = nil tk = get_tk - while TkCOMMENT === tk - if first_line and tk.text =~ /\A#!/ then + while tk && (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) + if first_line and tk[:text] =~ /\A#!/ then skip_tkspace tk = get_tk - elsif first_line and tk.text =~ /\A#\s*-\*-/ then + elsif first_line and tk[:text] =~ /\A#\s*-\*-/ then first_line = false skip_tkspace tk = get_tk else - break if first_comment_tk_class and not first_comment_tk_class === tk - first_comment_tk_class = tk.class + break if first_comment_tk_kind and not first_comment_tk_kind === tk[:kind] + first_comment_tk_kind = tk[:kind] first_line = false - comment << tk.text << "\n" + comment << tk[:text] tk = get_tk - if TkNL === tk then + if :on_nl === tk then skip_tkspace false tk = get_tk end @@ -262,7 +278,6 @@ class RDoc::Parser::Ruby < RDoc::Parser # Consumes trailing whitespace from the token stream def consume_trailing_spaces # :nodoc: - get_tkread skip_tkspace false end @@ -309,10 +324,9 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_bool skip_tkspace tk = get_tk - case tk - when TkTRUE + if :on_kw == tk[:kind] && 'true' == tk[:text] true - when TkFALSE, TkNIL + elsif :on_kw == tk[:kind] && ('false' == tk[:text] || 'nil' == tk[:text]) false else unget_tk tk @@ -331,24 +345,24 @@ class RDoc::Parser::Ruby < RDoc::Parser given_name = '' # class ::A -> A is in the top level - case name_t - when TkCOLON2, TkCOLON3 then # bug + if :on_op == name_t[:kind] and '::' == name_t[:text] then # bug name_t = get_tk container = @top_level given_name << '::' end skip_tkspace false - given_name << name_t.name + given_name << name_t[:text] - while TkCOLON2 === peek_tk do + is_self = name_t[:kind] == :on_op && name_t[:text] == '<<' + while !is_self && (tk = peek_tk) and :on_op == tk[:kind] and '::' == tk[:text] do prev_container = container - container = container.find_module_named name_t.name + container = container.find_module_named name_t[:text] container ||= if ignore_constants then RDoc::Context.new else - c = prev_container.add_module RDoc::NormalModule, name_t.name + c = prev_container.add_module RDoc::NormalModule, name_t[:text] c.ignore unless prev_container.document_children @top_level.add_to_classes_or_modules c c @@ -359,7 +373,10 @@ class RDoc::Parser::Ruby < RDoc::Parser get_tk skip_tkspace false name_t = get_tk - given_name << '::' << name_t.name + unless :on_const == name_t[:kind] || :on_ident == name_t[:kind] + raise RDoc::Error, "Invalid class or module definition: #{given_name}" + end + given_name << '::' << name_t[:text] end skip_tkspace false @@ -371,9 +388,13 @@ class RDoc::Parser::Ruby < RDoc::Parser # Return a superclass, which can be either a constant of an expression def get_class_specification - case peek_tk - when TkSELF then return 'self' - when TkGVAR then return '' + tk = peek_tk + if tk.nil? + return '' + elsif :on_kw == tk[:kind] && 'self' == tk[:text] + return 'self' + elsif :on_gvar == tk[:kind] + return '' end res = get_constant @@ -383,9 +404,10 @@ class RDoc::Parser::Ruby < RDoc::Parser get_tkread # empty out read buffer tk = get_tk + return res unless tk - case tk - when TkNL, TkCOMMENT, TkSEMICOLON then + case tk[:kind] + when :on_nl, :on_comment, :on_embdoc, :on_semicolon then unget_tk(tk) return res end @@ -403,8 +425,8 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace false tk = get_tk - while TkCOLON2 === tk or TkCOLON3 === tk or TkCONSTANT === tk do - res += tk.name + while tk && ((:on_op == tk[:kind] && '::' == tk[:text]) || :on_const == tk[:kind]) do + res += tk[:text] tk = get_tk end @@ -420,7 +442,7 @@ class RDoc::Parser::Ruby < RDoc::Parser nest = 0 - while TkLPAREN === (tk = peek_tk) or TkfLPAREN === tk do + while :on_lparen == (tk = peek_tk)[:kind] do get_tk skip_tkspace nest += 1 @@ -431,7 +453,7 @@ class RDoc::Parser::Ruby < RDoc::Parser while nest > 0 skip_tkspace tk = get_tk - nest -= 1 if TkRPAREN === tk + nest -= 1 if :on_rparen == tk[:kind] end name @@ -446,13 +468,19 @@ class RDoc::Parser::Ruby < RDoc::Parser # won't catch all cases (such as "a = yield + 1" def get_end_token tk # :nodoc: - case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN + case tk[:kind] + when :on_lparen + { + :kind => :on_rparen, + :text => ')' + } + when :on_rparen nil else - TkNL + { + :kind => :on_nl, + :text => "\n" + } end end @@ -461,11 +489,11 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_method_container container, name_t # :nodoc: prev_container = container - container = container.find_module_named(name_t.name) + container = container.find_module_named(name_t[:text]) unless container then constant = prev_container.constants.find do |const| - const.name == name_t.name + const.name == name_t[:text] end if constant then @@ -476,21 +504,21 @@ class RDoc::Parser::Ruby < RDoc::Parser unless container then # TODO seems broken, should starting at Object in @store - obj = name_t.name.split("::").inject(Object) do |state, item| + obj = name_t[:text].split("::").inject(Object) do |state, item| state.const_get(item) end rescue nil type = obj.class == Class ? RDoc::NormalClass : RDoc::NormalModule unless [Class, Module].include?(obj.class) then - warn("Couldn't find #{name_t.name}. Assuming it's a module") + warn("Couldn't find #{name_t[:text]}. Assuming it's a module") end if type == RDoc::NormalClass then sclass = obj.superclass ? obj.superclass.name : nil - container = prev_container.add_class type, name_t.name, sclass + container = prev_container.add_class type, name_t[:text], sclass else - container = prev_container.add_module type, name_t.name + container = prev_container.add_module type, name_t[:text] end record_location container @@ -504,32 +532,26 @@ class RDoc::Parser::Ruby < RDoc::Parser def get_symbol_or_name tk = get_tk - case tk - when TkSYMBOL then - text = tk.text.sub(/^:/, '') + case tk[:kind] + when :on_symbol then + text = tk[:text].sub(/^:/, '') - if TkASSIGN === peek_tk then + next_tk = peek_tk + if next_tk && :on_op == next_tk[:kind] && '=' == next_tk[:text] then get_tk text << '=' end text - when TkId, TkOp then - tk.name - when TkAMPER, - TkDSTRING, - TkSTAR, - TkSTRING then - tk.text + when :on_ident, :on_const, :on_gvar, :on_cvar, :on_ivar, :on_op, :on_kw then + tk[:text] + when :on_tstring, :on_dstring then + tk[:text][1..-2] else raise RDoc::Error, "Name or symbol expected (got #{tk})" end end - def stop_at_EXPR_END # :nodoc: - @scanner.lex_state == :EXPR_END || !@scanner.continue - end - ## # Marks containers between +container+ and +ancestor+ as ignored @@ -570,7 +592,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def make_message message prefix = "#{@file_name}:" - prefix << "#{@scanner.line_no}:#{@scanner.char_no}:" if @scanner + tk = peek_tk + prefix << "#{tk[:line_no]}:#{tk[:char_no]}:" if tk "#{prefix} #{message}" end @@ -589,7 +612,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # +comment+. def parse_attr(context, single, tk, comment) - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg 1 if args.size > 0 then @@ -598,7 +621,7 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace false tk = get_tk - if TkCOMMA === tk then + if :on_comma == tk[:kind] then rw = "RW" if get_bool else unget_tk tk @@ -618,7 +641,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # comment for each to +comment+. def parse_attr_accessor(context, single, tk, comment) - line_no = tk.line_no + line_no = tk[:line_no] args = parse_symbol_arg rw = "?" @@ -629,7 +652,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # and add found items appropriately but here we do not. I'm not sure why. return if @track_visibility and not tmp.document_self - case tk.name + case tk[:text] when "attr_reader" then rw = "R" when "attr_writer" then rw = "W" when "attr_accessor" then rw = "RW" @@ -647,21 +670,19 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses an +alias+ in +context+ with +comment+ def parse_alias(context, single, tk, comment) - line_no = tk.line_no + line_no = tk[:line_no] skip_tkspace - if TkLPAREN === peek_tk then + if :on_lparen === peek_tk[:kind] then get_tk skip_tkspace end new_name = get_symbol_or_name - @scanner.lex_state = :EXPR_FNAME - skip_tkspace - if TkCOMMA === peek_tk then + if :on_comma === peek_tk[:kind] then get_tk skip_tkspace end @@ -688,34 +709,38 @@ class RDoc::Parser::Ruby < RDoc::Parser # Extracts call parameters from the token stream. def parse_call_parameters(tk) - end_token = case tk - when TkLPAREN, TkfLPAREN - TkRPAREN - when TkRPAREN + end_token = case tk[:kind] + when :on_lparen + :on_rparen + when :on_rparen return "" else - TkNL + :on_nl end nest = 0 loop do - case tk - when TkSEMICOLON + break if tk.nil? + case tk[:kind] + when :on_semicolon break - when TkLPAREN, TkfLPAREN + when :on_lparen nest += 1 when end_token - if end_token == TkRPAREN + if end_token == :on_rparen nest -= 1 - break if @scanner.lex_state == :EXPR_END and nest <= 0 + break if RDoc::RipperStateLex.end?(tk) and nest <= 0 else - break unless @scanner.continue + break if RDoc::RipperStateLex.end?(tk) end - when TkCOMMENT, TkASSIGN, TkOPASGN + when :on_comment, :on_embdoc unget_tk(tk) break - when nil then - break + when :on_op + if tk[:text] =~ /^(.{1,2})?=$/ + unget_tk(tk) + break + end end tk = get_tk end @@ -727,28 +752,27 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses a class in +context+ with +comment+ def parse_class container, single, tk, comment - line_no = tk.line_no + line_no = tk[:line_no] declaration_context = container container, name_t, given_name = get_class_or_module container - cls = - case name_t - when TkCONSTANT - parse_class_regular container, declaration_context, single, - name_t, given_name, comment - when TkLSHFT - case name = get_class_specification - when 'self', container.name - parse_statements container, SINGLE - return # don't update line - else - parse_class_singleton container, name, comment - end + if name_t[:kind] == :on_const + cls = parse_class_regular container, declaration_context, single, + name_t, given_name, comment + elsif name_t[:kind] == :on_op && name_t[:text] == '<<' + case name = get_class_specification + when 'self', container.name + read_documentation_modifiers cls, RDoc::CLASS_MODIFIERS + parse_statements container, SINGLE + return # don't update line else - warn "Expected class name or '<<'. Got #{name_t.class}: #{name_t.text.inspect}" - return + cls = parse_class_singleton container, name, comment end + else + warn "Expected class name or '<<'. Got #{name_t[:kind]}: #{name_t[:text].inspect}" + return + end cls.line = line_no @@ -770,7 +794,8 @@ class RDoc::Parser::Ruby < RDoc::Parser given_name = $' end - if TkLT === peek_tk then + tk = peek_tk + if tk[:kind] == :on_op && tk[:text] == '<' then get_tk skip_tkspace superclass = get_class_specification @@ -840,40 +865,52 @@ class RDoc::Parser::Ruby < RDoc::Parser # true, no found constants will be added to RDoc. def parse_constant container, tk, comment, ignore_constants = false - line_no = tk.line_no + line_no = tk[:line_no] - name = tk.name + name = tk[:text] skip_tkspace false return unless name =~ /^\w+$/ eq_tk = get_tk - if TkCOLON2 === eq_tk then + if :on_op == eq_tk[:kind] && '::' == eq_tk[:text] then unget_tk eq_tk unget_tk tk container, name_t, = get_class_or_module container, ignore_constants - name = name_t.name + name = name_t[:text] eq_tk = get_tk end - unless TkASSIGN === eq_tk then - unget_tk eq_tk - return false + is_array_or_hash = false + if eq_tk && :on_lbracket == eq_tk[:kind] + nest = 1 + while bracket_tk = get_tk + case bracket_tk[:kind] + when :on_lbracket + nest += 1 + when :on_rbracket + nest -= 1 + break if nest == 0 + end + end + skip_tkspace false + eq_tk = get_tk + is_array_or_hash = true end - if TkGT === peek_tk then + unless eq_tk && :on_op == eq_tk[:kind] && '=' == eq_tk[:text] then unget_tk eq_tk - return + return false end value = '' con = RDoc::Constant.new name, value, comment - body = parse_constant_body container, con + body = parse_constant_body container, con, is_array_or_hash return unless body @@ -882,13 +919,15 @@ class RDoc::Parser::Ruby < RDoc::Parser con.line = line_no read_documentation_modifiers con, RDoc::CONSTANT_MODIFIERS + return if is_array_or_hash + @stats.add_constant con container.add_constant con true end - def parse_constant_body container, constant # :nodoc: + def parse_constant_body container, constant, is_array_or_hash # :nodoc: nest = 0 rhs_name = '' @@ -896,44 +935,51 @@ class RDoc::Parser::Ruby < RDoc::Parser tk = get_tk + body = nil loop do - case tk - when TkSEMICOLON then + break if tk.nil? + if :on_semicolon == tk[:kind] then break if nest <= 0 - when TkLPAREN, TkfLPAREN, TkLBRACE, TkfLBRACE, TkLBRACK, TkfLBRACK, - TkDO, TkIF, TkUNLESS, TkCASE, TkDEF, TkBEGIN then + elsif [:on_tlambeg, :on_lparen, :on_lbrace, :on_lbracket].include?(tk[:kind]) then + nest += 1 + elsif (:on_kw == tk[:kind] && 'def' == tk[:text]) then nest += 1 - when TkRPAREN, TkRBRACE, TkRBRACK, TkEND then + elsif (:on_kw == tk[:kind] && %w{do if unless case begin}.include?(tk[:text])) then + if (RDoc::RipperStateLex::EXPR_LABEL & tk[:state]) == 0 + nest += 1 + end + elsif [:on_rparen, :on_rbrace, :on_rbracket].include?(tk[:kind]) || + (:on_kw == tk[:kind] && 'end' == tk[:text]) then nest -= 1 - when TkCOMMENT then - if nest <= 0 and stop_at_EXPR_END then - unget_tk tk + elsif (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then + unget_tk tk + if nest <= 0 and RDoc::RipperStateLex.end?(tk) then + body = get_tkread_clean(/^[ \t]+/, '') + read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS break else - unget_tk tk read_documentation_modifiers constant, RDoc::CONSTANT_MODIFIERS end - when TkCONSTANT then - rhs_name << tk.name + elsif :on_const == tk[:kind] then + rhs_name << tk[:text] - if nest <= 0 and TkNL === peek_tk then - create_module_alias container, constant, rhs_name + next_tk = peek_tk + if nest <= 0 and (next_tk.nil? || :on_nl == next_tk[:kind]) then + create_module_alias container, constant, rhs_name unless is_array_or_hash break end - when TkNL then - if nest <= 0 and stop_at_EXPR_END then + elsif :on_nl == tk[:kind] then + if nest <= 0 and RDoc::RipperStateLex.end?(tk) then unget_tk tk break end - when TkCOLON2, TkCOLON3 then + elsif :on_op == tk[:kind] && '::' == tk[:text] rhs_name << '::' - when nil then - break end tk = get_tk end - get_tkread_clean(/^[ \t]+/, '') + body ? body : get_tkread_clean(/^[ \t]+/, '') end ## @@ -942,8 +988,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_comment container, tk, comment return parse_comment_tomdoc container, tk, comment if @markup == 'tomdoc' - column = tk.char_no - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] text = comment.text @@ -988,12 +1034,11 @@ class RDoc::Parser::Ruby < RDoc::Parser record_location meth meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = { :line_no => 1, :char_no => 1, :kind => :on_sp, :text => ' ' * column } + position_comment = { :line_no => line_no, :char_no => 1, :kind => :on_comment } + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = { :line_no => 0, :char_no => 0, :kind => :on_nl, :text => "\n" } + meth.add_tokens [position_comment, newline, indent] meth.params = if text.sub!(/^#\s+:?args?:\s*(.*?)\s*$/i, '') then @@ -1022,8 +1067,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_comment_tomdoc container, tk, comment return unless signature = RDoc::TomDoc.signature(comment) - column = tk.char_no - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] name, = signature.split %r%[ \(]%, 2 @@ -1032,12 +1077,11 @@ class RDoc::Parser::Ruby < RDoc::Parser meth.line = line_no meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = { :line_no => 1, :char_no => 1, :kind => :on_sp, :text => ' ' * column } + position_comment = { :line_no => line_no, :char_no => 1, :kind => :on_comment } + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = { :line_no => 0, :char_no => 0, :kind => :on_nl, :text => "\n" } + meth.add_tokens [position_comment, newline, indent] meth.call_seq = signature @@ -1067,7 +1111,7 @@ class RDoc::Parser::Ruby < RDoc::Parser record_location obj end - return unless TkCOMMA === peek_tk + return if peek_tk.nil? || :on_comma != peek_tk[:kind] get_tk end @@ -1079,11 +1123,14 @@ class RDoc::Parser::Ruby < RDoc::Parser # Returns true if the comment was not consumed. def parse_identifier container, single, tk, comment # :nodoc: - case tk.name + case tk[:text] when 'private', 'protected', 'public', 'private_class_method', 'public_class_method', 'module_function' then parse_visibility container, single, tk return true + when 'private_constant', 'public_constant' + parse_constant_visibility container, single, tk + return true when 'attr' then parse_attr container, single, tk, comment when /^attr_(reader|writer|accessor)$/ then @@ -1172,8 +1219,8 @@ class RDoc::Parser::Ruby < RDoc::Parser # Parses a meta-programmed method def parse_meta_method(container, single, tk, comment) - column = tk.char_no - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk @@ -1195,12 +1242,11 @@ class RDoc::Parser::Ruby < RDoc::Parser remove_token_listener self meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - position_comment = TkCOMMENT.new 0, line_no, 1 - position_comment.value = "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [position_comment, NEWLINE_TOKEN, indent] + indent = { :line_no => 1, :char_no => 1, :kind => :on_sp, :text => ' ' * column } + position_comment = { :line_no => line_no, :char_no => 1, :kind => :on_comment } + position_comment[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = { :line_no => 0, :char_no => 0, :kind => :on_nl, :text => "\n" } + meth.add_tokens [position_comment, newline, indent] meth.add_tokens @token_stream parse_meta_method_params container, single, meth, tk, comment @@ -1224,17 +1270,16 @@ class RDoc::Parser::Ruby < RDoc::Parser name_t = get_tk - case name_t - when TkSYMBOL then - name_t.text[1..-1] - when TkSTRING then - name_t.value[1..-2] - when TkASSIGN then # ignore + if :on_symbol == name_t[:kind] then + name_t[:text][1..-1] + elsif :on_tstring == name_t[:kind] then + name_t[:text][1..-2] + elsif :on_op == name_t[:kind] && '=' == name_t[:text] then # ignore remove_token_listener self nil else - warn "unknown name token #{name_t.inspect} for meta-method '#{tk.name}'" + warn "unknown name token #{name_t.inspect} for meta-method '#{tk[:text]}'" 'unknown' end end @@ -1254,14 +1299,13 @@ class RDoc::Parser::Ruby < RDoc::Parser last_tk = tk while tk = get_tk do - case tk - when TkSEMICOLON then + if :on_semicolon == tk[:kind] then break - when TkNL then - break unless last_tk and TkCOMMA === last_tk - when TkSPACE then + elsif :on_nl == tk[:kind] then + break unless last_tk and :on_comma == last_tk[:kind] + elsif :on_sp == tk[:kind] then # expression continues - when TkDO then + elsif :on_kw == tk[:kind] && 'do' == tk[:text] then parse_statements container, single, meth break else @@ -1278,8 +1322,8 @@ class RDoc::Parser::Ruby < RDoc::Parser singleton = nil added_container = false name = nil - column = tk.char_no - line_no = tk.line_no + column = tk[:char_no] + line_no = tk[:line_no] start_collecting_tokens add_token tk @@ -1299,12 +1343,11 @@ class RDoc::Parser::Ruby < RDoc::Parser meth.line = line_no meth.start_collecting_tokens - indent = TkSPACE.new 0, 1, 1 - indent.set_text " " * column - - token = TkCOMMENT.new 0, line_no, 1 - token.set_text "# File #{@top_level.relative_name}, line #{line_no}" - meth.add_tokens [token, NEWLINE_TOKEN, indent] + indent = { :line_no => 1, :char_no => 1, :kind => :on_sp, :text => ' ' * column } + token = { :line_no => line_no, :char_no => 1, :kind => :on_comment } + token[:text] = "# File #{@top_level.relative_name}, line #{line_no}" + newline = { :line_no => 0, :char_no => 0, :kind => :on_nl, :text => "\n" } + meth.add_tokens [token, newline, indent] meth.add_tokens @token_stream parse_method_params_and_body container, single, meth, added_container @@ -1325,7 +1368,6 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_method_params_and_body container, single, meth, added_container token_listener meth do - @scanner.continue = false parse_method_parameters meth if meth.document_self or not @track_visibility then @@ -1368,15 +1410,13 @@ class RDoc::Parser::Ruby < RDoc::Parser # it is a singleton or regular method. def parse_method_name container # :nodoc: - @scanner.lex_state = :EXPR_FNAME - skip_tkspace name_t = get_tk - back_tk = skip_tkspace + back_tk = skip_tkspace(false) singleton = false - case dot = get_tk - when TkDOT, TkCOLON2 then + dot = get_tk + if dot[:kind] == :on_period || (dot[:kind] == :on_op && dot[:text] == '::') then singleton = true name, container = parse_method_name_singleton container, name_t @@ -1397,16 +1437,15 @@ class RDoc::Parser::Ruby < RDoc::Parser # is parsed from the token stream for a regular method. def parse_method_name_regular container, name_t # :nodoc: - case name_t - when TkSTAR, TkAMPER then - name_t.text + if :on_op == name_t[:kind] && (%w{* & [] []= <<}.include?(name_t[:text])) then + name_t[:text] else - unless name_t.respond_to? :name then + unless [:on_kw, :on_const, :on_ident].include?(name_t[:kind]) then warn "expected method name token, . or ::, got #{name_t.inspect}" skip_method container return end - name_t.name + name_t[:text] end end @@ -1416,47 +1455,43 @@ class RDoc::Parser::Ruby < RDoc::Parser # for a singleton method. def parse_method_name_singleton container, name_t # :nodoc: - @scanner.lex_state = :EXPR_FNAME skip_tkspace name_t2 = get_tk - name = - case name_t - when TkSELF, TkMOD then - case name_t2 - # NOTE: work around '[' being consumed early and not being re-tokenized - # as a TkAREF - when TkfLBRACK then - get_tk - '[]' - else - name_t2.name - end - when TkCONSTANT then - name = name_t2.name + if (:on_kw == name_t[:kind] && 'self' == name_t[:text]) || (:on_op == name_t[:kind] && '%' == name_t[:text]) then + # NOTE: work around '[' being consumed early and not being re-tokenized + # as a TkAREF + if :on_lbracket == name_t2[:kind] + get_tk + name = '[]' + else + name = name_t2[:text] + end + elsif :on_const == name_t[:kind] then + name = name_t2[:text] - container = get_method_container container, name_t + container = get_method_container container, name_t - return unless container + return unless container - name - when TkIDENTIFIER, TkIVAR, TkGVAR then - parse_method_dummy container + name + elsif :on_ident == name_t[:kind] || :on_ivar == name_t[:kind] || :on_gvar == name_t[:kind] then + parse_method_dummy container - nil - when TkTRUE, TkFALSE, TkNIL then - klass_name = "#{name_t.name.capitalize}Class" - container = @store.find_class_named klass_name - container ||= @top_level.add_class RDoc::NormalClass, klass_name + name = nil + elsif (:on_kw == name_t[:kind]) && ('true' == name_t[:text] || 'false' == name_t[:text] || 'nil' == name_t[:text]) then + klass_name = "#{name_t[:text].capitalize}Class" + container = @store.find_class_named klass_name + container ||= @top_level.add_class RDoc::NormalClass, klass_name - name_t2.name - else - warn "unexpected method name token #{name_t.inspect}" - # break - skip_method container + name = name_t2[:text] + else + warn "unexpected method name token #{name_t.inspect}" + # break + skip_method container - nil - end + name = nil + end return name, container end @@ -1472,43 +1507,49 @@ class RDoc::Parser::Ruby < RDoc::Parser return '' unless end_token nest = 0 + continue = false - loop do - case tk - when TkSEMICOLON then + while tk != nil do + case tk[:kind] + when :on_semicolon then break if nest == 0 - when TkLBRACE, TkfLBRACE then + when :on_lbrace then nest += 1 - when TkRBRACE then + when :on_rbrace then nest -= 1 if nest <= 0 # we might have a.each { |i| yield i } unget_tk(tk) if nest < 0 break end - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when end_token then - if end_token == TkRPAREN + when end_token[:kind] then + if end_token[:kind] == :on_rparen nest -= 1 break if nest <= 0 else - break unless @scanner.continue + break end - when TkRPAREN then + when :on_rparen then nest -= 1 - when method && method.block_params.nil? && TkCOMMENT then - unget_tk tk - read_documentation_modifiers method, modifiers - @read.pop - when TkCOMMENT then + when :on_comment, :on_embdoc then @read.pop - when nil then - break + if :on_nl == end_token[:kind] and "\n" == tk[:text][-1] and + (!continue or (RDoc::RipperStateLex::EXPR_LABEL & tk[:state]) != 0) then + if method && method.block_params.nil? then + unget_tk tk + read_documentation_modifiers method, modifiers + end + break if !continue and nest <= 0 + end + when :on_comma then + continue = true + when :on_ident then + continue = false if continue end tk = get_tk end - @scanner.first_in_method_statement = true get_tkread_clean(/\s+/, ' ') end @@ -1539,7 +1580,7 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_module container, single, tk, comment container, name_t, = get_class_or_module container - name = name_t.name + name = name_t[:text] mod = container.add_module RDoc::NormalModule, name mod.ignore unless container.document_children @@ -1562,12 +1603,12 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace_comment tk = get_tk - if TkLPAREN === tk then + if :on_lparen == tk[:kind] then skip_tkspace_comment tk = get_tk end - name = tk.text if TkSTRING === tk + name = tk[:text][1..-2] if :on_tstring == tk[:kind] if name then @top_level.add_require RDoc::Require.new(name, comment) @@ -1582,14 +1623,15 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_rescue skip_tkspace false + nest = 0 while tk = get_tk - case tk - when TkNL, TkSEMICOLON then + case tk[:kind] + when :on_nl, :on_semicolon, :on_comment then break - when TkCOMMA then + when :on_comma then skip_tkspace false - get_tk if TkNL === peek_tk + get_tk if :on_nl == peek_tk[:kind] end skip_tkspace false @@ -1613,14 +1655,32 @@ class RDoc::Parser::Ruby < RDoc::Parser keep_comment = false try_parse_comment = false - non_comment_seen = true unless TkCOMMENT === tk + non_comment_seen = true unless (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) - case tk - when TkNL then - skip_tkspace - tk = get_tk + case tk[:kind] + when :on_nl, :on_ignored_nl, :on_comment, :on_embdoc then + if :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] + skip_tkspace + tk = get_tk + else + past_tokens = @read.size > 1 ? @read[0..-2] : [] + nl_position = 0 + past_tokens.reverse.each_with_index do |read_tk, i| + if read_tk =~ /^\n$/ then + nl_position = (past_tokens.size - 1) - i + break + elsif read_tk =~ /^#.*\n$/ then + nl_position = ((past_tokens.size - 1) - i) + 1 + break + end + end + comment_only_line = past_tokens[nl_position..-1].all?{ |c| c =~ /^\s+$/ } + unless comment_only_line then + tk = get_tk + end + end - if TkCOMMENT === tk then + if tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) then if non_comment_seen then # Look for RDoc in a comment about to be thrown away non_comment_seen = parse_comment container, tk, comment unless @@ -1630,15 +1690,14 @@ class RDoc::Parser::Ruby < RDoc::Parser comment.force_encoding @encoding if @encoding end - while TkCOMMENT === tk do - comment << tk.text << "\n" - - tk = get_tk + while tk and (:on_comment == tk[:kind] or :on_embdoc == tk[:kind]) do + comment << tk[:text] + comment << "\n" unless "\n" == tk[:text].chars.to_a.last - if TkNL === tk then + if tk[:text].size > 1 && "\n" == tk[:text].chars.to_a.last then skip_tkspace false # leading spaces - tk = get_tk end + tk = get_tk end comment = new_comment comment @@ -1661,59 +1720,76 @@ class RDoc::Parser::Ruby < RDoc::Parser keep_comment = true container.current_line_visibility = nil - when TkCLASS then - parse_class container, single, tk, comment + when :on_kw then + case tk[:text] + when 'class' then + parse_class container, single, tk, comment - when TkMODULE then - parse_module container, single, tk, comment + when 'module' then + parse_module container, single, tk, comment - when TkDEF then - parse_method container, single, tk, comment + when 'def' then + parse_method container, single, tk, comment - when TkCONSTANT then - unless parse_constant container, tk, comment, current_method then - try_parse_comment = true - end + when 'alias' then + parse_alias container, single, tk, comment unless current_method - when TkALIAS then - parse_alias container, single, tk, comment unless current_method + when 'yield' then + if current_method.nil? then + warn "Warning: yield outside of method" if container.document_self + else + parse_yield container, single, tk, current_method + end - when TkYIELD then - if current_method.nil? then - warn "Warning: yield outside of method" if container.document_self - else - parse_yield container, single, tk, current_method - end + when 'until', 'while' then + if (RDoc::RipperStateLex::EXPR_LABEL & tk[:state]) == 0 + nest += 1 + skip_optional_do_after_expression + end - # Until and While can have a 'do', which shouldn't increase the nesting. - # We can't solve the general case, but we can handle most occurrences by - # ignoring a do at the end of a line. + # Until and While can have a 'do', which shouldn't increase the nesting. + # We can't solve the general case, but we can handle most occurrences by + # ignoring a do at the end of a line. - when TkUNTIL, TkWHILE then - nest += 1 - skip_optional_do_after_expression + # 'for' is trickier + when 'for' then + nest += 1 + skip_for_variable + skip_optional_do_after_expression - # 'for' is trickier - when TkFOR then - nest += 1 - skip_for_variable - skip_optional_do_after_expression + when 'case', 'do', 'if', 'unless', 'begin' then + if (RDoc::RipperStateLex::EXPR_LABEL & tk[:state]) == 0 + nest += 1 + end - when TkCASE, TkDO, TkIF, TkUNLESS, TkBEGIN then - nest += 1 + when 'super' then + current_method.calls_super = true if current_method - when TkSUPER then - current_method.calls_super = true if current_method + when 'rescue' then + parse_rescue - when TkRESCUE then - parse_rescue + when 'end' then + nest -= 1 + if nest == 0 then + container.ongoing_visibility = save_visibility + + parse_comment container, tk, comment unless comment.empty? + + return + end + end + + when :on_const then + unless parse_constant container, tk, comment, current_method then + try_parse_comment = true + end - when TkIDENTIFIER then + when :on_ident then if nest == 1 and current_method.nil? then keep_comment = parse_identifier container, single, tk, comment end - case tk.name + case tk[:text] when "require" then parse_require container, comment when "include" then @@ -1722,15 +1798,6 @@ class RDoc::Parser::Ruby < RDoc::Parser parse_extend_or_include RDoc::Extend, container, comment end - when TkEND then - nest -= 1 - if nest == 0 then - container.ongoing_visibility = save_visibility - - parse_comment container, tk, comment unless comment.empty? - - return - end else try_parse_comment = nest == 1 end @@ -1762,8 +1829,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def parse_symbol_arg(no = nil) skip_tkspace_comment - case tk = get_tk - when TkLPAREN + tk = get_tk + if tk[:kind] == :on_lparen parse_symbol_arg_paren no else parse_symbol_arg_space no, tk @@ -1785,10 +1852,10 @@ class RDoc::Parser::Ruby < RDoc::Parser end skip_tkspace_comment - case tk2 = get_tk - when TkRPAREN + case (tk2 = get_tk)[:kind] + when :on_rparen break - when TkCOMMA + when :on_comma else warn("unexpected token: '#{tk2.inspect}'") if $DEBUG_RDOC break @@ -1815,7 +1882,7 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace false tk1 = get_tk - unless TkCOMMA === tk1 then + if tk1.nil? || :on_comma != tk1[:kind] then unget_tk tk1 break end @@ -1834,12 +1901,12 @@ class RDoc::Parser::Ruby < RDoc::Parser # Returns symbol text from the next token def parse_symbol_in_arg - case tk = get_tk - when TkSYMBOL - tk.text.sub(/^:/, '') - when TkSTRING - eval @read[-1] - when TkDSTRING, TkIDENTIFIER then + tk = get_tk + if :on_symbol == tk[:kind] then + tk[:text].sub(/^:/, '') + elsif :on_tstring == tk[:kind] then + tk[:text][1..-2] + elsif :on_dstring == tk[:kind] or :on_ident == tk[:kind] then nil # ignore else warn("Expected symbol or string, got #{tk.inspect}") if $DEBUG_RDOC @@ -1873,21 +1940,37 @@ class RDoc::Parser::Ruby < RDoc::Parser skip_tkspace_comment false - case peek_tk - # Ryan Davis suggested the extension to ignore modifiers, because he - # often writes - # - # protected unless $TESTING - # - when TkNL, TkUNLESS_MOD, TkIF_MOD, TkSEMICOLON then + ptk = peek_tk + # Ryan Davis suggested the extension to ignore modifiers, because he + # often writes + # + # protected unless $TESTING + # + if [:on_nl, :on_semicolon].include?(ptk[:kind]) || (:on_kw == ptk[:kind] && (['if', 'unless'].include?(ptk[:text]))) then container.ongoing_visibility = vis - when TkDEF + elsif :on_kw == ptk[:kind] && 'def' == ptk[:text] container.current_line_visibility = vis else update_visibility container, vis_type, vis, singleton end end + ## + # Parses a Module#private_constant or Module#public_constant call from +tk+. + + def parse_constant_visibility(container, single, tk) + args = parse_symbol_arg + case tk[:text] + when 'private_constant' + vis = :private + when 'public_constant' + vis = :public + else + raise RDoc::Error, 'Unreachable' + end + container.set_constant_visibility_for args, vis + end + ## # Determines the block parameter for +context+ @@ -1895,7 +1978,6 @@ class RDoc::Parser::Ruby < RDoc::Parser return if method.block_params get_tkread - @scanner.continue = false method.block_params = parse_method_or_yield_parameters end @@ -1919,11 +2001,10 @@ class RDoc::Parser::Ruby < RDoc::Parser while tk = get_tk do tokens << tk - case tk - when TkNL, TkDEF then + if :on_nl == tk[:kind] or (:on_kw == tk[:kind] && 'def' == tk[:text]) then return - when TkCOMMENT then - return unless tk.text =~ /\s*:?([\w-]+):\s*(.*)/ + elsif :on_comment == tk[:kind] or :on_embdoc == tk[:kind] then + return unless tk[:text] =~ /\s*:?([\w-]+):\s*(.*)/ directive = $1.downcase @@ -1933,7 +2014,7 @@ class RDoc::Parser::Ruby < RDoc::Parser end end ensure - unless tokens.length == 1 and TkCOMMENT === tokens.first then + unless tokens.length == 1 and (:on_comment == tokens.first[:kind] or :on_embdoc == tokens.first[:kind]) then tokens.reverse_each do |token| unget_tk token end @@ -1947,6 +2028,7 @@ class RDoc::Parser::Ruby < RDoc::Parser # See also RDoc::Markup::PreProcess#handle_directive def read_documentation_modifiers context, allowed + skip_tkspace(false) directive, value = read_directive allowed return unless directive @@ -1995,27 +2077,25 @@ class RDoc::Parser::Ruby < RDoc::Parser rescue StandardError => e bytes = '' - 20.times do @scanner.ungetc end - count = 0 - 60.times do |i| - count = i - byte = @scanner.getc - break unless byte - bytes << byte + if @scanner_point >= @scanner.size + now_line_no = @scanner[@scanner.size - 1][:line_no] + else + now_line_no = peek_tk[:line_no] end - count -= 20 - count.times do @scanner.ungetc end $stderr.puts <<-EOF -#{self.class} failure around line #{@scanner.line_no} of +#{self.class} failure around line #{now_line_no} of #{@file_name} EOF unless bytes.empty? then $stderr.puts - $stderr.puts bytes.inspect + now_line_no = peek_tk[:line_no] + start_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no } + end_index = @scanner.find_index { |tk| tk[:line_no] == now_line_no + 1 } - 1 + $stderr.puts @scanner[start_index..end_index].join end raise e @@ -2034,31 +2114,36 @@ class RDoc::Parser::Ruby < RDoc::Parser b_nest = 0 nest = 0 - @scanner.continue = false loop do - case tk - when TkSEMICOLON, TkNL then + break unless tk + case tk[:kind] + when :on_semicolon, :on_nl, :on_ignored_nl then break if b_nest.zero? - when TkLPAREN, TkfLPAREN then + when :on_lparen then nest += 1 - when TkRPAREN then + when :on_rparen then nest -= 1 - when TkBEGIN then - b_nest += 1 - when TkEND then - b_nest -= 1 - when TkDO - break if nest.zero? - when nil then - break + when :on_kw then + case tk[:text] + when 'begin' + b_nest += 1 + when 'end' + b_nest -= 1 + when 'do' + break if nest.zero? + end + when :on_comment, :on_embdoc then + if b_nest.zero? and "\n" == tk[:text][-1] then + break + end end tk = get_tk end skip_tkspace false - get_tk if TkDO === peek_tk + get_tk if peek_tk && :on_kw == peek_tk[:kind] && 'do' == peek_tk[:text] end ## @@ -2069,7 +2154,7 @@ class RDoc::Parser::Ruby < RDoc::Parser get_tk skip_tkspace false tk = get_tk - unget_tk(tk) unless TkIN === tk + unget_tk(tk) unless :on_kw == tk[:kind] and 'in' == tk[:text] end ## @@ -2087,7 +2172,8 @@ class RDoc::Parser::Ruby < RDoc::Parser def skip_tkspace_comment(skip_nl = true) loop do skip_tkspace skip_nl - return unless TkCOMMENT === peek_tk + next_tk = peek_tk + return if next_tk.nil? || (:on_comment != next_tk[:kind] and :on_embdoc != next_tk[:kind]) get_tk end end diff --git a/lib/rdoc/parser/ruby_tools.rb b/lib/rdoc/parser/ruby_tools.rb index e35b420f30..0a566827a5 100644 --- a/lib/rdoc/parser/ruby_tools.rb +++ b/lib/rdoc/parser/ruby_tools.rb @@ -5,8 +5,6 @@ module RDoc::Parser::RubyTools - include RDoc::RubyToken - ## # Adds a token listener +obj+, but you should probably use token_listener @@ -22,16 +20,25 @@ module RDoc::Parser::RubyTools tk = nil if @tokens.empty? then - tk = @scanner.token - @read.push @scanner.get_readed - puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG + if @scanner_point >= @scanner.size + return nil + else + tk = @scanner[@scanner_point] + @scanner_point += 1 + @read.push tk[:text] + puts "get_tk1 => #{tk.inspect}" if $TOKEN_DEBUG + end else @read.push @unget_read.shift tk = @tokens.shift puts "get_tk2 => #{tk.inspect}" if $TOKEN_DEBUG end - tk = nil if TkEND_OF_SCRIPT === tk + if tk == nil || :on___end__ == tk[:kind] + tk = nil + end + + return nil unless tk # inform any listeners of our shiny new token @token_listeners.each do |obj| @@ -102,19 +109,24 @@ module RDoc::Parser::RubyTools @tokens = [] @unget_read = [] @nest = 0 + @scanner_point = 0 + end + + def tk_nl?(tk) + :on_nl == tk[:kind] or :on_ignored_nl == tk[:kind] end ## # Skips whitespace tokens including newlines if +skip_nl+ is true - def skip_tkspace(skip_nl = true) # HACK dup + def skip_tkspace(skip_nl = true) tokens = [] - while TkSPACE === (tk = get_tk) or (skip_nl and TkNL === tk) do - tokens.push tk + while (tk = get_tk) and (:on_sp == tk[:kind] or (skip_nl and tk_nl?(tk))) do + tokens.push(tk) end - unget_tk tk + unget_tk(tk) tokens end -- cgit v1.2.3