diff options
author | yui-knk <spiketeika@gmail.com> | 2023-10-23 09:07:23 +0900 |
---|---|---|
committer | Yuichiro Kaneko <spiketeika@gmail.com> | 2023-10-23 13:14:15 +0900 |
commit | 2d468358a516f575d013f07801079e0906c61f0c (patch) | |
tree | 609afdf2ba961d7c8079bb22ea4fe2b94500abcb /tool/lrama/lib/lrama/grammar.rb | |
parent | dd9b4851a335888f971f09322f25ed63c5b2f587 (diff) | |
download | ruby-2d468358a516f575d013f07801079e0906c61f0c.tar.gz |
Lrama v0.5.7
Diffstat (limited to 'tool/lrama/lib/lrama/grammar.rb')
-rw-r--r-- | tool/lrama/lib/lrama/grammar.rb | 188 |
1 files changed, 187 insertions, 1 deletions
diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index 81df399682..91685530a0 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -1,3 +1,5 @@ +require "strscan" + require "lrama/grammar/auxiliary" require "lrama/grammar/code" require "lrama/grammar/error_token" @@ -306,6 +308,188 @@ module Lrama @nterms ||= @symbols.select(&:nterm?) end + def extract_references + unless initial_action.nil? + scanner = StringScanner.new(initial_action.s_value) + references = [] + + while !scanner.eos? do + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, "$", tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + references << [:at, "$", nil, start, scanner.pos - 1] + when scanner.scan(/@(\d+)/) # @1 + references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1] + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + else + scanner.getch + end + end + + initial_action.token_code.references = references + build_references(initial_action.token_code) + end + + @printers.each do |printer| + scanner = StringScanner.new(printer.code.s_value) + references = [] + + while !scanner.eos? do + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, "$", tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + references << [:at, "$", nil, start, scanner.pos - 1] + when scanner.scan(/@(\d+)/) # @1 + references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1] + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + else + scanner.getch + end + end + + printer.code.token_code.references = references + build_references(printer.code.token_code) + end + + @error_tokens.each do |error_token| + scanner = StringScanner.new(error_token.code.s_value) + references = [] + + while !scanner.eos? do + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, "$", tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + references << [:at, "$", nil, start, scanner.pos - 1] + when scanner.scan(/@(\d+)/) # @1 + references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1] + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + else + scanner.getch + end + end + + error_token.code.token_code.references = references + build_references(error_token.code.token_code) + end + + @_rules.each do |lhs, rhs, _| + rhs.each_with_index do |token, index| + next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code + + scanner = StringScanner.new(token.s_value) + references = [] + + while !scanner.eos? do + start = scanner.pos + case + # $ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$ + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, "$", tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets) + tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil + references << [:dollar, scanner[2], tag, start, scanner.pos - 1] + + # @ references + # It need to wrap an identifier with brackets to use ".-" for identifiers + when scanner.scan(/@\$/) # @$ + references << [:at, "$", nil, start, scanner.pos - 1] + when scanner.scan(/@(\d+)/) # @1 + references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1] + when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets) + references << [:at, scanner[1], nil, start, scanner.pos - 1] + + when scanner.scan(/\/\*/) + scanner.scan_until(/\*\//) + else + scanner.getch + end + end + + token.references = references + token.numberize_references(lhs, rhs) + build_references(token) + end + end + end + + def create_token(type, s_value, line, column) + t = Token.new(type: type, s_value: s_value) + t.line = line + t.column = column + + return t + end + private def find_nterm_by_id!(id) @@ -470,7 +654,9 @@ module Lrama # Fill #number and #token_id def fill_symbol_number - # TODO: why start from 256 + # Character literal in grammar file has + # token id corresponding to ASCII code by default, + # so start token_id from 256. token_id = 256 # YYEMPTY = -2 |