aboutsummaryrefslogtreecommitdiffstats
path: root/tool/lrama/lib/lrama/grammar.rb
diff options
context:
space:
mode:
author yui-knk <spiketeika@gmail.com> 2023-10-23 09:07:23 +0900
committer Yuichiro Kaneko <spiketeika@gmail.com> 2023-10-23 13:14:15 +0900
commit 2d468358a516f575d013f07801079e0906c61f0c (patch)
tree 609afdf2ba961d7c8079bb22ea4fe2b94500abcb /tool/lrama/lib/lrama/grammar.rb
parent dd9b4851a335888f971f09322f25ed63c5b2f587 (diff)
download ruby-2d468358a516f575d013f07801079e0906c61f0c.tar.gz
Lrama v0.5.7
Diffstat (limited to 'tool/lrama/lib/lrama/grammar.rb')
-rw-r--r-- tool/lrama/lib/lrama/grammar.rb 188
1 files changed, 187 insertions, 1 deletions
diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb
index 81df399682..91685530a0 100644
--- a/tool/lrama/lib/lrama/grammar.rb
+++ b/tool/lrama/lib/lrama/grammar.rb
@@ -1,3 +1,5 @@
+require "strscan"
+
require "lrama/grammar/auxiliary"
require "lrama/grammar/code"
require "lrama/grammar/error_token"
@@ -306,6 +308,188 @@ module Lrama
@nterms ||= @symbols.select(&:nterm?)
end
+ def extract_references
+ unless initial_action.nil?
+ scanner = StringScanner.new(initial_action.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ initial_action.token_code.references = references
+ build_references(initial_action.token_code)
+ end
+
+ @printers.each do |printer|
+ scanner = StringScanner.new(printer.code.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ printer.code.token_code.references = references
+ build_references(printer.code.token_code)
+ end
+
+ @error_tokens.each do |error_token|
+ scanner = StringScanner.new(error_token.code.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ error_token.code.token_code.references = references
+ build_references(error_token.code.token_code)
+ end
+
+ @_rules.each do |lhs, rhs, _|
+ rhs.each_with_index do |token, index|
+ next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
+
+ scanner = StringScanner.new(token.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+
+ when scanner.scan(/\/\*/)
+ scanner.scan_until(/\*\//)
+ else
+ scanner.getch
+ end
+ end
+
+ token.references = references
+ token.numberize_references(lhs, rhs)
+ build_references(token)
+ end
+ end
+ end
+
+ def create_token(type, s_value, line, column)
+ t = Token.new(type: type, s_value: s_value)
+ t.line = line
+ t.column = column
+
+ return t
+ end
+
private
def find_nterm_by_id!(id)
@@ -470,7 +654,9 @@ module Lrama
# Fill #number and #token_id
def fill_symbol_number
- # TODO: why start from 256
+ # By default, a character literal in the grammar file gets
+ # a token id equal to its ASCII code,
+ # so token_id numbering starts from 256.
token_id = 256
# YYEMPTY = -2