aboutsummaryrefslogtreecommitdiffstats
path: root/tool/lrama/lib/lrama/lexer.rb
diff options
context:
space:
mode:
authoryui-knk <spiketeika@gmail.com>2023-11-18 17:46:38 +0900
committerYuichiro Kaneko <spiketeika@gmail.com>2023-11-18 19:38:55 +0900
commitc56dd94db0001b900a2dab3ee350a182d6bb42af (patch)
tree7a4f5236452ae721f67fe6602f3dcbc68a085ac1 /tool/lrama/lib/lrama/lexer.rb
parentf479e629ab497f325091096819fa5bf60c0d03b2 (diff)
downloadruby-c56dd94db0001b900a2dab3ee350a182d6bb42af.tar.gz
Lrama v0.5.10
Diffstat (limited to 'tool/lrama/lib/lrama/lexer.rb')
-rw-r--r--tool/lrama/lib/lrama/lexer.rb60
1 files changed, 31 insertions, 29 deletions
diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb
index 870d087b38..18d702a49d 100644
--- a/tool/lrama/lib/lrama/lexer.rb
+++ b/tool/lrama/lib/lrama/lexer.rb
@@ -1,4 +1,5 @@
require "strscan"
+require "lrama/lexer/location"
require "lrama/lexer/token"
module Lrama
@@ -7,7 +8,7 @@ module Lrama
attr_accessor :status
attr_accessor :end_symbol
- SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
+ SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
PERCENT_TOKENS = %w(
%union
%token
@@ -31,8 +32,8 @@ module Lrama
def initialize(text)
@scanner = StringScanner.new(text)
- @head = @scanner.pos
- @line = 1
+ @head_column = @head = @scanner.pos
+ @head_line = @line = 1
@status = :initial
@end_symbol = nil
end
@@ -54,6 +55,13 @@ module Lrama
@scanner.pos - @head
end
+ def location
+ Location.new(
+ first_line: @head_line, first_column: @head_column,
+ last_line: @line, last_column: column
+ )
+ end
+
def lex_token
while !@scanner.eos? do
case
@@ -63,9 +71,8 @@ module Lrama
# noop
when @scanner.scan(/\/\*/)
lex_comment
- when @scanner.scan(/\/\//)
- @scanner.scan_until(/\n/)
- newline
+ when @scanner.scan(/\/\/.*(?<newline>\n)?/)
+ newline if @scanner[:newline]
else
break
end
@@ -84,17 +91,17 @@ module Lrama
when @scanner.scan(/[\?\+\*]/)
return [@scanner.matched, @scanner.matched]
when @scanner.scan(/<\w+>/)
- return [:TAG, setup_token(Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched))]
+ return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/'.'/)
- return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
- return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
- when @scanner.scan(/"/)
- return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
+ return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)]
+ when @scanner.scan(/".*?"/)
+ return [:STRING, %Q(#{@scanner.matched})]
when @scanner.scan(/\d+/)
return [:INTEGER, Integer(@scanner.matched)]
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
- token = setup_token(Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched))
+ token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location)
type =
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
:IDENT_COLON
@@ -118,25 +125,27 @@ module Lrama
when @scanner.scan(/}/)
if nested == 0 && @end_symbol == '}'
@scanner.unscan
- return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
else
code += @scanner.matched
nested -= 1
end
when @scanner.check(/#{@end_symbol}/)
- return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
+ return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)]
when @scanner.scan(/\n/)
code += @scanner.matched
newline
- when @scanner.scan(/"/)
- matched = @scanner.scan_until(/"/)
- code += %Q("#{matched})
- @line += matched.count("\n")
- when @scanner.scan(/'/)
- matched = @scanner.scan_until(/'/)
- code += %Q('#{matched})
+ when @scanner.scan(/".*?"/)
+ code += %Q(#{@scanner.matched})
+ @line += @scanner.matched.count("\n")
+ when @scanner.scan(/'.*?'/)
+ code += %Q(#{@scanner.matched})
else
- code += @scanner.getch
+ if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
+ code += @scanner.matched
+ else
+ code += @scanner.getch
+ end
end
end
raise ParseError, "Unexpected code: #{code}."
@@ -158,13 +167,6 @@ module Lrama
end
end
- def setup_token(token)
- token.line = @head_line
- token.column = @head_column
-
- token
- end
-
def newline
@line += 1
@head = @scanner.pos + 1