diff options
author | yui-knk <spiketeika@gmail.com> | 2023-11-18 17:46:38 +0900 |
---|---|---|
committer | Yuichiro Kaneko <spiketeika@gmail.com> | 2023-11-18 19:38:55 +0900 |
commit | c56dd94db0001b900a2dab3ee350a182d6bb42af (patch) | |
tree | 7a4f5236452ae721f67fe6602f3dcbc68a085ac1 /tool/lrama/lib/lrama/lexer.rb | |
parent | f479e629ab497f325091096819fa5bf60c0d03b2 (diff) | |
download | ruby-c56dd94db0001b900a2dab3ee350a182d6bb42af.tar.gz |
Lrama v0.5.10
Diffstat (limited to 'tool/lrama/lib/lrama/lexer.rb')
-rw-r--r-- | tool/lrama/lib/lrama/lexer.rb | 60 |
1 files changed, 31 insertions, 29 deletions
diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index 870d087b38..18d702a49d 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -1,4 +1,5 @@ require "strscan" +require "lrama/lexer/location" require "lrama/lexer/token" module Lrama @@ -7,7 +8,7 @@ module Lrama attr_accessor :status attr_accessor :end_symbol - SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;) + SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'] PERCENT_TOKENS = %w( %union %token @@ -31,8 +32,8 @@ module Lrama def initialize(text) @scanner = StringScanner.new(text) - @head = @scanner.pos - @line = 1 + @head_column = @head = @scanner.pos + @head_line = @line = 1 @status = :initial @end_symbol = nil end @@ -54,6 +55,13 @@ module Lrama @scanner.pos - @head end + def location + Location.new( + first_line: @head_line, first_column: @head_column, + last_line: @line, last_column: column + ) + end + def lex_token while !@scanner.eos? do case @@ -63,9 +71,8 @@ module Lrama # noop when @scanner.scan(/\/\*/) lex_comment - when @scanner.scan(/\/\//) - @scanner.scan_until(/\n/) - newline + when @scanner.scan(/\/\/.*(?<newline>\n)?/) + newline if @scanner[:newline] else break end @@ -84,17 +91,17 @@ module Lrama when @scanner.scan(/[\?\+\*]/) return [@scanner.matched, @scanner.matched] when @scanner.scan(/<\w+>/) - return [:TAG, setup_token(Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched))] + return [:TAG, Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/'.'/) - return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))] + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/) - return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))] - when @scanner.scan(/"/) - return [:STRING, %Q("#{@scanner.scan_until(/"/)})] + return [:CHARACTER, Lrama::Lexer::Token::Char.new(s_value: @scanner.matched, location: location)] + when @scanner.scan(/".*?"/) + return [:STRING, %Q(#{@scanner.matched})] when @scanner.scan(/\d+/) return [:INTEGER, Integer(@scanner.matched)] when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/) - token = setup_token(Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched)) + token = Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched, location: location) type = if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/) :IDENT_COLON @@ -118,25 +125,27 @@ module Lrama when @scanner.scan(/}/) if nested == 0 && @end_symbol == '}' @scanner.unscan - return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))] + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] else code += @scanner.matched nested -= 1 end when @scanner.check(/#{@end_symbol}/) - return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))] + return [:C_DECLARATION, Lrama::Lexer::Token::UserCode.new(s_value: code, location: location)] when @scanner.scan(/\n/) code += @scanner.matched newline - when @scanner.scan(/"/) - matched = @scanner.scan_until(/"/) - code += %Q("#{matched}) - @line += matched.count("\n") - when @scanner.scan(/'/) - matched = @scanner.scan_until(/'/) - code += %Q('#{matched}) + when @scanner.scan(/".*?"/) + code += %Q(#{@scanner.matched}) + @line += @scanner.matched.count("\n") + when @scanner.scan(/'.*?'/) + code += %Q(#{@scanner.matched}) else - code += @scanner.getch + if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/) + code += @scanner.matched + else + code += @scanner.getch + end end end raise ParseError, "Unexpected code: #{code}." @@ -158,13 +167,6 @@ module Lrama end end - def setup_token(token) - token.line = @head_line - token.column = @head_column - - token - end - def newline @line += 1 @head = @scanner.pos + 1 |