diff options
author | Kevin Newton <kddnewton@gmail.com> | 2024-03-25 09:27:27 -0400 |
---|---|---|
committer | Kevin Newton <kddnewton@gmail.com> | 2024-03-25 11:52:13 -0400 |
commit | 53cc2723877f7794807684e31a530daca1a72ed6 (patch) | |
tree | 8413324464f459bd892c2cb1d483ca30a1f28ddc | |
parent | 86077fbcde05f4abd6b306ad0fcc88ee891f8e8b (diff) | |
download | ruby-53cc2723877f7794807684e31a530daca1a72ed6.tar.gz |
[ruby/prism] Handle CRLF in regexp
https://github.com/ruby/prism/commit/b96bada9ae
-rw-r--r-- | prism/parser.h | 4 | ||||
-rw-r--r-- | prism/prism.c | 19 |
2 files changed, 18 insertions, 5 deletions
diff --git a/prism/parser.h b/prism/parser.h index b685fa377d..7e4bb99197 100644 --- a/prism/parser.h +++ b/prism/parser.h @@ -173,7 +173,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the regular expression. */ - uint8_t breakpoints[6]; + uint8_t breakpoints[7]; } regexp; struct { @@ -206,7 +206,7 @@ typedef struct pm_lex_mode { * This is the character set that should be used to delimit the * tokens within the string. */ - uint8_t breakpoints[6]; + uint8_t breakpoints[7]; } string; struct { diff --git a/prism/prism.c b/prism/prism.c index 6aa611624a..58c70dba69 100644 --- a/prism/prism.c +++ b/prism/prism.c @@ -308,14 +308,14 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato // regular expression. We'll use strpbrk to find the first of these // characters. uint8_t *breakpoints = lex_mode.as.regexp.breakpoints; - memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); + memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints)); // First we'll add the terminator. - breakpoints[3] = terminator; + breakpoints[4] = terminator; // Next, if there is an incrementor, then we'll check for that as well. 
if (incrementor != '\0') { - breakpoints[4] = incrementor; + breakpoints[5] = incrementor; } return lex_mode_push(parser, lex_mode); @@ -10835,6 +10835,19 @@ parser_lex(pm_parser_t *parser) { parser->current.end = breakpoint + 1; breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); break; + case '\r': + if (peek_at(parser, breakpoint + 1) != '\n') { + parser->current.end = breakpoint + 1; + breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false); + break; + } + + parser->current.end = breakpoint + 1; + pm_regexp_token_buffer_escape(parser, &token_buffer); + breakpoint++; + token_buffer.base.cursor = breakpoint; + + /* fallthrough */ case '\n': // If we've hit a newline, then we need to track that in // the list of newlines. |