aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2024-03-25 09:27:27 -0400
committerKevin Newton <kddnewton@gmail.com>2024-03-25 11:52:13 -0400
commit53cc2723877f7794807684e31a530daca1a72ed6 (patch)
tree8413324464f459bd892c2cb1d483ca30a1f28ddc
parent86077fbcde05f4abd6b306ad0fcc88ee891f8e8b (diff)
downloadruby-53cc2723877f7794807684e31a530daca1a72ed6.tar.gz
[ruby/prism] Handle CRLF in regexp
https://github.com/ruby/prism/commit/b96bada9ae
-rw-r--r-- prism/parser.h | 4
-rw-r--r-- prism/prism.c | 19
2 files changed, 18 insertions, 5 deletions
diff --git a/prism/parser.h b/prism/parser.h
index b685fa377d..7e4bb99197 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -173,7 +173,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the regular expression.
*/
- uint8_t breakpoints[6];
+ uint8_t breakpoints[7];
} regexp;
struct {
@@ -206,7 +206,7 @@ typedef struct pm_lex_mode {
* This is the character set that should be used to delimit the
* tokens within the string.
*/
- uint8_t breakpoints[6];
+ uint8_t breakpoints[7];
} string;
struct {
diff --git a/prism/prism.c b/prism/prism.c
index 6aa611624a..58c70dba69 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -308,14 +308,14 @@ lex_mode_push_regexp(pm_parser_t *parser, uint8_t incrementor, uint8_t terminato
// regular expression. We'll use strpbrk to find the first of these
// characters.
uint8_t *breakpoints = lex_mode.as.regexp.breakpoints;
- memcpy(breakpoints, "\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
+ memcpy(breakpoints, "\r\n\\#\0\0", sizeof(lex_mode.as.regexp.breakpoints));
// First we'll add the terminator.
- breakpoints[3] = terminator;
+ breakpoints[4] = terminator;
// Next, if there is an incrementor, then we'll check for that as well.
if (incrementor != '\0') {
- breakpoints[4] = incrementor;
+ breakpoints[5] = incrementor;
}
return lex_mode_push(parser, lex_mode);
@@ -10835,6 +10835,19 @@ parser_lex(pm_parser_t *parser) {
parser->current.end = breakpoint + 1;
breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
break;
+ case '\r':
+ if (peek_at(parser, breakpoint + 1) != '\n') {
+ parser->current.end = breakpoint + 1;
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
+ break;
+ }
+
+ parser->current.end = breakpoint + 1;
+ pm_regexp_token_buffer_escape(parser, &token_buffer);
+ breakpoint++;
+ token_buffer.base.cursor = breakpoint;
+
+ /* fallthrough */
case '\n':
// If we've hit a newline, then we need to track that in
// the list of newlines.