aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKevin Menard <kevin@nirvdrum.com>2024-02-08 16:27:59 -0500
committergit <svn-admin@ruby-lang.org>2024-03-08 18:48:53 +0000
commit82fb6a90d5cf5081df11ebed6cfc623159f72676 (patch)
treeaabe815d04a121575f585d48fb5fd41b509efe56
parent2d80b6093f3b0c21c89db72eebacfef4a535b149 (diff)
downloadruby-82fb6a90d5cf5081df11ebed6cfc623159f72676.tar.gz
[ruby/prism] Track both the unescaped bytes and source string for a regular expression so we can accurately set its encoding flags.
https://github.com/ruby/prism/commit/dc6dd3a926
-rw-r--r--prism/encoding.h2
-rw-r--r--prism/parser.h11
-rw-r--r--prism/prism.c164
-rw-r--r--test/prism/encoding_test.rb58
-rw-r--r--test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt4
-rw-r--r--test/prism/snapshots/newline_terminated.txt2
-rw-r--r--test/prism/snapshots/patterns.txt12
-rw-r--r--test/prism/snapshots/regex.txt30
-rw-r--r--test/prism/snapshots/seattlerb/TestRubyParserShared.txt2
-rw-r--r--test/prism/snapshots/seattlerb/bug190.txt2
-rw-r--r--test/prism/snapshots/seattlerb/bug_case_when_regexp.txt2
-rw-r--r--test/prism/snapshots/seattlerb/bug_cond_pct.txt2
-rw-r--r--test/prism/snapshots/seattlerb/case_in.txt2
-rw-r--r--test/prism/snapshots/seattlerb/regexp.txt10
-rw-r--r--test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt2
-rw-r--r--test/prism/snapshots/seattlerb/regexp_esc_u.txt2
-rw-r--r--test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt4
-rw-r--r--test/prism/snapshots/spanning_heredoc_newlines.txt2
-rw-r--r--test/prism/snapshots/unescaping.txt2
-rw-r--r--test/prism/snapshots/unparser/corpus/literal/if.txt2
-rw-r--r--test/prism/snapshots/unparser/corpus/literal/literal.txt12
-rw-r--r--test/prism/snapshots/unparser/corpus/literal/send.txt12
-rw-r--r--test/prism/snapshots/unparser/corpus/semantic/literal.txt4
-rw-r--r--test/prism/snapshots/whitequark/bug_regex_verification.txt2
-rw-r--r--test/prism/snapshots/whitequark/cond_match_current_line.txt4
-rw-r--r--test/prism/snapshots/whitequark/interp_digit_var.txt8
-rw-r--r--test/prism/snapshots/whitequark/lvar_injecting_match.txt2
-rw-r--r--test/prism/snapshots/whitequark/parser_bug_830.txt2
-rw-r--r--test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt4
-rw-r--r--test/prism/snapshots/whitequark/regex_plain.txt2
-rw-r--r--test/prism/snapshots/whitequark/ruby_bug_11873.txt16
31 files changed, 259 insertions, 126 deletions
diff --git a/prism/encoding.h b/prism/encoding.h
index d0f947eacd..a1af1298e0 100644
--- a/prism/encoding.h
+++ b/prism/encoding.h
@@ -248,7 +248,7 @@ extern const pm_encoding_t pm_encodings[PM_ENCODING_MAXIMUM];
/**
* This is the ASCII-8BIT encoding. We need a reference to it so that pm_strpbrk
* can compare against it because invalid multibyte characters are not a thing
- * in this encoding.
+ * in this encoding. It is also needed for handling Regexp encoding flags.
*/
#define PM_ENCODING_ASCII_8BIT_ENTRY (&pm_encodings[PM_ENCODING_ASCII_8BIT])
diff --git a/prism/parser.h b/prism/parser.h
index 02f60192d5..cf5f702a87 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -664,6 +664,17 @@ struct pm_parser {
pm_string_t current_string;
/**
+ * This string is used to pass information from the lexer to the parser. When
+ * processing regular expressions we must track the string source for the expression
+ * as well as its unescaped representation. In that case, `current_string` will hold
+ * the unescaped value while this field will hold the translated source value. There
+ * are some escape sequences in regular expressions that will cause the associated
+ * source string to have a different value than the content of the expression so we
+ * must track this state separately.
+ */
+ pm_string_t current_regular_expression_source;
+
+ /**
* The line number at the start of the parse. This will be used to offset
* the line numbers of all of the locations.
*/
diff --git a/prism/prism.c b/prism/prism.c
index 6921feac48..6e4de22ec1 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -5950,6 +5950,34 @@ parse_symbol_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
}
/**
+ * Ruby "downgrades" the encoding of Regexps to US-ASCII if the associated encoding is ASCII-compatible and
+ * the unescaped representation of a Regexp source consists only of US-ASCII code points. This is true even
+ * when the Regexp is explicitly given an ASCII-8BIT encoding via the (/n) modifier. Otherwise, the encoding
+ * may be explicitly set with an escape sequence.
+ */
+static inline pm_node_flags_t
+parse_regular_expression_encoding(const pm_parser_t *parser, const pm_string_t *contents) {
+ // Ruby stipulates that all source files must use an ASCII-compatible encoding. Thus, all regular expressions
+ // appearing in source are eligible for "downgrading" to US-ASCII.
+ if (pm_ascii_only_p(contents)) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING;
+ }
+
+ // A Regexp may optionally have its encoding explicitly set via a character escape sequence in the source string
+ // or by specifying a modifier.
+ //
+ // NB: an explicitly set encoding is ignored by Ruby if the Regexp consists of only US-ASCII code points.
+ if (parser->explicit_encoding != NULL) {
+ if (parser->explicit_encoding == PM_ENCODING_UTF_8_ENTRY) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_UTF8_ENCODING;
+ } else if (parser->encoding == PM_ENCODING_US_ASCII_ENTRY) {
+ return PM_REGULAR_EXPRESSION_FLAGS_FORCED_BINARY_ENCODING;
+ }
+ }
+ return 0;
+}
+
+/**
* Allocate and initialize a new SymbolNode node with the given unescaped
* string.
*/
@@ -8130,34 +8158,34 @@ escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
* source so that the regular expression engine will perform its own unescaping.
*/
static inline void
-escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
+escape_write_byte(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags, uint8_t byte) {
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
+ pm_buffer_append_bytes(regular_expression_buffer, (const uint8_t *) "\\x", 2);
uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
uint8_t byte2 = (uint8_t) (byte & 0xF);
if (byte1 >= 0xA) {
- pm_buffer_append_byte(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
+ pm_buffer_append_byte(regular_expression_buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
} else {
- pm_buffer_append_byte(buffer, (uint8_t) (byte1 + '0'));
+ pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte1 + '0'));
}
if (byte2 >= 0xA) {
- pm_buffer_append_byte(buffer, (uint8_t) (byte2 - 0xA + 'A'));
+ pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 - 0xA + 'A'));
} else {
- pm_buffer_append_byte(buffer, (uint8_t) (byte2 + '0'));
+ pm_buffer_append_byte(regular_expression_buffer, (uint8_t) (byte2 + '0'));
}
- } else {
- escape_write_byte_encoded(parser, buffer, byte);
}
+
+ escape_write_byte_encoded(parser, buffer, byte);
}
/**
* Read the value of an escape into the buffer.
*/
static void
-escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
+escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
switch (peek(parser)) {
case '\\': {
parser->current.end++;
@@ -8248,10 +8276,10 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
- } else {
- escape_write_byte_encoded(parser, buffer, value);
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end - start));
}
+
+ escape_write_byte_encoded(parser, buffer, value);
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
}
@@ -8272,10 +8300,9 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
uint32_t value = escape_unicode(parser->current.end, 4);
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
- } else {
- escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
+ pm_buffer_append_bytes(regular_expression_buffer, start, (size_t) (parser->current.end + 4 - start));
}
+ escape_write_unicode(parser, buffer, flags, start, parser->current.end + 4, value);
parser->current.end += 4;
} else if (peek(parser) == '{') {
@@ -8306,10 +8333,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
extra_codepoints_start = unicode_start;
}
- if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
- uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
- escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
- }
+ uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
+ escape_write_unicode(parser, buffer, flags, unicode_start, parser->current.end, value);
parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
}
@@ -8327,7 +8352,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
if (flags & PM_ESCAPE_FLAG_REGEXP) {
- pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
+ pm_buffer_append_bytes(regular_expression_buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
}
} else {
pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
@@ -8346,7 +8371,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
@@ -8355,7 +8380,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
default: {
if (!char_is_ascii_printable(peeked)) {
@@ -8364,7 +8389,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
@@ -8386,7 +8411,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
switch (peeked) {
case '?': {
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(0x7f, flags));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(0x7f, flags));
return;
}
case '\\':
@@ -8395,7 +8420,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_CONTROL);
return;
default: {
if (!char_is_ascii_printable(peeked)) {
@@ -8404,7 +8429,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
return;
}
}
@@ -8429,7 +8454,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
return;
}
parser->current.end++;
- escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
+ escape_read(parser, buffer, regular_expression_buffer, flags | PM_ESCAPE_FLAG_META);
return;
}
@@ -8439,7 +8464,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
}
parser->current.end++;
- escape_write_byte(parser, buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+ escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
return;
}
case '\r': {
@@ -8510,7 +8535,7 @@ lex_question_mark(pm_parser_t *parser) {
pm_buffer_t buffer;
pm_buffer_init_capacity(&buffer, 3);
- escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
+ escape_read(parser, &buffer, NULL, PM_ESCAPE_FLAG_SINGLE);
pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
return PM_TOKEN_CHARACTER_LITERAL;
@@ -8724,7 +8749,7 @@ parser_end_of_line_p(const pm_parser_t *parser) {
* "foo\n"
*
* then the bytes in the string are "f", "o", "o", "\", "n", but we want to
- * provide out consumers with the string content "f", "o", "o", "\n". In these
+ * provide our consumers with the string content "f", "o", "o", "\n". In these
* cases, when we find the first escape sequence, we initialize a pm_buffer_t
* to keep track of the string content. Then in the parser, it will
* automatically attach the string content to the node that it belongs to.
@@ -8737,6 +8762,20 @@ typedef struct {
pm_buffer_t buffer;
/**
+ * In order to properly set a regular expression's encoding and to validate
+ * the byte sequence for the underlying encoding we must process any escape
+ * sequences. The unescaped byte sequence will be stored in `buffer` just like
+ * for other string-like types. However, we also need to store the regular
+ * expression's source string. That string may differ from what we see
+ * during lexing because some escape sequences rewrite the source.
+ *
+ * This value will only be initialized for regular expressions and only if we
+ * receive an escape sequence. It will contain the regular expression's source
+ * string's byte sequence.
+ */
+ pm_buffer_t regular_expression_buffer;
+
+ /**
* The cursor into the source string that points to how far we have
* currently copied into the buffer.
*/
@@ -8751,19 +8790,29 @@ pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
pm_buffer_append_byte(&token_buffer->buffer, byte);
}
+static inline void
+pm_token_buffer_push_byte_regular_expression(pm_token_buffer_t *token_buffer, uint8_t byte) {
+ pm_buffer_append_byte(&token_buffer->regular_expression_buffer, byte);
+}
+
+
/**
* Append the given bytes into the token buffer.
*/
static inline void
-pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
+pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length, uint8_t flags) {
pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
+
+ if (flags & PM_ESCAPE_FLAG_REGEXP) {
+ pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, bytes, length);
+ }
}
/**
* Push an escaped character into the token buffer.
*/
static inline void
-pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
+pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser, uint8_t flags) {
// First, determine the width of the character to be escaped.
size_t width;
if (parser->encoding_changed) {
@@ -8777,7 +8826,7 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
width = (width == 0 ? 1 : width);
// Now, push the bytes into the buffer.
- pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
+ pm_token_buffer_push_bytes(token_buffer, parser->current.end, width, flags);
parser->current.end += width;
}
@@ -8790,6 +8839,7 @@ pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parse
static inline void
pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
+ pm_string_owned_init(&parser->current_regular_expression_source, (uint8_t *) token_buffer->regular_expression_buffer.value, token_buffer->regular_expression_buffer.length);
}
/**
@@ -8805,8 +8855,10 @@ static void
pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
if (token_buffer->cursor == NULL) {
pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+ pm_string_shared_init(&parser->current_regular_expression_source, parser->current.start, parser->current.end);
} else {
pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
+ pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
pm_token_buffer_copy(parser, token_buffer);
}
}
@@ -8824,6 +8876,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *start;
if (token_buffer->cursor == NULL) {
pm_buffer_init_capacity(&token_buffer->buffer, 16);
+ pm_buffer_init_capacity(&token_buffer->regular_expression_buffer, 16);
start = parser->current.start;
} else {
start = token_buffer->cursor;
@@ -8831,6 +8884,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
const uint8_t *end = parser->current.end - 1;
pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
+ pm_buffer_append_bytes(&token_buffer->regular_expression_buffer, start, (size_t) (end - start));
token_buffer->cursor = end;
}
@@ -10143,7 +10197,7 @@ parser_lex(pm_parser_t *parser) {
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit a null byte, skip directly past it.
@@ -10242,10 +10296,10 @@ parser_lex(pm_parser_t *parser) {
pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.list.interpolation) {
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
pm_token_buffer_push_byte(&token_buffer, '\\');
- pm_token_buffer_push_escaped(&token_buffer, parser);
+ pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
}
break;
@@ -10320,7 +10374,7 @@ parser_lex(pm_parser_t *parser) {
// characters.
const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit a null byte, skip directly past it.
@@ -10403,9 +10457,10 @@ parser_lex(pm_parser_t *parser) {
parser->current.end++;
if (peek(parser) != '\n') {
if (lex_mode->as.regexp.terminator != '\r') {
- pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
}
pm_token_buffer_push_byte(&token_buffer, '\r');
+ pm_token_buffer_push_byte_regular_expression(&token_buffer, '\r');
break;
}
/* fallthrough */
@@ -10429,7 +10484,7 @@ parser_lex(pm_parser_t *parser) {
case 'M':
case 'u':
case 'x':
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
+ escape_read(parser, &token_buffer.buffer, &token_buffer.regular_expression_buffer, PM_ESCAPE_FLAG_REGEXP);
break;
default:
if (lex_mode->as.regexp.terminator == peeked) {
@@ -10440,19 +10495,20 @@ parser_lex(pm_parser_t *parser) {
case '$': case ')': case '*': case '+':
case '.': case '>': case '?': case ']':
case '^': case '|': case '}':
- pm_token_buffer_push_byte(&token_buffer, '\\');
+ pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
break;
default:
break;
}
pm_token_buffer_push_byte(&token_buffer, peeked);
+ pm_token_buffer_push_byte_regular_expression(&token_buffer, peeked);
parser->current.end++;
break;
}
- if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
- pm_token_buffer_push_escaped(&token_buffer, parser);
+ if (peeked < 0x80) pm_token_buffer_push_byte_regular_expression(&token_buffer, '\\');
+ pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_REGEXP);
break;
}
@@ -10525,7 +10581,7 @@ parser_lex(pm_parser_t *parser) {
// If we haven't found an escape yet, then this buffer will be
// unallocated since we can refer directly to the source string.
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
while (breakpoint != NULL) {
// If we hit the incrementor, then we'll increment then nesting and
@@ -10660,10 +10716,10 @@ parser_lex(pm_parser_t *parser) {
pm_token_buffer_push_byte(&token_buffer, peeked);
parser->current.end++;
} else if (lex_mode->as.string.interpolation) {
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
} else {
pm_token_buffer_push_byte(&token_buffer, '\\');
- pm_token_buffer_push_escaped(&token_buffer, parser);
+ pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
}
break;
@@ -10813,7 +10869,7 @@ parser_lex(pm_parser_t *parser) {
}
const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
- pm_token_buffer_t token_buffer = { { 0 }, 0 };
+ pm_token_buffer_t token_buffer = { { 0 }, { 0 }, 0 };
bool was_line_continuation = false;
while (breakpoint != NULL) {
@@ -10935,7 +10991,7 @@ parser_lex(pm_parser_t *parser) {
continue;
default:
pm_token_buffer_push_byte(&token_buffer, '\\');
- pm_token_buffer_push_escaped(&token_buffer, parser);
+ pm_token_buffer_push_escaped(&token_buffer, parser, PM_ESCAPE_FLAG_NONE);
break;
}
} else {
@@ -10972,7 +11028,7 @@ parser_lex(pm_parser_t *parser) {
breakpoint = parser->current.end;
continue;
default:
- escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+ escape_read(parser, &token_buffer.buffer, NULL, PM_ESCAPE_FLAG_NONE);
break;
}
}
@@ -16948,7 +17004,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
};
parser_lex(parser);
- return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+
+ pm_node_t *regular_expression_node = (pm_node_t *) (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
+ pm_node_flag_set(regular_expression_node, PM_REGULAR_EXPRESSION_FLAGS_FORCED_US_ASCII_ENCODING);
+
+ return regular_expression_node;
}
pm_interpolated_regular_expression_node_t *node;
@@ -16959,6 +17019,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// following token is the end (in which case we can return a plain
// regular expression) or if it's not then it has interpolation.
pm_string_t unescaped = parser->current_string;
+ pm_string_t source = parser->current_regular_expression_source;
pm_token_t content = parser->current;
parser_lex(parser);
@@ -16966,7 +17027,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
// without interpolation, which can be represented more succinctly and
// more easily compiled.
if (accept1(parser, PM_TOKEN_REGEXP_END)) {
- return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
+ pm_node_t *regular_expression_node = (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &source);
+ pm_node_flag_set(regular_expression_node, parse_regular_expression_encoding(parser, &unescaped));
+ return regular_expression_node;
}
// If we get here, then we have interpolation so we'll need to create
@@ -18527,6 +18590,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
.newline_list = { 0 },
.integer_base = 0,
.current_string = PM_STRING_EMPTY,
+ .current_regular_expression_source = PM_STRING_EMPTY,
.start_line = 1,
.explicit_encoding = NULL,
.command_line = 0,
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index 44491bf0d5..762dcde717 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -149,6 +149,7 @@ module Prism
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
escapes = escapes.concat(escapes.product(escapes).map(&:join))
symbols = [:a, :ą, :+]
+ regexps = [/a/, /ą/, //]
encodings.each_key do |encoding|
define_method(:"test_encoding_flags_#{encoding.name}") do
@@ -168,6 +169,18 @@ module Prism
end
end
+ encodings.each_key do |encoding|
+ define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
+ assert_regular_expression_encoding_flags(encoding, regexps.map(&:inspect))
+ end
+ end
+
+ encodings.each_key do |encoding|
+ define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
+ assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
+ end
+ end
+
def test_coding
result = Prism.parse("# coding: utf-8\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
@@ -454,5 +467,50 @@ module Prism
assert_equal expected, actual
end
end
+
+ def assert_regular_expression_encoding_flags(encoding, regexps)
+ regexps.each do |regexp|
+ source = "# encoding: #{encoding.name}\n#{regexp}"
+
+ expected =
+ begin
+ eval(source).encoding
+ rescue SyntaxError => error
+ if error.message.include?("UTF-8 character in non UTF-8 regexp") || error.message.include?("escaped non ASCII character in UTF-8 regexp")
+ error.message[/: (.+?)\n/, 1]
+ elsif error.message.include?("invalid multibyte char")
+ # TODO (nirvdrum 26-Jan-2024): Bail out early of the rest of the test due to https://github.com/ruby/prism/issues/2104.
+ next
+ else
+ raise
+ end
+ end
+
+ actual =
+ Prism.parse(source).then do |result|
+ if result.success?
+ regexp = result.value.statements.body.first
+
+ if regexp.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif regexp.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ elsif regexp.forced_us_ascii_encoding?
+ Encoding::US_ASCII
+ else
+ encoding
+ end
+ else
+ error = result.errors.last
+
+ unless error.message.include?("UTF-8 mixed within")
+ raise error.message
+ end
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
end
end
diff --git a/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt b/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt
index 5d17559ed4..acc6b082fc 100644
--- a/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt
+++ b/test/prism/snapshots/heredoc_with_escaped_newline_at_start.txt
@@ -21,7 +21,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 2)
│ │ ├── @ RegularExpressionNode (location: (1,15)-(1,21))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (1,15)-(1,16) = "/"
│ │ │ ├── content_loc: (1,16)-(1,20) = "^\\s{"
│ │ │ ├── closing_loc: (1,20)-(1,21) = "/"
@@ -52,7 +52,7 @@
│ ├── flags: ∅
│ └── arguments: (length: 2)
│ ├── @ RegularExpressionNode (location: (5,15)-(5,21))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (5,15)-(5,16) = "/"
│ │ ├── content_loc: (5,16)-(5,20) = "^\\s{"
│ │ ├── closing_loc: (5,20)-(5,21) = "/"
diff --git a/test/prism/snapshots/newline_terminated.txt b/test/prism/snapshots/newline_terminated.txt
index 496e86fbe4..6a3b28dba9 100644
--- a/test/prism/snapshots/newline_terminated.txt
+++ b/test/prism/snapshots/newline_terminated.txt
@@ -100,7 +100,7 @@
│ ├── closing_loc: (37,3)-(38,0) = "\n"
│ └── unescaped: "foo"
└── @ RegularExpressionNode (location: (39,0)-(41,0))
- ├── flags: ∅
+ ├── flags: forced_us_ascii_encoding
├── opening_loc: (39,0)-(40,0) = "%r\n"
├── content_loc: (40,0)-(40,3) = "foo"
├── closing_loc: (40,3)-(41,0) = "\n"
diff --git a/test/prism/snapshots/patterns.txt b/test/prism/snapshots/patterns.txt
index 96205349d3..4efd3159fc 100644
--- a/test/prism/snapshots/patterns.txt
+++ b/test/prism/snapshots/patterns.txt
@@ -165,7 +165,7 @@
│ │ └── block: ∅
│ ├── pattern:
│ │ @ RegularExpressionNode (location: (9,7)-(9,12))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (9,7)-(9,8) = "/"
│ │ ├── content_loc: (9,8)-(9,11) = "foo"
│ │ ├── closing_loc: (9,11)-(9,12) = "/"
@@ -719,14 +719,14 @@
│ │ ├── flags: ∅
│ │ ├── left:
│ │ │ @ RegularExpressionNode (location: (35,7)-(35,12))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,7)-(35,8) = "/"
│ │ │ ├── content_loc: (35,8)-(35,11) = "foo"
│ │ │ ├── closing_loc: (35,11)-(35,12) = "/"
│ │ │ └── unescaped: "foo"
│ │ ├── right:
│ │ │ @ RegularExpressionNode (location: (35,16)-(35,21))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,16)-(35,17) = "/"
│ │ │ ├── content_loc: (35,17)-(35,20) = "foo"
│ │ │ ├── closing_loc: (35,20)-(35,21) = "/"
@@ -2543,7 +2543,7 @@
│ │ └── block: ∅
│ ├── pattern:
│ │ @ RegularExpressionNode (location: (112,7)-(112,12))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (112,7)-(112,8) = "/"
│ │ ├── content_loc: (112,8)-(112,11) = "foo"
│ │ ├── closing_loc: (112,11)-(112,12) = "/"
@@ -3126,7 +3126,7 @@
│ │ └── @ InNode (location: (143,10)-(143,23))
│ │ ├── pattern:
│ │ │ @ RegularExpressionNode (location: (143,13)-(143,18))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (143,13)-(143,14) = "/"
│ │ │ ├── content_loc: (143,14)-(143,17) = "foo"
│ │ │ ├── closing_loc: (143,17)-(143,18) = "/"
@@ -3914,7 +3914,7 @@
│ │ │ │ @ StatementsNode (location: (170,13)-(170,18))
│ │ │ │ └── body: (length: 1)
│ │ │ │ └── @ RegularExpressionNode (location: (170,13)-(170,18))
- │ │ │ │ ├── flags: ∅
+ │ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (170,13)-(170,14) = "/"
│ │ │ │ ├── content_loc: (170,14)-(170,17) = "foo"
│ │ │ │ ├── closing_loc: (170,17)-(170,18) = "/"
diff --git a/test/prism/snapshots/regex.txt b/test/prism/snapshots/regex.txt
index 8f3e259516..9e19bbb18d 100644
--- a/test/prism/snapshots/regex.txt
+++ b/test/prism/snapshots/regex.txt
@@ -15,7 +15,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,4)-(1,9))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,4)-(1,5) = "/"
│ │ ├── content_loc: (1,5)-(1,8) = "bar"
│ │ ├── closing_loc: (1,8)-(1,9) = "/"
@@ -23,13 +23,13 @@
│ ├── closing_loc: ∅
│ └── block: ∅
├── @ RegularExpressionNode (location: (3,0)-(3,8))
- │ ├── flags: ignore_case
+ │ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,3) = "%r{"
│ ├── content_loc: (3,3)-(3,6) = "abc"
│ ├── closing_loc: (3,6)-(3,8) = "}i"
│ └── unescaped: "abc"
├── @ RegularExpressionNode (location: (5,0)-(5,5))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (5,0)-(5,1) = "/"
│ ├── content_loc: (5,1)-(5,4) = "a\\b"
│ ├── closing_loc: (5,4)-(5,5) = "/"
@@ -92,7 +92,7 @@
│ │ │ │ ├── flags: ∅
│ │ │ │ ├── receiver:
│ │ │ │ │ @ RegularExpressionNode (location: (11,1)-(11,14))
- │ │ │ │ │ ├── flags: ∅
+ │ │ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ │ ├── opening_loc: (11,1)-(11,2) = "/"
│ │ │ │ │ ├── content_loc: (11,2)-(11,13) = "(?<foo>bar)"
│ │ │ │ │ ├── closing_loc: (11,13)-(11,14) = "/"
@@ -127,31 +127,31 @@
│ ├── opening_loc: (11,0)-(11,1) = "["
│ └── closing_loc: (11,26)-(11,27) = "]"
├── @ RegularExpressionNode (location: (13,0)-(13,6))
- │ ├── flags: ignore_case
+ │ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (13,0)-(13,1) = "/"
│ ├── content_loc: (13,1)-(13,4) = "abc"
│ ├── closing_loc: (13,4)-(13,6) = "/i"
│ └── unescaped: "abc"
├── @ RegularExpressionNode (location: (15,0)-(15,26))
- │ ├── flags: ignore_case
+ │ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (15,0)-(15,3) = "%r/"
│ ├── content_loc: (15,3)-(15,24) = "[a-z$._?][\\w$.?\#@~]*:"
│ ├── closing_loc: (15,24)-(15,26) = "/i"
│ └── unescaped: "[a-z$._?][\\w$.?\#@~]*:"
├── @ RegularExpressionNode (location: (17,0)-(17,37))
- │ ├── flags: ignore_case
+ │ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (17,0)-(17,3) = "%r/"
│ ├── content_loc: (17,3)-(17,35) = "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)"
│ ├── closing_loc: (17,35)-(17,37) = "/i"
│ └── unescaped: "([a-z$._?][\\w$.?\#@~]*)(\\s+)(equ)"
├── @ RegularExpressionNode (location: (19,0)-(19,25))
- │ ├── flags: ignore_case
+ │ ├── flags: ignore_case, forced_us_ascii_encoding
│ ├── opening_loc: (19,0)-(19,3) = "%r/"
│ ├── content_loc: (19,3)-(19,23) = "[a-z$._?][\\w$.?\#@~]*"
│ ├── closing_loc: (19,23)-(19,25) = "/i"
│ └── unescaped: "[a-z$._?][\\w$.?\#@~]*"
├── @ RegularExpressionNode (location: (21,0)-(24,1))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (21,0)-(21,3) = "%r("
│ ├── content_loc: (21,3)-(24,0) = "\n(?:[\\w\#$%_']|\\(\\)|\\(,\\)|\\[\\]|[0-9])*\n (?:[\\w\#$%_']+)\n"
│ ├── closing_loc: (24,0)-(24,1) = ")"
@@ -160,7 +160,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (26,0)-(26,8))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (26,0)-(26,1) = "/"
│ │ ├── content_loc: (26,1)-(26,7) = "(?#\\))"
│ │ ├── closing_loc: (26,7)-(26,8) = "/"
@@ -182,7 +182,7 @@
│ ├── closing_loc: ∅
│ └── block: ∅
├── @ RegularExpressionNode (location: (28,0)-(28,9))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (28,0)-(28,3) = "%r#"
│ ├── content_loc: (28,3)-(28,8) = "pound"
│ ├── closing_loc: (28,8)-(28,9) = "#"
@@ -220,7 +220,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (32,0)-(33,4))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (32,0)-(32,1) = "/"
│ │ │ ├── content_loc: (32,1)-(33,3) = "(?<a\\\nb>)"
│ │ │ ├── closing_loc: (33,3)-(33,4) = "/"
@@ -254,7 +254,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (35,0)-(35,18))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (35,0)-(35,1) = "/"
│ │ │ ├── content_loc: (35,1)-(35,17) = "(?<abc>)(?<abc>)"
│ │ │ ├── closing_loc: (35,17)-(35,18) = "/"
@@ -286,7 +286,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (37,0)-(37,10))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (37,0)-(37,1) = "/"
│ │ ├── content_loc: (37,1)-(37,9) = "(?<a b>)"
│ │ ├── closing_loc: (37,9)-(37,10) = "/"
@@ -338,7 +338,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (40,6)-(40,14))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (40,6)-(40,7) = "/"
│ │ │ ├── content_loc: (40,7)-(40,13) = "(?<a>)"
│ │ │ ├── closing_loc: (40,13)-(40,14) = "/"
diff --git a/test/prism/snapshots/seattlerb/TestRubyParserShared.txt b/test/prism/snapshots/seattlerb/TestRubyParserShared.txt
index 4a2a48b794..fabc92e477 100644
--- a/test/prism/snapshots/seattlerb/TestRubyParserShared.txt
+++ b/test/prism/snapshots/seattlerb/TestRubyParserShared.txt
@@ -70,7 +70,7 @@
│ ├── opening_loc: (26,0)-(26,3) = "%i["
│ └── closing_loc: (29,0)-(29,1) = "]"
├── @ RegularExpressionNode (location: (31,0)-(34,1))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (31,0)-(31,3) = "%r["
│ ├── content_loc: (31,3)-(34,0) = "\n\n\n"
│ ├── closing_loc: (34,0)-(34,1) = "]"
diff --git a/test/prism/snapshots/seattlerb/bug190.txt b/test/prism/snapshots/seattlerb/bug190.txt
index f7eaefa5c6..b261a166cf 100644
--- a/test/prism/snapshots/seattlerb/bug190.txt
+++ b/test/prism/snapshots/seattlerb/bug190.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,6))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,6))
- ├── flags: ∅
+ ├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,3) = "%r'"
├── content_loc: (1,3)-(1,5) = "\\'"
├── closing_loc: (1,5)-(1,6) = "'"
diff --git a/test/prism/snapshots/seattlerb/bug_case_when_regexp.txt b/test/prism/snapshots/seattlerb/bug_case_when_regexp.txt
index f6a6f41c89..0cc1ca05e1 100644
--- a/test/prism/snapshots/seattlerb/bug_case_when_regexp.txt
+++ b/test/prism/snapshots/seattlerb/bug_case_when_regexp.txt
@@ -16,7 +16,7 @@
│ ├── keyword_loc: (1,9)-(1,13) = "when"
│ ├── conditions: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,14)-(1,17))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,14)-(1,15) = "/"
│ │ ├── content_loc: (1,15)-(1,16) = "x"
│ │ ├── closing_loc: (1,16)-(1,17) = "/"
diff --git a/test/prism/snapshots/seattlerb/bug_cond_pct.txt b/test/prism/snapshots/seattlerb/bug_cond_pct.txt
index 73cb18f508..cbf3bc3ef0 100644
--- a/test/prism/snapshots/seattlerb/bug_cond_pct.txt
+++ b/test/prism/snapshots/seattlerb/bug_cond_pct.txt
@@ -10,7 +10,7 @@
│ ├── keyword_loc: (1,6)-(1,10) = "when"
│ ├── conditions: (length: 1)
│ │ └── @ RegularExpressionNode (location: (1,11)-(1,23))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,11)-(1,14) = "%r%"
│ │ ├── content_loc: (1,14)-(1,22) = "blahblah"
│ │ ├── closing_loc: (1,22)-(1,23) = "%"
diff --git a/test/prism/snapshots/seattlerb/case_in.txt b/test/prism/snapshots/seattlerb/case_in.txt
index 6a1cc56da6..e7e291c63f 100644
--- a/test/prism/snapshots/seattlerb/case_in.txt
+++ b/test/prism/snapshots/seattlerb/case_in.txt
@@ -338,7 +338,7 @@
│ │ └── @ InNode (location: (46,0)-(46,11))
│ │ ├── pattern:
│ │ │ @ RegularExpressionNode (location: (46,3)-(46,11))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (46,3)-(46,4) = "/"
│ │ │ ├── content_loc: (46,4)-(46,10) = "regexp"
│ │ │ ├── closing_loc: (46,10)-(46,11) = "/"
diff --git a/test/prism/snapshots/seattlerb/regexp.txt b/test/prism/snapshots/seattlerb/regexp.txt
index abe17918e5..06cf99264e 100644
--- a/test/prism/snapshots/seattlerb/regexp.txt
+++ b/test/prism/snapshots/seattlerb/regexp.txt
@@ -4,31 +4,31 @@
@ StatementsNode (location: (1,0)-(9,13))
└── body: (length: 5)
├── @ RegularExpressionNode (location: (1,0)-(1,5))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (1,0)-(1,1) = "/"
│ ├── content_loc: (1,1)-(1,4) = "wtf"
│ ├── closing_loc: (1,4)-(1,5) = "/"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (3,0)-(3,6))
- │ ├── flags: multi_line
+ │ ├── flags: multi_line, forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,1) = "/"
│ ├── content_loc: (3,1)-(3,4) = "wtf"
│ ├── closing_loc: (3,4)-(3,6) = "/m"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (5,0)-(5,6))
- │ ├── flags: ascii_8bit
+ │ ├── flags: ascii_8bit, forced_us_ascii_encoding
│ ├── opening_loc: (5,0)-(5,1) = "/"
│ ├── content_loc: (5,1)-(5,4) = "wtf"
│ ├── closing_loc: (5,4)-(5,6) = "/n"
│ └── unescaped: "wtf"
├── @ RegularExpressionNode (location: (7,0)-(7,7))
- │ ├── flags: multi_line, ascii_8bit
+ │ ├── flags: multi_line, ascii_8bit, forced_us_ascii_encoding
│ ├── opening_loc: (7,0)-(7,1) = "/"
│ ├── content_loc: (7,1)-(7,4) = "wtf"
│ ├── closing_loc: (7,4)-(7,7) = "/nm"
│ └── unescaped: "wtf"
└── @ RegularExpressionNode (location: (9,0)-(9,13))
- ├── flags: multi_line, ascii_8bit
+ ├── flags: multi_line, ascii_8bit, forced_us_ascii_encoding
├── opening_loc: (9,0)-(9,1) = "/"
├── content_loc: (9,1)-(9,4) = "wtf"
├── closing_loc: (9,4)-(9,13) = "/nmnmnmnm"
diff --git a/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt b/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt
index b6bf242612..4dbedc44ca 100644
--- a/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt
+++ b/test/prism/snapshots/seattlerb/regexp_esc_C_slash.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,7))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,7))
- ├── flags: ∅
+ ├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,6) = "\\cC\\d"
├── closing_loc: (1,6)-(1,7) = "/"
diff --git a/test/prism/snapshots/seattlerb/regexp_esc_u.txt b/test/prism/snapshots/seattlerb/regexp_esc_u.txt
index 7b1ebdc636..bca451eb3b 100644
--- a/test/prism/snapshots/seattlerb/regexp_esc_u.txt
+++ b/test/prism/snapshots/seattlerb/regexp_esc_u.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,17))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,17))
- ├── flags: ∅
+ ├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,16) = "[\\u0021-\\u0027]"
├── closing_loc: (1,16)-(1,17) = "/"
diff --git a/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt b/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt
index 8dd265af5f..487161b4d0 100644
--- a/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt
+++ b/test/prism/snapshots/seattlerb/regexp_unicode_curlies.txt
@@ -4,13 +4,13 @@
@ StatementsNode (location: (1,0)-(3,8))
└── body: (length: 2)
├── @ RegularExpressionNode (location: (1,0)-(1,15))
- │ ├── flags: ∅
+ │ ├── flags: forced_utf8_encoding
│ ├── opening_loc: (1,0)-(1,1) = "/"
│ ├── content_loc: (1,1)-(1,14) = "\\u{c0de babe}"
│ ├── closing_loc: (1,14)-(1,15) = "/"
│ └── unescaped: "\\u{c0de babe}"
└── @ RegularExpressionNode (location: (3,0)-(3,8))
- ├── flags: ∅
+ ├── flags: forced_utf8_encoding
├── opening_loc: (3,0)-(3,1) = "/"
├── content_loc: (3,1)-(3,7) = "\\u{df}"
├── closing_loc: (3,7)-(3,8) = "/"
diff --git a/test/prism/snapshots/spanning_heredoc_newlines.txt b/test/prism/snapshots/spanning_heredoc_newlines.txt
index 171b0ff974..e3609ddbba 100644
--- a/test/prism/snapshots/spanning_heredoc_newlines.txt
+++ b/test/prism/snapshots/spanning_heredoc_newlines.txt
@@ -46,7 +46,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (5,4)-(8,0))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (5,4)-(6,0) = "%r\n"
│ │ ├── content_loc: (6,0)-(6,0) = ""
│ │ ├── closing_loc: (7,0)-(8,0) = "\n"
diff --git a/test/prism/snapshots/unescaping.txt b/test/prism/snapshots/unescaping.txt
index 00c5f59cd1..456ef226d0 100644
--- a/test/prism/snapshots/unescaping.txt
+++ b/test/prism/snapshots/unescaping.txt
@@ -15,7 +15,7 @@
│ ├── opening_loc: (1,0)-(1,1) = "["
│ └── closing_loc: (1,9)-(1,10) = "]"
├── @ RegularExpressionNode (location: (3,0)-(3,8))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (3,0)-(3,1) = "/"
│ ├── content_loc: (3,1)-(3,7) = "\\c\#{1}"
│ ├── closing_loc: (3,7)-(3,8) = "/"
diff --git a/test/prism/snapshots/unparser/corpus/literal/if.txt b/test/prism/snapshots/unparser/corpus/literal/if.txt
index 6a78779dc9..00eeba179c 100644
--- a/test/prism/snapshots/unparser/corpus/literal/if.txt
+++ b/test/prism/snapshots/unparser/corpus/literal/if.txt
@@ -7,7 +7,7 @@
│ ├── if_keyword_loc: (1,0)-(1,2) = "if"
│ ├── predicate:
│ │ @ MatchLastLineNode (location: (1,3)-(1,8))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,3)-(1,4) = "/"
│ │ ├── content_loc: (1,4)-(1,7) = "foo"
│ │ ├── closing_loc: (1,7)-(1,8) = "/"
diff --git a/test/prism/snapshots/unparser/corpus/literal/literal.txt b/test/prism/snapshots/unparser/corpus/literal/literal.txt
index 8ed1bf5fe9..ba7dd70b5b 100644
--- a/test/prism/snapshots/unparser/corpus/literal/literal.txt
+++ b/test/prism/snapshots/unparser/corpus/literal/literal.txt
@@ -566,13 +566,13 @@
│ ├── closing_loc: (48,2)-(48,3) = "\""
│ └── unescaped: ""
├── @ RegularExpressionNode (location: (49,0)-(49,5))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (49,0)-(49,1) = "/"
│ ├── content_loc: (49,1)-(49,4) = "foo"
│ ├── closing_loc: (49,4)-(49,5) = "/"
│ └── unescaped: "foo"
├── @ RegularExpressionNode (location: (50,0)-(50,28))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (50,0)-(50,1) = "/"
│ ├── content_loc: (50,1)-(50,27) = "[^-+',.\\/:@[:alnum:]\\[\\]]+"
│ ├── closing_loc: (50,27)-(50,28) = "/"
@@ -633,25 +633,25 @@
│ │ └── closing_loc: (53,11)-(53,12) = "}"
│ └── closing_loc: (53,12)-(53,13) = "/"
├── @ RegularExpressionNode (location: (54,0)-(54,4))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (54,0)-(54,1) = "/"
│ ├── content_loc: (54,1)-(54,3) = "\\n"
│ ├── closing_loc: (54,3)-(54,4) = "/"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (55,0)-(55,4))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (55,0)-(55,1) = "/"
│ ├── content_loc: (55,1)-(55,3) = "\\n"
│ ├── closing_loc: (55,3)-(55,4) = "/"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (56,0)-(56,5))
- │ ├── flags: extended
+ │ ├── flags: extended, forced_us_ascii_encoding
│ ├── opening_loc: (56,0)-(56,1) = "/"
│ ├── content_loc: (56,1)-(56,3) = "\\n"
│ ├── closing_loc: (56,3)-(56,5) = "/x"
│ └── unescaped: "\\n"
├── @ RegularExpressionNode (location: (57,0)-(57,7))
- │ ├── flags: extended
+ │ ├── flags: extended, forced_us_ascii_encoding
│ ├── opening_loc: (57,0)-(57,1) = "/"
│ ├── content_loc: (57,1)-(57,5) = "\\/\\/"
│ ├── closing_loc: (57,5)-(57,7) = "/x"
diff --git a/test/prism/snapshots/unparser/corpus/literal/send.txt b/test/prism/snapshots/unparser/corpus/literal/send.txt
index 2fa4fd621b..b7eb064717 100644
--- a/test/prism/snapshots/unparser/corpus/literal/send.txt
+++ b/test/prism/snapshots/unparser/corpus/literal/send.txt
@@ -425,7 +425,7 @@
│ │ │ ├── flags: ∅
│ │ │ ├── receiver:
│ │ │ │ @ RegularExpressionNode (location: (37,1)-(37,6))
- │ │ │ │ ├── flags: ∅
+ │ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (37,1)-(37,2) = "/"
│ │ │ │ ├── content_loc: (37,2)-(37,5) = "bar"
│ │ │ │ ├── closing_loc: (37,5)-(37,6) = "/"
@@ -511,7 +511,7 @@
│ │ │ │ ├── flags: ∅
│ │ │ │ └── arguments: (length: 1)
│ │ │ │ └── @ RegularExpressionNode (location: (39,8)-(39,13))
- │ │ │ │ ├── flags: ∅
+ │ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ │ ├── opening_loc: (39,8)-(39,9) = "/"
│ │ │ │ ├── content_loc: (39,9)-(39,12) = "bar"
│ │ │ │ ├── closing_loc: (39,12)-(39,13) = "/"
@@ -531,7 +531,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (40,0)-(40,5))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (40,0)-(40,1) = "/"
│ │ ├── content_loc: (40,1)-(40,4) = "bar"
│ │ ├── closing_loc: (40,4)-(40,5) = "/"
@@ -556,7 +556,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ RegularExpressionNode (location: (41,0)-(41,5))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (41,0)-(41,1) = "/"
│ │ ├── content_loc: (41,1)-(41,4) = "bar"
│ │ ├── closing_loc: (41,4)-(41,5) = "/"
@@ -758,7 +758,7 @@
│ │ ├── flags: ∅
│ │ └── arguments: (length: 1)
│ │ └── @ RegularExpressionNode (location: (49,7)-(49,12))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (49,7)-(49,8) = "/"
│ │ ├── content_loc: (49,8)-(49,11) = "bar"
│ │ ├── closing_loc: (49,11)-(49,12) = "/"
@@ -1007,7 +1007,7 @@
│ │ │ ├── flags: ∅
│ │ │ └── arguments: (length: 1)
│ │ │ └── @ RegularExpressionNode (location: (57,11)-(57,16))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (57,11)-(57,12) = "/"
│ │ │ ├── content_loc: (57,12)-(57,15) = "bar"
│ │ │ ├── closing_loc: (57,15)-(57,16) = "/"
diff --git a/test/prism/snapshots/unparser/corpus/semantic/literal.txt b/test/prism/snapshots/unparser/corpus/semantic/literal.txt
index 7f76e2f561..59e02be64f 100644
--- a/test/prism/snapshots/unparser/corpus/semantic/literal.txt
+++ b/test/prism/snapshots/unparser/corpus/semantic/literal.txt
@@ -31,13 +31,13 @@
│ ├── closing_loc: ∅
│ └── unescaped: "c"
├── @ RegularExpressionNode (location: (9,0)-(9,5))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (9,0)-(9,3) = "%r("
│ ├── content_loc: (9,3)-(9,4) = "/"
│ ├── closing_loc: (9,4)-(9,5) = ")"
│ └── unescaped: "/"
├── @ RegularExpressionNode (location: (10,0)-(10,6))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (10,0)-(10,3) = "%r("
│ ├── content_loc: (10,3)-(10,5) = "\\)"
│ ├── closing_loc: (10,5)-(10,6) = ")"
diff --git a/test/prism/snapshots/whitequark/bug_regex_verification.txt b/test/prism/snapshots/whitequark/bug_regex_verification.txt
index 5ca85e34c7..4464b66e38 100644
--- a/test/prism/snapshots/whitequark/bug_regex_verification.txt
+++ b/test/prism/snapshots/whitequark/bug_regex_verification.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,5))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,5))
- ├── flags: extended
+ ├── flags: extended, forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,3) = "#)"
├── closing_loc: (1,3)-(1,5) = "/x"
diff --git a/test/prism/snapshots/whitequark/cond_match_current_line.txt b/test/prism/snapshots/whitequark/cond_match_current_line.txt
index fb5ff33ed5..700d0966f7 100644
--- a/test/prism/snapshots/whitequark/cond_match_current_line.txt
+++ b/test/prism/snapshots/whitequark/cond_match_current_line.txt
@@ -7,7 +7,7 @@
│ ├── flags: ∅
│ ├── receiver:
│ │ @ MatchLastLineNode (location: (1,1)-(1,6))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (1,1)-(1,2) = "/"
│ │ ├── content_loc: (1,2)-(1,5) = "wat"
│ │ ├── closing_loc: (1,5)-(1,6) = "/"
@@ -23,7 +23,7 @@
├── if_keyword_loc: (3,0)-(3,2) = "if"
├── predicate:
│ @ MatchLastLineNode (location: (3,3)-(3,8))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (3,3)-(3,4) = "/"
│ ├── content_loc: (3,4)-(3,7) = "wat"
│ ├── closing_loc: (3,7)-(3,8) = "/"
diff --git a/test/prism/snapshots/whitequark/interp_digit_var.txt b/test/prism/snapshots/whitequark/interp_digit_var.txt
index 6c34760bc3..09d9098105 100644
--- a/test/prism/snapshots/whitequark/interp_digit_var.txt
+++ b/test/prism/snapshots/whitequark/interp_digit_var.txt
@@ -106,13 +106,13 @@
│ ├── closing_loc: (23,8)-(23,9) = "}"
│ └── unescaped: "\#@@1"
├── @ RegularExpressionNode (location: (25,1)-(25,8))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (25,1)-(25,4) = "%r{"
│ ├── content_loc: (25,4)-(25,7) = "\#@1"
│ ├── closing_loc: (25,7)-(25,8) = "}"
│ └── unescaped: "\#@1"
├── @ RegularExpressionNode (location: (27,1)-(27,9))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (27,1)-(27,4) = "%r{"
│ ├── content_loc: (27,4)-(27,8) = "\#@@1"
│ ├── closing_loc: (27,8)-(27,9) = "}"
@@ -188,13 +188,13 @@
│ ├── closing_loc: (47,6)-(47,7) = "'"
│ └── unescaped: "\#@@1"
├── @ RegularExpressionNode (location: (49,1)-(49,6))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (49,1)-(49,2) = "/"
│ ├── content_loc: (49,2)-(49,5) = "\#@1"
│ ├── closing_loc: (49,5)-(49,6) = "/"
│ └── unescaped: "\#@1"
├── @ RegularExpressionNode (location: (51,1)-(51,7))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (51,1)-(51,2) = "/"
│ ├── content_loc: (51,2)-(51,6) = "\#@@1"
│ ├── closing_loc: (51,6)-(51,7) = "/"
diff --git a/test/prism/snapshots/whitequark/lvar_injecting_match.txt b/test/prism/snapshots/whitequark/lvar_injecting_match.txt
index a1d70e9ccf..0d1df23d0d 100644
--- a/test/prism/snapshots/whitequark/lvar_injecting_match.txt
+++ b/test/prism/snapshots/whitequark/lvar_injecting_match.txt
@@ -9,7 +9,7 @@
│ │ ├── flags: ∅
│ │ ├── receiver:
│ │ │ @ RegularExpressionNode (location: (1,0)-(1,15))
- │ │ │ ├── flags: ∅
+ │ │ │ ├── flags: forced_us_ascii_encoding
│ │ │ ├── opening_loc: (1,0)-(1,1) = "/"
│ │ │ ├── content_loc: (1,1)-(1,14) = "(?<match>bar)"
│ │ │ ├── closing_loc: (1,14)-(1,15) = "/"
diff --git a/test/prism/snapshots/whitequark/parser_bug_830.txt b/test/prism/snapshots/whitequark/parser_bug_830.txt
index e920108731..e52b291d6a 100644
--- a/test/prism/snapshots/whitequark/parser_bug_830.txt
+++ b/test/prism/snapshots/whitequark/parser_bug_830.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,4))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,4))
- ├── flags: ∅
+ ├── flags: forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,3) = "\\("
├── closing_loc: (1,3)-(1,4) = "/"
diff --git a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
index 96cc5671a6..080d4d0e7d 100644
--- a/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
+++ b/test/prism/snapshots/whitequark/parser_slash_slash_n_escaping_in_literals.txt
@@ -55,7 +55,7 @@
│ ├── closing_loc: (17,1)-(17,2) = "}"
│ └── unescaped: "a\\\nb"
├── @ RegularExpressionNode (location: (19,0)-(20,2))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (19,0)-(19,3) = "%r{"
│ ├── content_loc: (19,3)-(20,1) = "a\\\nb"
│ ├── closing_loc: (20,1)-(20,2) = "}"
@@ -96,7 +96,7 @@
│ ├── closing_loc: (35,1)-(35,2) = "'"
│ └── unescaped: "a\\\nb"
├── @ RegularExpressionNode (location: (37,0)-(38,2))
- │ ├── flags: ∅
+ │ ├── flags: forced_us_ascii_encoding
│ ├── opening_loc: (37,0)-(37,1) = "/"
│ ├── content_loc: (37,1)-(38,1) = "a\\\nb"
│ ├── closing_loc: (38,1)-(38,2) = "/"
diff --git a/test/prism/snapshots/whitequark/regex_plain.txt b/test/prism/snapshots/whitequark/regex_plain.txt
index 34fe61c687..df771f7a21 100644
--- a/test/prism/snapshots/whitequark/regex_plain.txt
+++ b/test/prism/snapshots/whitequark/regex_plain.txt
@@ -4,7 +4,7 @@
@ StatementsNode (location: (1,0)-(1,10))
└── body: (length: 1)
└── @ RegularExpressionNode (location: (1,0)-(1,10))
- ├── flags: ignore_case, multi_line
+ ├── flags: ignore_case, multi_line, forced_us_ascii_encoding
├── opening_loc: (1,0)-(1,1) = "/"
├── content_loc: (1,1)-(1,7) = "source"
├── closing_loc: (1,7)-(1,10) = "/im"
diff --git a/test/prism/snapshots/whitequark/ruby_bug_11873.txt b/test/prism/snapshots/whitequark/ruby_bug_11873.txt
index af04f59b5e..2999662cc4 100644
--- a/test/prism/snapshots/whitequark/ruby_bug_11873.txt
+++ b/test/prism/snapshots/whitequark/ruby_bug_11873.txt
@@ -112,7 +112,7 @@
│ │ │ ├── closing_loc: (3,7)-(3,8) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (3,10)-(3,13))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (3,10)-(3,11) = "/"
│ │ ├── content_loc: (3,11)-(3,12) = "x"
│ │ ├── closing_loc: (3,12)-(3,13) = "/"
@@ -173,7 +173,7 @@
│ │ │ ├── closing_loc: (5,7)-(5,8) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (5,10)-(5,14))
- │ │ ├── flags: multi_line
+ │ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (5,10)-(5,11) = "/"
│ │ ├── content_loc: (5,11)-(5,12) = "x"
│ │ ├── closing_loc: (5,12)-(5,14) = "/m"
@@ -295,7 +295,7 @@
│ │ │ ├── closing_loc: (9,8)-(9,9) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (9,11)-(9,14))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (9,11)-(9,12) = "/"
│ │ ├── content_loc: (9,12)-(9,13) = "x"
│ │ ├── closing_loc: (9,13)-(9,14) = "/"
@@ -356,7 +356,7 @@
│ │ │ ├── closing_loc: (11,8)-(11,9) = ")"
│ │ │ └── block: ∅
│ │ └── @ RegularExpressionNode (location: (11,11)-(11,15))
- │ │ ├── flags: multi_line
+ │ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (11,11)-(11,12) = "/"
│ │ ├── content_loc: (11,12)-(11,13) = "x"
│ │ ├── closing_loc: (11,13)-(11,15) = "/m"
@@ -488,7 +488,7 @@
│ │ │ ├── opening_loc: (15,3)-(15,4) = "{"
│ │ │ └── closing_loc: (15,7)-(15,8) = "}"
│ │ └── @ RegularExpressionNode (location: (15,10)-(15,13))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (15,10)-(15,11) = "/"
│ │ ├── content_loc: (15,11)-(15,12) = "x"
│ │ ├── closing_loc: (15,12)-(15,13) = "/"
@@ -554,7 +554,7 @@
│ │ │ ├── opening_loc: (17,3)-(17,4) = "{"
│ │ │ └── closing_loc: (17,7)-(17,8) = "}"
│ │ └── @ RegularExpressionNode (location: (17,10)-(17,14))
- │ │ ├── flags: multi_line
+ │ │ ├── flags: multi_line, forced_us_ascii_encoding
│ │ ├── opening_loc: (17,10)-(17,11) = "/"
│ │ ├── content_loc: (17,11)-(17,12) = "x"
│ │ ├── closing_loc: (17,12)-(17,14) = "/m"
@@ -686,7 +686,7 @@
│ │ │ ├── opening_loc: (21,3)-(21,4) = "{"
│ │ │ └── closing_loc: (21,8)-(21,9) = "}"
│ │ └── @ RegularExpressionNode (location: (21,11)-(21,14))
- │ │ ├── flags: ∅
+ │ │ ├── flags: forced_us_ascii_encoding
│ │ ├── opening_loc: (21,11)-(21,12) = "/"
│ │ ├── content_loc: (21,12)-(21,13) = "x"
│ │ ├── closing_loc: (21,13)-(21,14) = "/"
@@ -752,7 +752,7 @@
│ │ ├── opening_loc: (23,3)-(23,4) = "{"
│ │ └── closing_loc: (23,8)-(23,9) = "}"
│ └── @ RegularExpressionNode (location: (23,11)-(23,15))
- │ ├── flags: multi_line
+ │ ├── flags: multi_line, forced_us_ascii_encoding
│ ├── opening_loc: (23,11)-(23,12) = "/"
│ ├── content_loc: (23,12)-(23,13) = "x"
│ ├── closing_loc: (23,13)-(23,15) = "/m"