about | summary | refs | log | tree | commit | diff | stats
path: root/prism
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-10-06 22:00:01 -0400
committer: Kevin Newton <kddnewton@gmail.com> 2023-10-13 15:31:30 -0400
commit: 1a941c70e42c1e64b961088e953ded6a148e1351 (patch)
tree: 58b597f3324bca582f441e9f5921bddc8d37b03a /prism
parent: e179e62bd22f85557decca5bfb81ea511ee5c5de (diff)
download: ruby-1a941c70e42c1e64b961088e953ded6a148e1351.tar.gz
[ruby/prism] Track current_string to pass forward for character literals
https://github.com/ruby/prism/commit/be1d8ae8bb
Diffstat (limited to 'prism')
-rw-r--r--  prism/parser.h   5
-rw-r--r--  prism/prism.c   34
2 files changed, 19 insertions, 20 deletions
diff --git a/prism/parser.h b/prism/parser.h
index 0553e29819..414c26b5aa 100644
--- a/prism/parser.h
+++ b/prism/parser.h
@@ -8,6 +8,7 @@
#include "prism/util/pm_list.h"
#include "prism/util/pm_newline_list.h"
#include "prism/util/pm_state_stack.h"
+#include "prism/util/pm_string.h"
#include <stdbool.h>
@@ -393,6 +394,10 @@ struct pm_parser {
// when we find tokens that we need it for.
pm_node_flags_t integer_base;
+ // This string is used to pass information from the lexer to the parser. It
+ // is particularly necessary because of escape sequences.
+ pm_string_t current_string;
+
// Whether or not we're at the beginning of a command
bool command_start;
diff --git a/prism/prism.c b/prism/prism.c
index 99cfe17c4f..3b4261e5ca 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -4860,8 +4860,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
return node;
}
-
-#undef PM_EMPTY_STRING
#undef PM_ALLOC_NODE
/******************************************************************************/
@@ -6099,6 +6097,7 @@ lex_question_mark(pm_parser_t *parser) {
if (parser->current.end >= parser->end) {
pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
+ pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
return PM_TOKEN_CHARACTER_LITERAL;
}
@@ -6112,6 +6111,8 @@ lex_question_mark(pm_parser_t *parser) {
if (parser->current.start[1] == '\\') {
lex_state_set(parser, PM_LEX_STATE_END);
parser->current.end += pm_unescape_calculate_difference(parser, parser->current.start + 1, PM_UNESCAPE_ALL, true);
+ pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
+ pm_unescape_manipulate_char_literal(parser, &parser->current_string, PM_UNESCAPE_ALL);
return PM_TOKEN_CHARACTER_LITERAL;
} else {
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -6128,6 +6129,7 @@ lex_question_mark(pm_parser_t *parser) {
) {
lex_state_set(parser, PM_LEX_STATE_END);
parser->current.end += encoding_width;
+ pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
return PM_TOKEN_CHARACTER_LITERAL;
}
}
@@ -8078,17 +8080,6 @@ pm_symbol_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *openin
}
static pm_string_node_t *
-pm_char_literal_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
- pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
-
- assert((content->end - content->start) >= 0);
- pm_string_shared_init(&node->unescaped, content->start, content->end);
-
- pm_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
- return node;
-}
-
-static pm_string_node_t *
pm_string_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
@@ -11763,16 +11754,17 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
content.start = content.start + 1;
pm_token_t closing = not_provided(parser);
- pm_node_t *node = (pm_node_t *) pm_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, PM_UNESCAPE_ALL);
+ pm_string_node_t *node = (pm_string_node_t *) pm_string_node_create(parser, &opening, &content, &closing);
+ node->unescaped = parser->current_string;
// Characters can be followed by strings in which case they are
// automatically concatenated.
if (match1(parser, PM_TOKEN_STRING_BEGIN)) {
pm_node_t *concat = parse_strings(parser);
- return (pm_node_t *) pm_string_concat_node_create(parser, node, concat);
+ return (pm_node_t *) pm_string_concat_node_create(parser, (pm_node_t *) node, concat);
}
- return node;
+ return (pm_node_t *) node;
}
case PM_TOKEN_CLASS_VARIABLE: {
parser_lex(parser);
@@ -14538,6 +14530,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
.constant_pool = PM_CONSTANT_POOL_EMPTY,
.newline_list = PM_NEWLINE_LIST_EMPTY,
.integer_base = 0,
+ .current_string = PM_EMPTY_STRING,
.command_start = true,
.recovering = false,
.encoding_changed = false,
@@ -14675,10 +14668,11 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
pm_parser_free(&parser);
}
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NODE_BASE_VALUE
#undef PM_CASE_KEYWORD
#undef PM_CASE_OPERATOR
#undef PM_CASE_WRITABLE
+#undef PM_EMPTY_STRING
+#undef PM_LOCATION_NODE_BASE_VALUE
+#undef PM_LOCATION_NODE_VALUE
+#undef PM_LOCATION_NULL_VALUE
+#undef PM_LOCATION_TOKEN_VALUE