From 0a8f3670d1c1aa4ec58a08642cccf5ee5dbf95ae Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 14 Sep 2023 09:01:20 -0400 Subject: [ruby/yarp] Introduce MatchWriteNode This rarely used node holds information about the local variables that need to get written in the case a regular expression is used on the left-hand side of a =~ operator and it has named capture groups. Note that we already "handled" these nodes by adding locals to the AST, but we didn't actually expose this information, making it difficult to compile. The general idea behind this node is that it maintains the ability for consumers to find all of the call nodes in the tree easily so it's not flattening down. However, it should be okay because you hopefully don't need any information in the call node to determine what to compile because the locals list is on the top level. https://github.com/ruby/yarp/commit/e136e7f9a8 --- test/yarp/location_test.rb | 4 ++ test/yarp/snapshots/regex.txt | 59 ++++++++++++---------- .../snapshots/whitequark/lvar_injecting_match.txt | 51 ++++++++++--------- yarp/config.yml | 13 +++++ yarp/yarp.c | 41 ++++++++++++--- 5 files changed, 109 insertions(+), 59 deletions(-) diff --git a/test/yarp/location_test.rb b/test/yarp/location_test.rb index d76fac5a80..3ef3da3faf 100644 --- a/test/yarp/location_test.rb +++ b/test/yarp/location_test.rb @@ -545,6 +545,10 @@ module YARP assert_location(MatchRequiredNode, "foo => bar") end + def test_MatchWriteNode + assert_location(MatchWriteNode, "/(?<foo>)/ =~ foo") + end + def test_ModuleNode assert_location(ModuleNode, "module Foo end") end diff --git a/test/yarp/snapshots/regex.txt b/test/yarp/snapshots/regex.txt index f474669ee0..6f0d42ce3f 100644 --- a/test/yarp/snapshots/regex.txt +++ b/test/yarp/snapshots/regex.txt @@ -84,34 +84,37 @@ │ └── flags: ∅ ├── @ ArrayNode (location: (59...86)) │ ├── elements: (length: 2) - │ │ ├── @ CallNode (location: (60...80)) - │ │ │ ├── receiver: - │ │ │ │ @ RegularExpressionNode (location: 
(60...73)) - │ │ │ │ ├── opening_loc: (60...61) = "/" - │ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)" - │ │ │ │ ├── closing_loc: (72...73) = "/" - │ │ │ │ ├── unescaped: "(?<foo>bar)" - │ │ │ │ └── flags: ∅ - │ │ │ ├── call_operator_loc: ∅ - │ │ │ ├── message_loc: (74...76) = "=~" - │ │ │ ├── opening_loc: ∅ - │ │ │ ├── arguments: - │ │ │ │ @ ArgumentsNode (location: (77...80)) - │ │ │ │ └── arguments: (length: 1) - │ │ │ │ └── @ CallNode (location: (77...80)) - │ │ │ │ ├── receiver: ∅ - │ │ │ │ ├── call_operator_loc: ∅ - │ │ │ │ ├── message_loc: (77...80) = "baz" - │ │ │ │ ├── opening_loc: ∅ - │ │ │ │ ├── arguments: ∅ - │ │ │ │ ├── closing_loc: ∅ - │ │ │ │ ├── block: ∅ - │ │ │ │ ├── flags: variable_call - │ │ │ │ └── name: "baz" - │ │ │ ├── closing_loc: ∅ - │ │ │ ├── block: ∅ - │ │ │ ├── flags: ∅ - │ │ │ └── name: "=~" + │ │ ├── @ MatchWriteNode (location: (60...80)) + │ │ │ ├── call: + │ │ │ │ @ CallNode (location: (60...80)) + │ │ │ │ ├── receiver: + │ │ │ │ │ @ RegularExpressionNode (location: (60...73)) + │ │ │ │ │ ├── opening_loc: (60...61) = "/" + │ │ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)" + │ │ │ │ │ ├── closing_loc: (72...73) = "/" + │ │ │ │ │ ├── unescaped: "(?<foo>bar)" + │ │ │ │ │ └── flags: ∅ + │ │ │ │ ├── call_operator_loc: ∅ + │ │ │ │ ├── message_loc: (74...76) = "=~" + │ │ │ │ ├── opening_loc: ∅ + │ │ │ │ ├── arguments: + │ │ │ │ │ @ ArgumentsNode (location: (77...80)) + │ │ │ │ │ └── arguments: (length: 1) + │ │ │ │ │ └── @ CallNode (location: (77...80)) + │ │ │ │ │ ├── receiver: ∅ + │ │ │ │ │ ├── call_operator_loc: ∅ + │ │ │ │ │ ├── message_loc: (77...80) = "baz" + │ │ │ │ │ ├── opening_loc: ∅ + │ │ │ │ │ ├── arguments: ∅ + │ │ │ │ │ ├── closing_loc: ∅ + │ │ │ │ │ ├── block: ∅ + │ │ │ │ │ ├── flags: variable_call + │ │ │ │ │ └── name: "baz" + │ │ │ │ ├── closing_loc: ∅ + │ │ │ │ ├── block: ∅ + │ │ │ │ ├── flags: ∅ + │ │ │ │ └── name: "=~" + │ │ │ └── locals: [:foo] │ │ └── @ LocalVariableReadNode (location: (82...85)) │ │ ├── name: :foo │ │ └── depth: 0 
diff --git a/test/yarp/snapshots/whitequark/lvar_injecting_match.txt b/test/yarp/snapshots/whitequark/lvar_injecting_match.txt index 2375854350..e570bf18a8 100644 --- a/test/yarp/snapshots/whitequark/lvar_injecting_match.txt +++ b/test/yarp/snapshots/whitequark/lvar_injecting_match.txt @@ -3,30 +3,33 @@ └── statements: @ StatementsNode (location: (0...31)) └── body: (length: 2) - ├── @ CallNode (location: (0...24)) - │ ├── receiver: - │ │ @ RegularExpressionNode (location: (0...15)) - │ │ ├── opening_loc: (0...1) = "/" - │ │ ├── content_loc: (1...14) = "(?<match>bar)" - │ │ ├── closing_loc: (14...15) = "/" - │ │ ├── unescaped: "(?<match>bar)" - │ │ └── flags: ∅ - │ ├── call_operator_loc: ∅ - │ ├── message_loc: (16...18) = "=~" - │ ├── opening_loc: ∅ - │ ├── arguments: - │ │ @ ArgumentsNode (location: (19...24)) - │ │ └── arguments: (length: 1) - │ │ └── @ StringNode (location: (19...24)) - │ │ ├── flags: ∅ - │ │ ├── opening_loc: (19...20) = "'" - │ │ ├── content_loc: (20...23) = "bar" - │ │ ├── closing_loc: (23...24) = "'" - │ │ └── unescaped: "bar" - │ ├── closing_loc: ∅ - │ ├── block: ∅ - │ ├── flags: ∅ - │ └── name: "=~" + ├── @ MatchWriteNode (location: (0...24)) + │ ├── call: + │ │ @ CallNode (location: (0...24)) + │ │ ├── receiver: + │ │ │ @ RegularExpressionNode (location: (0...15)) + │ │ │ ├── opening_loc: (0...1) = "/" + │ │ │ ├── content_loc: (1...14) = "(?<match>bar)" + │ │ │ ├── closing_loc: (14...15) = "/" + │ │ │ ├── unescaped: "(?<match>bar)" + │ │ │ └── flags: ∅ + │ │ ├── call_operator_loc: ∅ + │ │ ├── message_loc: (16...18) = "=~" + │ │ ├── opening_loc: ∅ + │ │ ├── arguments: + │ │ │ @ ArgumentsNode (location: (19...24)) + │ │ │ └── arguments: (length: 1) + │ │ │ └── @ StringNode (location: (19...24)) + │ │ │ ├── flags: ∅ + │ │ │ ├── opening_loc: (19...20) = "'" + │ │ │ ├── content_loc: (20...23) = "bar" + │ │ │ ├── closing_loc: (23...24) = "'" + │ │ │ └── unescaped: "bar" + │ │ ├── closing_loc: ∅ + │ │ ├── block: ∅ + │ │ ├── flags: ∅ + │ │ └── name: "=~" + │ └── locals: 
[:match] └── @ LocalVariableReadNode (location: (26...31)) ├── name: :match └── depth: 0 diff --git a/yarp/config.yml b/yarp/config.yml index eeab5971e0..b446742eab 100644 --- a/yarp/config.yml +++ b/yarp/config.yml @@ -1798,6 +1798,19 @@ nodes: foo => bar ^^^^^^^^^^ + - name: MatchWriteNode + fields: + - name: call + type: node + kind: CallNode + - name: locals + type: constant[] + comment: | + Represents writing local variables using a regular expression match with + named capture groups. + + /(?<foo>bar)/ =~ baz + ^^^^^^^^^^^^^^^^^^^^ - name: MissingNode comment: | Represents a node that is missing from the source and results in a syntax diff --git a/yarp/yarp.c b/yarp/yarp.c index 839e1336ac..de019b0f61 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -3371,6 +3371,23 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t * return node; } +// Allocate and initialize a new MatchWriteNode node. +static yp_match_write_node_t * +yp_match_write_node_create(yp_parser_t *parser, yp_call_node_t *call) { + yp_match_write_node_t *node = YP_ALLOC_NODE(parser, yp_match_write_node_t); + + *node = (yp_match_write_node_t) { + { + .type = YP_MATCH_WRITE_NODE, + .location = call->base.location + }, + .call = call + }; + + yp_constant_id_list_init(&node->locals); + return node; +} + // Allocate a new ModuleNode node. static yp_module_node_t * yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) { @@ -13612,8 +13629,9 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t return (yp_node_t *) yp_or_node_create(parser, node, &token, right); } case YP_TOKEN_EQUAL_TILDE: { - // Note that we _must_ parse the value before adding the local variables - // in order to properly mirror the behavior of Ruby. 
For example, + // Note that we _must_ parse the value before adding the local + // variables in order to properly mirror the behavior of Ruby. For + // example, // // /(?<foo>bar)/ =~ foo // @@ -13621,27 +13639,36 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t parser_lex(parser); yp_node_t *argument = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR); - // If the receiver of this =~ is a regular expression node, then we need - // to introduce local variables for it based on its named capture groups. + // By default, we're going to create a call node and then return it. + yp_call_node_t *call = yp_call_node_binary_create(parser, node, &token, argument); + yp_node_t *result = (yp_node_t *) call; + + // If the receiver of this =~ is a regular expression node, then we + // need to introduce local variables for it based on its named + // capture groups. if (YP_NODE_TYPE_P(node, YP_REGULAR_EXPRESSION_NODE)) { yp_string_list_t named_captures; yp_string_list_init(&named_captures); const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc; + if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) { + yp_match_write_node_t *match = yp_match_write_node_create(parser, call); - if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) { for (size_t index = 0; index < named_captures.length; index++) { yp_string_t *name = &named_captures.strings[index]; assert(name->type == YP_STRING_SHARED); - yp_parser_local_add_location(parser, name->source, name->source + name->length); + yp_constant_id_t local = yp_parser_local_add_location(parser, name->source, name->source + name->length); + yp_constant_id_list_append(&match->locals, 
local); } + + result = (yp_node_t *) match; } yp_string_list_free(&named_captures); } - return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument); + return result; } case YP_TOKEN_UAMPERSAND: case YP_TOKEN_USTAR: -- cgit v1.2.3