aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-09-14 09:01:20 -0400
committergit <svn-admin@ruby-lang.org>2023-09-14 19:10:58 +0000
commit0a8f3670d1c1aa4ec58a08642cccf5ee5dbf95ae (patch)
treee3e8844905f5e3e9b46d55fb83fdc00830186ba8
parent9d2549ac311f98288bd42c4a8054cba46ca8ce32 (diff)
downloadruby-0a8f3670d1c1aa4ec58a08642cccf5ee5dbf95ae.tar.gz
[ruby/yarp] Introduce MatchWriteNode
This rarely used node holds information about the local variables that need to be written when a regular expression with named capture groups is used on the left-hand side of a =~ operator. Note that we already "handled" these cases by adding locals to the AST, but we didn't actually expose this information, making it difficult to compile. The general idea behind this node is that it wraps the call node rather than flattening it away, so consumers can still easily find all of the call nodes in the tree. This should be okay because you hopefully don't need any information from the call node to determine what to compile, since the locals list is on the top level. https://github.com/ruby/yarp/commit/e136e7f9a8
-rw-r--r--test/yarp/location_test.rb4
-rw-r--r--test/yarp/snapshots/regex.txt59
-rw-r--r--test/yarp/snapshots/whitequark/lvar_injecting_match.txt51
-rw-r--r--yarp/config.yml13
-rw-r--r--yarp/yarp.c41
5 files changed, 109 insertions, 59 deletions
diff --git a/test/yarp/location_test.rb b/test/yarp/location_test.rb
index d76fac5a80..3ef3da3faf 100644
--- a/test/yarp/location_test.rb
+++ b/test/yarp/location_test.rb
@@ -545,6 +545,10 @@ module YARP
assert_location(MatchRequiredNode, "foo => bar")
end
+ def test_MatchWriteNode
+ assert_location(MatchWriteNode, "/(?<foo>)/ =~ foo") # regexp with a named capture group on the left-hand side of =~
+ end
+
def test_ModuleNode
assert_location(ModuleNode, "module Foo end")
end
diff --git a/test/yarp/snapshots/regex.txt b/test/yarp/snapshots/regex.txt
index f474669ee0..6f0d42ce3f 100644
--- a/test/yarp/snapshots/regex.txt
+++ b/test/yarp/snapshots/regex.txt
@@ -84,34 +84,37 @@
│ └── flags: ∅
├── @ ArrayNode (location: (59...86))
│ ├── elements: (length: 2)
- │ │ ├── @ CallNode (location: (60...80))
- │ │ │ ├── receiver:
- │ │ │ │ @ RegularExpressionNode (location: (60...73))
- │ │ │ │ ├── opening_loc: (60...61) = "/"
- │ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)"
- │ │ │ │ ├── closing_loc: (72...73) = "/"
- │ │ │ │ ├── unescaped: "(?<foo>bar)"
- │ │ │ │ └── flags: ∅
- │ │ │ ├── call_operator_loc: ∅
- │ │ │ ├── message_loc: (74...76) = "=~"
- │ │ │ ├── opening_loc: ∅
- │ │ │ ├── arguments:
- │ │ │ │ @ ArgumentsNode (location: (77...80))
- │ │ │ │ └── arguments: (length: 1)
- │ │ │ │ └── @ CallNode (location: (77...80))
- │ │ │ │ ├── receiver: ∅
- │ │ │ │ ├── call_operator_loc: ∅
- │ │ │ │ ├── message_loc: (77...80) = "baz"
- │ │ │ │ ├── opening_loc: ∅
- │ │ │ │ ├── arguments: ∅
- │ │ │ │ ├── closing_loc: ∅
- │ │ │ │ ├── block: ∅
- │ │ │ │ ├── flags: variable_call
- │ │ │ │ └── name: "baz"
- │ │ │ ├── closing_loc: ∅
- │ │ │ ├── block: ∅
- │ │ │ ├── flags: ∅
- │ │ │ └── name: "=~"
+ │ │ ├── @ MatchWriteNode (location: (60...80))
+ │ │ │ ├── call:
+ │ │ │ │ @ CallNode (location: (60...80))
+ │ │ │ │ ├── receiver:
+ │ │ │ │ │ @ RegularExpressionNode (location: (60...73))
+ │ │ │ │ │ ├── opening_loc: (60...61) = "/"
+ │ │ │ │ │ ├── content_loc: (61...72) = "(?<foo>bar)"
+ │ │ │ │ │ ├── closing_loc: (72...73) = "/"
+ │ │ │ │ │ ├── unescaped: "(?<foo>bar)"
+ │ │ │ │ │ └── flags: ∅
+ │ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ │ ├── message_loc: (74...76) = "=~"
+ │ │ │ │ ├── opening_loc: ∅
+ │ │ │ │ ├── arguments:
+ │ │ │ │ │ @ ArgumentsNode (location: (77...80))
+ │ │ │ │ │ └── arguments: (length: 1)
+ │ │ │ │ │ └── @ CallNode (location: (77...80))
+ │ │ │ │ │ ├── receiver: ∅
+ │ │ │ │ │ ├── call_operator_loc: ∅
+ │ │ │ │ │ ├── message_loc: (77...80) = "baz"
+ │ │ │ │ │ ├── opening_loc: ∅
+ │ │ │ │ │ ├── arguments: ∅
+ │ │ │ │ │ ├── closing_loc: ∅
+ │ │ │ │ │ ├── block: ∅
+ │ │ │ │ │ ├── flags: variable_call
+ │ │ │ │ │ └── name: "baz"
+ │ │ │ │ ├── closing_loc: ∅
+ │ │ │ │ ├── block: ∅
+ │ │ │ │ ├── flags: ∅
+ │ │ │ │ └── name: "=~"
+ │ │ │ └── locals: [:foo]
│ │ └── @ LocalVariableReadNode (location: (82...85))
│ │ ├── name: :foo
│ │ └── depth: 0
diff --git a/test/yarp/snapshots/whitequark/lvar_injecting_match.txt b/test/yarp/snapshots/whitequark/lvar_injecting_match.txt
index 2375854350..e570bf18a8 100644
--- a/test/yarp/snapshots/whitequark/lvar_injecting_match.txt
+++ b/test/yarp/snapshots/whitequark/lvar_injecting_match.txt
@@ -3,30 +3,33 @@
└── statements:
@ StatementsNode (location: (0...31))
└── body: (length: 2)
- ├── @ CallNode (location: (0...24))
- │ ├── receiver:
- │ │ @ RegularExpressionNode (location: (0...15))
- │ │ ├── opening_loc: (0...1) = "/"
- │ │ ├── content_loc: (1...14) = "(?<match>bar)"
- │ │ ├── closing_loc: (14...15) = "/"
- │ │ ├── unescaped: "(?<match>bar)"
- │ │ └── flags: ∅
- │ ├── call_operator_loc: ∅
- │ ├── message_loc: (16...18) = "=~"
- │ ├── opening_loc: ∅
- │ ├── arguments:
- │ │ @ ArgumentsNode (location: (19...24))
- │ │ └── arguments: (length: 1)
- │ │ └── @ StringNode (location: (19...24))
- │ │ ├── flags: ∅
- │ │ ├── opening_loc: (19...20) = "'"
- │ │ ├── content_loc: (20...23) = "bar"
- │ │ ├── closing_loc: (23...24) = "'"
- │ │ └── unescaped: "bar"
- │ ├── closing_loc: ∅
- │ ├── block: ∅
- │ ├── flags: ∅
- │ └── name: "=~"
+ ├── @ MatchWriteNode (location: (0...24))
+ │ ├── call:
+ │ │ @ CallNode (location: (0...24))
+ │ │ ├── receiver:
+ │ │ │ @ RegularExpressionNode (location: (0...15))
+ │ │ │ ├── opening_loc: (0...1) = "/"
+ │ │ │ ├── content_loc: (1...14) = "(?<match>bar)"
+ │ │ │ ├── closing_loc: (14...15) = "/"
+ │ │ │ ├── unescaped: "(?<match>bar)"
+ │ │ │ └── flags: ∅
+ │ │ ├── call_operator_loc: ∅
+ │ │ ├── message_loc: (16...18) = "=~"
+ │ │ ├── opening_loc: ∅
+ │ │ ├── arguments:
+ │ │ │ @ ArgumentsNode (location: (19...24))
+ │ │ │ └── arguments: (length: 1)
+ │ │ │ └── @ StringNode (location: (19...24))
+ │ │ │ ├── flags: ∅
+ │ │ │ ├── opening_loc: (19...20) = "'"
+ │ │ │ ├── content_loc: (20...23) = "bar"
+ │ │ │ ├── closing_loc: (23...24) = "'"
+ │ │ │ └── unescaped: "bar"
+ │ │ ├── closing_loc: ∅
+ │ │ ├── block: ∅
+ │ │ ├── flags: ∅
+ │ │ └── name: "=~"
+ │ └── locals: [:match]
└── @ LocalVariableReadNode (location: (26...31))
├── name: :match
└── depth: 0
diff --git a/yarp/config.yml b/yarp/config.yml
index eeab5971e0..b446742eab 100644
--- a/yarp/config.yml
+++ b/yarp/config.yml
@@ -1798,6 +1798,19 @@ nodes:
foo => bar
^^^^^^^^^^
+ - name: MatchWriteNode
+ fields:
+ - name: call
+ type: node
+ kind: CallNode
+ - name: locals
+ type: constant[]
+ comment: |
+ Represents writing local variables using a regular expression match with
+ named capture groups.
+
+ /(?<foo>bar)/ =~ baz
+ ^^^^^^^^^^^^^^^^^^^^
- name: MissingNode
comment: |
Represents a node that is missing from the source and results in a syntax
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 839e1336ac..de019b0f61 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -3371,6 +3371,23 @@ yp_match_required_node_create(yp_parser_t *parser, yp_node_t *value, yp_node_t *
return node;
}
+// Allocate and initialize a new MatchWriteNode node that wraps the given call node. The caller is responsible for appending the written locals to node->locals.
+static yp_match_write_node_t *
+yp_match_write_node_create(yp_parser_t *parser, yp_call_node_t *call) {
+ yp_match_write_node_t *node = YP_ALLOC_NODE(parser, yp_match_write_node_t);
+ // The new node takes its location directly from the call node that it wraps.
+ *node = (yp_match_write_node_t) {
+ {
+ .type = YP_MATCH_WRITE_NODE,
+ .location = call->base.location
+ },
+ .call = call
+ };
+ // The initializer above does not name locals, so the list is set up here before the node is handed back.
+ yp_constant_id_list_init(&node->locals);
+ return node;
+}
+
// Allocate a new ModuleNode node.
static yp_module_node_t *
yp_module_node_create(yp_parser_t *parser, yp_constant_id_list_t *locals, const yp_token_t *module_keyword, yp_node_t *constant_path, const yp_token_t *name, yp_node_t *body, const yp_token_t *end_keyword) {
@@ -13612,8 +13629,9 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
return (yp_node_t *) yp_or_node_create(parser, node, &token, right);
}
case YP_TOKEN_EQUAL_TILDE: {
- // Note that we _must_ parse the value before adding the local variables
- // in order to properly mirror the behavior of Ruby. For example,
+ // Note that we _must_ parse the value before adding the local
+ // variables in order to properly mirror the behavior of Ruby. For
+ // example,
//
// /(?<foo>bar)/ =~ foo
//
@@ -13621,27 +13639,36 @@ parse_expression_infix(yp_parser_t *parser, yp_node_t *node, yp_binding_power_t
parser_lex(parser);
yp_node_t *argument = parse_expression(parser, binding_power, YP_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
- // If the receiver of this =~ is a regular expression node, then we need
- // to introduce local variables for it based on its named capture groups.
+ // By default, we're going to create a call node and then return it.
+ yp_call_node_t *call = yp_call_node_binary_create(parser, node, &token, argument);
+ yp_node_t *result = (yp_node_t *) call;
+
+ // If the receiver of this =~ is a regular expression node, then we
+ // need to introduce local variables for it based on its named
+ // capture groups.
if (YP_NODE_TYPE_P(node, YP_REGULAR_EXPRESSION_NODE)) {
yp_string_list_t named_captures;
yp_string_list_init(&named_captures);
const yp_location_t *content_loc = &((yp_regular_expression_node_t *) node)->content_loc;
+ if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding) && (named_captures.length > 0)) {
+ yp_match_write_node_t *match = yp_match_write_node_create(parser, call);
- if (yp_regexp_named_capture_group_names(content_loc->start, (size_t) (content_loc->end - content_loc->start), &named_captures, parser->encoding_changed, &parser->encoding)) {
for (size_t index = 0; index < named_captures.length; index++) {
yp_string_t *name = &named_captures.strings[index];
assert(name->type == YP_STRING_SHARED);
- yp_parser_local_add_location(parser, name->source, name->source + name->length);
+ yp_constant_id_t local = yp_parser_local_add_location(parser, name->source, name->source + name->length);
+ yp_constant_id_list_append(&match->locals, local);
}
+
+ result = (yp_node_t *) match;
}
yp_string_list_free(&named_captures);
}
- return (yp_node_t *) yp_call_node_binary_create(parser, node, &token, argument);
+ return result;
}
case YP_TOKEN_UAMPERSAND:
case YP_TOKEN_USTAR: