1 files changed, 201 insertions, 132 deletions
diff --git a/yarp/yarp.c b/yarp/yarp.c
index 730bf8274b..28d71f69c4 100644
--- a/yarp/yarp.c
+++ b/yarp/yarp.c
@@ -10037,6 +10037,53 @@ parse_method_definition_name(yp_parser_t *parser) {
     }
 }
 
+// Fold the indentation of each non-blank line of a single string node's
+// content into common_whitespace (-1 means no line has been measured yet)
+// and return the updated minimum. node must be a YP_STRING_NODE.
+static int
+parse_heredoc_common_whitespace_for_single_node(yp_parser_t *parser, yp_node_t *node, int common_whitespace) {
+    const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
+    const uint8_t *cur_char = content_loc->start;
+
+    while (cur_char && cur_char < content_loc->end) {
+        // Empty lines aren't included in the minimum whitespace calculation.
+        size_t eol_length;
+        while ((eol_length = match_eol_at(parser, cur_char))) {
+            cur_char += eol_length;
+        }
+
+        // The skip above may have consumed (or overrun) the rest of the content.
+        if (cur_char >= content_loc->end) break;
+
+        // Bound-check before dereferencing; tabs advance to the next tab stop.
+        int cur_whitespace = 0;
+        while (cur_char < content_loc->end && yp_char_is_inline_whitespace(*cur_char)) {
+            if (cur_char[0] == '\t') {
+                cur_whitespace = (cur_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
+            } else {
+                cur_whitespace++;
+            }
+            cur_char++;
+        }
+
+        // A newline here means the line held only whitespace, so it doesn't
+        // take part in the common leading whitespace calculation.
+        eol_length = match_eol_at(parser, cur_char);
+        if (eol_length) {
+            cur_char += eol_length;
+            continue;
+        }
+
+        if (cur_whitespace < common_whitespace || common_whitespace == -1) {
+            common_whitespace = cur_whitespace;
+        }
+        // Move to the start of the next line; a NULL result ends the outer loop.
+        cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
+        if (cur_char) cur_char++;
+    }
+    return common_whitespace;
+}
+
 // Calculate the common leading whitespace for each line in a heredoc.
 static int
 parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
@@ -10044,69 +10091,102 @@ parse_heredoc_common_whitespace(yp_parser_t *parser, yp_node_list_t *nodes) {
 
     for (size_t index = 0; index < nodes->size; index++) {
         yp_node_t *node = nodes->nodes[index];
-
         if (!YP_NODE_TYPE_P(node, YP_STRING_NODE)) continue;
-        const yp_location_t *content_loc = &((yp_string_node_t *) node)->content_loc;
 
         // If the previous node wasn't a string node, we don't want to trim
         // whitespace. This could happen after an interpolated expression or
         // variable.
         if (index == 0 || YP_NODE_TYPE_P(nodes->nodes[index - 1], YP_STRING_NODE)) {
-            int cur_whitespace;
-            const uint8_t *cur_char = content_loc->start;
-
-            while (cur_char && cur_char < content_loc->end) {
-                // Any empty newlines aren't included in the minimum whitespace
-                // calculation.
-                size_t eol_length;
-                while ((eol_length = match_eol_at(parser, cur_char))) {
-                    cur_char += eol_length;
-                }
-
-                if (cur_char == content_loc->end) break;
+            common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, common_whitespace);
+        }
+    }
 
-                cur_whitespace = 0;
+    return common_whitespace;
+}
 
-                while (yp_char_is_inline_whitespace(*cur_char) && cur_char < content_loc->end) {
-                    if (cur_char[0] == '\t') {
-                        cur_whitespace = (cur_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
-                    } else {
-                        cur_whitespace++;
-                    }
-                    cur_char++;
+static yp_string_t *
+parse_heredoc_dedent_single_node(yp_parser_t *parser, yp_string_t *string, bool dedent_node, int common_whitespace, yp_heredoc_quote_t quote)
+{
+    // Dedent this string in place. We overwrite its bytes below, hence the
+    // yp_string_ensure_owned call (presumably it yields a mutable copy).
+    yp_string_ensure_owned(string);
+
+    // Now get the bounds of the existing string. We'll use this as a
+    // destination to move bytes into. We'll also use it for bounds checking
+    // since we don't require that these strings be null terminated.
+    size_t dest_length = yp_string_length(string);
+    uint8_t *source_start = (uint8_t *) string->source;
+
+    const uint8_t *source_cursor = source_start;
+    const uint8_t *source_end = source_cursor + dest_length;
+
+    // We're going to move bytes backward in the string when we get leading
+    // whitespace, so we'll maintain a pointer to the current position in the
+    // string that we're writing to.
+    uint8_t *dest_cursor = source_start;
+
+    while (source_cursor < source_end) {
+        // dedent_node is the caller's choice for the first line only; it is
+        // forced to true below once a newline has been copied.
+        if (dedent_node) {
+            int trimmed_whitespace = 0;
+
+            // Skip leading inline whitespace until common_whitespace columns
+            // have been trimmed or the string ends. Skipped bytes are never
+            // copied into the new string. A tab that would overshoot the
+            // common whitespace is left in place.
+            while ((source_cursor < source_end) && yp_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
+                if (*source_cursor == '\t') {
+                    trimmed_whitespace = (trimmed_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
+                    if (trimmed_whitespace > common_whitespace) break;
+                } else {
+                    trimmed_whitespace++;
                 }
 
-                // If we hit a newline, then we have encountered a line that
-                // contains only whitespace, and it shouldn't be considered in
-                // the calculation of common leading whitespace.
-                eol_length = match_eol_at(parser, cur_char);
-                if (eol_length) {
-                    cur_char += eol_length;
-                    continue;
-                }
+                source_cursor++;
+                dest_length--;
+            }
+        }
 
-                if (cur_whitespace < common_whitespace || common_whitespace == -1) {
-                    common_whitespace = cur_whitespace;
-                }
+        // At this point we have dedented all that we need to, so we need to find
+        // the next newline.
+        const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
 
-                cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
-                if (cur_char) cur_char++;
-            }
+        if (breakpoint == NULL) {
+            // If there isn't another newline, then we can just move the rest of the
+            // string and break from the loop.
+            memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
+            break;
         }
+
+        // Otherwise, we need to move everything including the newline, and
+        // then set the dedent_node flag to true.
+        if (breakpoint < source_end) breakpoint++;
+        memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
+        dest_cursor += (breakpoint - source_cursor);
+        source_cursor = breakpoint;
+        dedent_node = true;
     }
 
-    return common_whitespace;
+    // Store the new length even when it is zero so callers can detect an emptied string.
+    string->length = dest_length;
+
+    if (dest_length != 0) {
+        yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
+    }
+    return string;
 }
 
 // Take a heredoc node that is indented by a ~ and trim the leading whitespace.
 static void
-parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t quote) {
+parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *heredoc_node, yp_heredoc_quote_t quote)
+{
     yp_node_list_t *nodes;
 
     if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-        nodes = &((yp_interpolated_x_string_node_t *) node)->parts;
+        nodes = &((yp_interpolated_x_string_node_t *) heredoc_node)->parts;
     } else {
-        nodes = &((yp_interpolated_string_node_t *) node)->parts;
+        nodes = &((yp_interpolated_string_node_t *) heredoc_node)->parts;
     }
 
     // First, calculate how much common whitespace we need to trim. If there is
@@ -10135,74 +10215,11 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
             continue;
         }
 
-        // Get a reference to the string struct that is being held by the string
-        // node. This is the value we're going to actual manipulate.
-        yp_string_t *string = &(((yp_string_node_t *) node)->unescaped);
-        yp_string_ensure_owned(string);
-
-        // Now get the bounds of the existing string. We'll use this as a
-        // destination to move bytes into. We'll also use it for bounds checking
-        // since we don't require that these strings be null terminated.
-        size_t dest_length = yp_string_length(string);
-        uint8_t *source_start = (uint8_t *) string->source;
-
-        const uint8_t *source_cursor = source_start;
-        const uint8_t *source_end = source_cursor + dest_length;
-
-        // We're going to move bytes backward in the string when we get leading
-        // whitespace, so we'll maintain a pointer to the current position in the
-        // string that we're writing to.
-        uint8_t *dest_cursor = source_start;
-
-        while (source_cursor < source_end) {
-            // If we need to dedent the next element within the heredoc or the next
-            // line within the string node, then we'll do it here.
-            if (dedent_next) {
-                int trimmed_whitespace = 0;
-
-                // While we haven't reached the amount of common whitespace that we need
-                // to trim and we haven't reached the end of the string, we'll keep
-                // trimming whitespace. Trimming in this context means skipping over
-                // these bytes such that they aren't copied into the new string.
-                while ((source_cursor < source_end) && yp_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
-                    if (*source_cursor == '\t') {
-                        trimmed_whitespace = (trimmed_whitespace / YP_TAB_WHITESPACE_SIZE + 1) * YP_TAB_WHITESPACE_SIZE;
-                        if (trimmed_whitespace > common_whitespace) break;
-                    } else {
-                        trimmed_whitespace++;
-                    }
-
-                    source_cursor++;
-                    dest_length--;
-                }
-            }
-
-            // At this point we have dedented all that we need to, so we need to find
-            // the next newline.
-            const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
-
-            if (breakpoint == NULL) {
-                // If there isn't another newline, then we can just move the rest of the
-                // string and break from the loop.
-                memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
-                break;
-            }
-
-            // Otherwise, we need to move everything including the newline, and
-            // then set the dedent_next flag to true.
-            if (breakpoint < source_end) breakpoint++;
-            memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
-            dest_cursor += (breakpoint - source_cursor);
-            source_cursor = breakpoint;
-            dedent_next = true;
-        }
-
-        // We only want to write this node into the list if it has any content.
-        if (dest_length == 0) {
+        yp_string_node_t *string_node = ((yp_string_node_t *) node);
+        parse_heredoc_dedent_single_node(parser, &string_node->unescaped, dedent_next, common_whitespace, quote);
+        if (string_node->unescaped.length == 0) {
             yp_node_destroy(parser, node);
         } else {
-            string->length = dest_length;
-            yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
             nodes->nodes[write_index++] = node;
         }
 
@@ -11246,42 +11263,94 @@ parse_expression_prefix(yp_parser_t *parser, yp_binding_power_t binding_power) {
             yp_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent;
 
             yp_node_t *node;
-            if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                node = (yp_node_t *) yp_interpolated_xstring_node_create(parser, &parser->current, &parser->current);
-            } else {
-                node = (yp_node_t *) yp_interpolated_string_node_create(parser, &parser->current, NULL, &parser->current);
-            }
 
             parser_lex(parser);
-            yp_node_t *part;
-
-            while (!match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
-                if ((part = parse_string_part(parser)) == NULL) continue;
 
+            if (parser->current.type == YP_TOKEN_HEREDOC_END) {
                 if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                    yp_interpolated_xstring_node_append((yp_interpolated_x_string_node_t *) node, part);
+                    node = (yp_node_t *) yp_xstring_node_create(
+                            parser,
+                            &parser->previous,
+                            &((yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->current.start }),
+                            &parser->current);
+
                 } else {
-                    yp_interpolated_string_node_append((yp_interpolated_string_node_t *) node, part);
-                }
+                    node = (yp_node_t *)yp_string_node_create(
+                            parser,
+                            &parser->previous,
+                            &((yp_token_t) { .type = YP_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->current.start }),
+                            &parser->current);
+                }
+                node->location.end = parser->previous.end;
+                lex_state_set(parser, YP_LEX_STATE_END);
+                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+                return node;
             }
 
-            lex_state_set(parser, YP_LEX_STATE_END);
-            expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+            yp_token_t opening_token = parser->previous;
 
-            if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
-                assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_X_STRING_NODE));
-                yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
-                node->location = ((yp_interpolated_x_string_node_t *) node)->opening_loc;
-            } else {
-                assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE));
-                yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
-                node->location = ((yp_interpolated_string_node_t *) node)->opening_loc;
+            yp_node_t *part = parse_string_part(parser);
+
+            if (part == NULL) {
+                // We couldn't parse anything, so return a missing node
+                return (yp_node_t *) yp_missing_node_create(parser, parser->previous.start, parser->previous.end);
             }
 
-            // If this is a heredoc that is indented with a ~, then we need to dedent
-            // each line by the common leading whitespace.
-            if (indent == YP_HEREDOC_INDENT_TILDE) {
-                parse_heredoc_dedent(parser, node, quote);
+            if (YP_NODE_TYPE_P(part, YP_STRING_NODE) && match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
+                // We only have a single string, so we can return it
+                yp_string_node_t *str_part = (yp_string_node_t *)part;
+                str_part->opening_loc = YP_LOCATION_TOKEN_VALUE(&opening_token);
+                str_part->closing_loc = YP_LOCATION_TOKEN_VALUE(&parser->current);
+                str_part->base.location = str_part->opening_loc;
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    part->type = YP_X_STRING_NODE;
+                }
+                lex_state_set(parser, YP_LEX_STATE_END);
+                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+                node = part;
+                if (indent == YP_HEREDOC_INDENT_TILDE) {
+                    int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
+                    parse_heredoc_dedent_single_node(parser, &str_part->unescaped, true, common_whitespace, quote);
+                }
+            }
+            else {
+                // We have multiple parts, continue parsing them
+                yp_node_list_t parts = YP_EMPTY_NODE_LIST;
+                yp_node_list_append(&parts, part);
+
+                while (!match_any_type_p(parser, 2, YP_TOKEN_HEREDOC_END, YP_TOKEN_EOF)) {
+                    if ((part = parse_string_part(parser)) != NULL) {
+                        yp_node_list_append(&parts, part);
+                    }
+                }
+
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    node = (yp_node_t *) yp_interpolated_xstring_node_create(parser, &opening_token, &opening_token);
+                    ((yp_interpolated_x_string_node_t *)node)->parts = parts;
+                } else {
+                    node = (yp_node_t *) yp_interpolated_string_node_create(parser, &opening_token, NULL, &opening_token);
+                    ((yp_interpolated_string_node_t *)node)->parts = parts;
+                }
+
+
+                lex_state_set(parser, YP_LEX_STATE_END);
+                expect(parser, YP_TOKEN_HEREDOC_END, YP_ERR_HEREDOC_TERM);
+
+                if (quote == YP_HEREDOC_QUOTE_BACKTICK) {
+                    assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_X_STRING_NODE));
+                    yp_interpolated_xstring_node_closing_set(((yp_interpolated_x_string_node_t *) node), &parser->previous);
+                    node->location = ((yp_interpolated_x_string_node_t *) node)->opening_loc;
+                } else {
+                    assert(YP_NODE_TYPE_P(node, YP_INTERPOLATED_STRING_NODE));
+                    yp_interpolated_string_node_closing_set((yp_interpolated_string_node_t *) node, &parser->previous);
+                    node->location = ((yp_interpolated_string_node_t *) node)->opening_loc;
+                }
+
+                // If this is a heredoc that is indented with a ~, then we need to dedent
+                // each line by the common leading whitespace.
+                if (indent == YP_HEREDOC_INDENT_TILDE) {
+                    parse_heredoc_dedent(parser, node, quote);
+                }
             }
 
             // If there's a string immediately following this heredoc, then it's a