diff options
author | Kevin Newton <kddnewton@gmail.com> | 2024-03-07 15:24:43 -0500 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2024-03-07 20:40:39 +0000 |
commit | ec159fc8ba17cb70e34a5b62c1ef804e393b7b2f (patch) | |
tree | dba97733c5296ab09cf507b47dcbd6c327934328 /prism/prism.c | |
parent | 76e11595e28e258f4a4187a6d3eaccc9ca752e10 (diff) | |
download | ruby-ec159fc8ba17cb70e34a5b62c1ef804e393b7b2f.tar.gz |
[ruby/prism] Support parsing streams
https://github.com/ruby/prism/commit/efdc2b7222
Diffstat (limited to 'prism/prism.c')
-rw-r--r-- | prism/prism.c | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/prism/prism.c b/prism/prism.c
index d7ee5ac7db..045fe63f06 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -18703,6 +18703,99 @@ pm_parse(pm_parser_t *parser) {
     return parse_program(parser);
 }
 
+/**
+ * Read from the stream into the buffer until the fgets callback returns NULL,
+ * in which case return true. If a line read from the stream is an __END__
+ * marker, then halt after appending it and return false instead.
+ */
+static bool
+pm_parse_stream_read(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets) {
+#define LINE_SIZE 4096
+    char line[LINE_SIZE];
+    bool line_start = true;
+
+    while (fgets(line, LINE_SIZE, stream) != NULL) {
+        size_t length = strlen(line);
+
+        // Append the chunk to the buffer.
+        pm_buffer_append_string(buffer, line, length);
+
+        // An fgets-style reader stores at most LINE_SIZE - 1 bytes, so a full
+        // chunk with no trailing newline means the next chunk continues this
+        // same line. A continuation must never be taken for an __END__ marker.
+        bool check_marker = line_start;
+        line_start = !(length == LINE_SIZE - 1 && line[length - 1] != '\n');
+        if (!check_marker) continue;
+
+        // Check if the line matches the __END__ marker. If it does, then stop
+        // reading and return false. In most circumstances, this means we should
+        // stop reading from the stream so that the DATA constant can pick it
+        // up.
+        switch (length) {
+            case 7:
+                if (strncmp(line, "__END__", 7) == 0) return false;
+                break;
+            case 8:
+                if (strncmp(line, "__END__\n", 8) == 0) return false;
+                break;
+            case 9:
+                if (strncmp(line, "__END__\r\n", 9) == 0) return false;
+                break;
+        }
+    }
+
+    return true;
+#undef LINE_SIZE
+}
+
+/**
+ * Determine if there was an unterminated heredoc at the end of the input, which
+ * would mean the stream isn't finished and we should keep reading.
+ *
+ * For the other lex modes we can check if the lex mode has been closed, but for
+ * heredocs when we hit EOF we close the lex mode and then go back to parse the
+ * rest of the line after the heredoc declaration so that we get more of the
+ * syntax tree.
+ */
+static bool
+pm_parse_stream_unterminated_heredoc_p(pm_parser_t *parser) {
+    pm_diagnostic_t *diagnostic = (pm_diagnostic_t *) parser->error_list.head;
+
+    for (; diagnostic != NULL; diagnostic = (pm_diagnostic_t *) diagnostic->node.next) {
+        if (diagnostic->diag_id == PM_ERR_HEREDOC_TERM) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/**
+ * Parse a stream of Ruby source and return the tree.
+ *
+ * Prism is designed around having the entire source in memory at once, but you
+ * can stream stdin in to Ruby so we need to support a streaming API.
+ */
+PRISM_EXPORTED_FUNCTION pm_node_t *
+pm_parse_stream(pm_parser_t *parser, pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const pm_options_t *options) {
+    pm_buffer_init(buffer);
+
+    bool eof = pm_parse_stream_read(buffer, stream, fgets);
+    pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+    pm_node_t *node = pm_parse(parser);
+
+    while (!eof && parser->error_list.size > 0 && (parser->lex_modes.index > 0 || pm_parse_stream_unterminated_heredoc_p(parser))) {
+        pm_node_destroy(parser, node);
+        eof = pm_parse_stream_read(buffer, stream, fgets);
+
+        pm_parser_free(parser);
+        pm_parser_init(parser, (const uint8_t *) pm_buffer_value(buffer), pm_buffer_length(buffer), options);
+        node = pm_parse(parser);
+    }
+
+    return node;
+}
+
 static inline void
 pm_serialize_header(pm_buffer_t *buffer) {
     pm_buffer_append_string(buffer, "PRISM", 5);
@@ -18746,6 +18839,28 @@ pm_serialize_parse(pm_buffer_t *buffer, const uint8_t *source, size_t size, cons
 }
 
 /**
+ * Parse and serialize the AST represented by the source that is read out of the
+ * given stream into the given buffer.
+ */
+PRISM_EXPORTED_FUNCTION void
+pm_serialize_parse_stream(pm_buffer_t *buffer, void *stream, pm_parse_stream_fgets_t *fgets, const char *data) {
+    pm_parser_t parser;
+    pm_options_t options = { 0 };
+    pm_options_read(&options, data);
+
+    pm_buffer_t parser_buffer;
+    pm_node_t *node = pm_parse_stream(&parser, &parser_buffer, stream, fgets, &options);
+    pm_serialize_header(buffer);
+    pm_serialize_content(&parser, node, buffer);
+    pm_buffer_append_byte(buffer, '\0');
+
+    pm_node_destroy(&parser, node);
+    pm_buffer_free(&parser_buffer);
+    pm_parser_free(&parser);
+    pm_options_free(&options);
+}
+
+/**
  * Parse and serialize the comments in the given source to the given buffer.
  */
 PRISM_EXPORTED_FUNCTION void