diff options
author | Kevin Newton <kddnewton@gmail.com> | 2023-08-23 10:53:08 -0400 |
---|---|---|
committer | git <svn-admin@ruby-lang.org> | 2023-08-25 21:10:17 +0000 |
commit | 481388769407b533879e97510dc8160d094356e0 (patch) | |
tree | f141a77ac31f0b3a1eb50cafa9e92746334b2298 /yarp/extension.c | |
parent | 0c1a749eef476045e0b098467a564e888ef03c47 (diff) | |
download | ruby-481388769407b533879e97510dc8160d094356e0.tar.gz |
[ruby/yarp] Accept a block to parse and parse_file to get lexer output as well
https://github.com/ruby/yarp/commit/40fbf61a8d
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- | yarp/extension.c | 51 |
1 file changed, 50 insertions, 1 deletion
diff --git a/yarp/extension.c b/yarp/extension.c
index 3c67890200..7e735b1956 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -316,16 +316,65 @@ lex_file(VALUE self, VALUE filepath) {
 /* Parsing Ruby code */
 /******************************************************************************/
 
+// This is passed as a callback to the parser. It gets called every time a new
+// token is found from within a call to parse that accepted a block.
+static void
+parse_token(void *data, yp_parser_t *parser, yp_token_t *token) {
+    lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
+    rb_yield_values(2, yp_token_new(parser, token, lex_data->encoding, lex_data->source), INT2FIX(parser->lex_state));
+}
+
 // Parse the given input and return a ParseResult instance.
 static VALUE
 parse_input(yp_string_t *input, const char *filepath) {
     yp_parser_t parser;
     yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
 
+    VALUE offsets;
+    VALUE source;
+
+    // If a block was given to the parse method, then we're going to register a
+    // lex callback that will yield the tokens to the block. This means you can
+    // get the lexer and the parser output in one method call instead of having
+    // to parse twice.
+    if (rb_block_given_p()) {
+        offsets = rb_ary_new();
+
+        VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
+        source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
+
+        lex_data_t lex_data = {
+            .source = source,
+            .tokens = Qnil,
+            .encoding = rb_utf8_encoding()
+        };
+
+        lex_data_t *data = &lex_data;
+        yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
+            .data = (void *) data,
+            .callback = parse_token,
+        };
+
+        parser.lex_callback = &lex_callback;
+        yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
+    }
+
     yp_node_t *node = yp_parse(&parser);
     rb_encoding *encoding = rb_enc_find(parser.encoding.name);
 
-    VALUE source = yp_source_new(&parser);
+    if (rb_block_given_p()) {
+        // Here we need to update the source range to have the correct newline
+        // offsets. We do it here because we've already created the object and
+        // given it over to all of the tokens.
+        for (size_t index = 0; index < parser.newline_list.size; index++) {
+            rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
+        }
+    } else {
+        // Since a block was not given, we can just create the source now the
+        // regular way.
+        source = yp_source_new(&parser);
+    }
+
     VALUE result_argv[] = {
         yp_ast_new(&parser, node, encoding),
         parser_comments(&parser, source),