aboutsummaryrefslogtreecommitdiffstats
path: root/yarp/extension.c
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-08-23 10:53:08 -0400
committer: git <svn-admin@ruby-lang.org> 2023-08-25 21:10:17 +0000
commit: 481388769407b533879e97510dc8160d094356e0 (patch)
tree: f141a77ac31f0b3a1eb50cafa9e92746334b2298 /yarp/extension.c
parent: 0c1a749eef476045e0b098467a564e888ef03c47 (diff)
download: ruby-481388769407b533879e97510dc8160d094356e0.tar.gz
[ruby/yarp] Accept a block to parse and parse_file to get lexer output as well
https://github.com/ruby/yarp/commit/40fbf61a8d
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- yarp/extension.c 51
1 files changed, 50 insertions, 1 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index 3c67890200..7e735b1956 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -316,16 +316,65 @@ lex_file(VALUE self, VALUE filepath) {
/* Parsing Ruby code */
/******************************************************************************/
+// Lex callback registered by parse when it is called with a block. It gets
+// called every time a new token is found and yields that token to the block.
+static void
+parse_token(void *data, yp_parser_t *parser, yp_token_t *token) { // `data` is not used here; the lex data is read from parser->lex_callback->data instead
+ lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
+ rb_yield_values(2, yp_token_new(parser, token, lex_data->encoding, lex_data->source), INT2FIX(parser->lex_state)); // yields two values: the token object and the parser's current lex_state via INT2FIX
+}
+
// Parse the given input and return a ParseResult instance.
static VALUE
parse_input(yp_string_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
+ VALUE offsets;
+ VALUE source;
+
+ // If a block was given to the parse method, then we're going to register a
+ // lex callback that will yield the tokens to the block. This means you can
+ // get the lexer and the parser output in one method call instead of having
+ // to parse twice.
+ if (rb_block_given_p()) {
+ offsets = rb_ary_new();
+
+ VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
+ source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
+
+ lex_data_t lex_data = {
+ .source = source,
+ .tokens = Qnil,
+ .encoding = rb_utf8_encoding()
+ };
+
+ lex_data_t *data = &lex_data;
+ yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
+ .data = (void *) data,
+ .callback = parse_token,
+ };
+
+ parser.lex_callback = &lex_callback;
+ yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
+ }
+
yp_node_t *node = yp_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
- VALUE source = yp_source_new(&parser);
+ if (rb_block_given_p()) {
+ // Here we need to update the source range to have the correct newline
+ // offsets. We do it here because we've already created the object and
+ // given it over to all of the tokens.
+ for (size_t index = 0; index < parser.newline_list.size; index++) {
+ rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
+ }
+ } else {
+ // Since a block was not given, we can just create the source now the
+ // regular way.
+ source = yp_source_new(&parser);
+ }
+
VALUE result_argv[] = {
yp_ast_new(&parser, node, encoding),
parser_comments(&parser, source),