aboutsummaryrefslogtreecommitdiffstats
path: root/yarp/extension.c
diff options
context:
space:
mode:
author: Kevin Newton <kddnewton@gmail.com> 2023-08-25 15:22:42 -0400
committer: git <svn-admin@ruby-lang.org> 2023-08-25 21:10:19 +0000
commit: 9b8602dd903b2515463a1a314cb8fdf735a354aa (patch)
tree: 9ae69aabbc89658e22abcd9b5c770ac8572ae79d /yarp/extension.c
parent: 76512d78fcde99458db211c0f958bd39cb23dd98 (diff)
download: ruby-9b8602dd903b2515463a1a314cb8fdf735a354aa.tar.gz
[ruby/yarp] Introduce parse_lex instead of asking for a block
https://github.com/ruby/yarp/commit/7e70339fe1
Diffstat (limited to 'yarp/extension.c')
-rw-r--r-- yarp/extension.c 143
1 files changed, 65 insertions, 78 deletions
diff --git a/yarp/extension.c b/yarp/extension.c
index 7e735b1956..b59ccc1bca 100644
--- a/yarp/extension.c
+++ b/yarp/extension.c
@@ -198,66 +198,67 @@ typedef struct {
VALUE source;
VALUE tokens;
rb_encoding *encoding;
-} lex_data_t;
+} parse_lex_data_t;
// This is passed as a callback to the parser. It gets called every time a new
// token is found. Once found, we initialize a new instance of Token and push it
// onto the tokens array.
static void
-lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
+parse_lex_token(void *data, yp_parser_t *parser, yp_token_t *token) {
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
VALUE yields = rb_ary_new_capa(2);
- rb_ary_push(yields, yp_token_new(parser, token, lex_data->encoding, lex_data->source));
+ rb_ary_push(yields, yp_token_new(parser, token, parse_lex_data->encoding, parse_lex_data->source));
rb_ary_push(yields, INT2FIX(parser->lex_state));
- rb_ary_push(lex_data->tokens, yields);
+ rb_ary_push(parse_lex_data->tokens, yields);
}
// This is called whenever the encoding changes based on the magic comment at
// the top of the file. We use it to update the encoding that we are using to
// create tokens.
static void
-lex_encoding_changed_callback(yp_parser_t *parser) {
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
- lex_data->encoding = rb_enc_find(parser->encoding.name);
+parse_lex_encoding_changed_callback(yp_parser_t *parser) {
+ parse_lex_data_t *parse_lex_data = (parse_lex_data_t *) parser->lex_callback->data;
+ parse_lex_data->encoding = rb_enc_find(parser->encoding.name);
- // Since we got a new encoding, we need to go back and change the encoding
- // of the tokens that we've already lexed. This should be a tiny amount
- // since encoding magic comments need to be the first or second line of the
+ // Since the encoding changed, we need to go back and change the encoding of
+ // the tokens that were already lexed. This is only going to end up being
+ // one or two tokens, since the encoding can only change at the top of the
// file.
- VALUE tokens = lex_data->tokens;
+ VALUE tokens = parse_lex_data->tokens;
for (long index = 0; index < RARRAY_LEN(tokens); index++) {
VALUE yields = rb_ary_entry(tokens, index);
VALUE token = rb_ary_entry(yields, 0);
VALUE value = rb_ivar_get(token, rb_intern("@value"));
- rb_enc_associate(value, lex_data->encoding);
+ rb_enc_associate(value, parse_lex_data->encoding);
ENC_CODERANGE_CLEAR(value);
}
}
-// Return an array of tokens corresponding to the given source.
+// Parse the given input and return a ParseResult whose value is either just the
+// tokens or, when return_nodes is true, a two-element array of [AST, tokens].
static VALUE
-lex_input(yp_string_t *input, const char *filepath) {
+parse_lex_input(yp_string_t *input, const char *filepath, bool return_nodes) {
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
- yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
+ yp_parser_register_encoding_changed_callback(&parser, parse_lex_encoding_changed_callback);
VALUE offsets = rb_ary_new();
VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
VALUE source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
- lex_data_t lex_data = {
+ parse_lex_data_t parse_lex_data = {
.source = source,
.tokens = rb_ary_new(),
.encoding = rb_utf8_encoding()
};
- lex_data_t *data = &lex_data;
+ parse_lex_data_t *data = &parse_lex_data;
yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
.data = (void *) data,
- .callback = lex_token,
+ .callback = parse_lex_token,
};
parser.lex_callback = &lex_callback;
@@ -270,20 +271,26 @@ lex_input(yp_string_t *input, const char *filepath) {
rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
}
+ VALUE value;
+ if (return_nodes) {
+ value = rb_ary_new_capa(2);
+ rb_ary_push(value, yp_ast_new(&parser, node, parse_lex_data.encoding));
+ rb_ary_push(value, parse_lex_data.tokens);
+ } else {
+ value = parse_lex_data.tokens;
+ }
+
VALUE result_argv[] = {
- lex_data.tokens,
+ value,
parser_comments(&parser, source),
- parser_errors(&parser, lex_data.encoding, source),
- parser_warnings(&parser, lex_data.encoding, source),
+ parser_errors(&parser, parse_lex_data.encoding, source),
+ parser_warnings(&parser, parse_lex_data.encoding, source),
source
};
- VALUE result = rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
-
yp_node_destroy(&parser, node);
yp_parser_free(&parser);
-
- return result;
+ return rb_class_new_instance(5, result_argv, rb_cYARPParseResult);
}
// Return an array of tokens corresponding to the given string.
@@ -295,7 +302,8 @@ lex(int argc, VALUE *argv, VALUE self) {
yp_string_t input;
input_load_string(&input, string);
- return lex_input(&input, check_string(filepath));
+
+ return parse_lex_input(&input, check_string(filepath), false);
}
// Return an array of tokens corresponding to the given file.
@@ -306,7 +314,7 @@ lex_file(VALUE self, VALUE filepath) {
const char *checked = check_string(filepath);
if (!yp_string_mapped_init(&input, checked)) return Qnil;
- VALUE value = lex_input(&input, checked);
+ VALUE value = parse_lex_input(&input, checked, false);
yp_string_free(&input);
return value;
@@ -316,65 +324,16 @@ lex_file(VALUE self, VALUE filepath) {
/* Parsing Ruby code */
/******************************************************************************/
-// This is passed as a callback to the parser. It gets called every time a new
-// token is found from within a call to parse that accepted a block.
-static void
-parse_token(void *data, yp_parser_t *parser, yp_token_t *token) {
- lex_data_t *lex_data = (lex_data_t *) parser->lex_callback->data;
- rb_yield_values(2, yp_token_new(parser, token, lex_data->encoding, lex_data->source), INT2FIX(parser->lex_state));
-}
-
// Parse the given input and return a ParseResult instance.
static VALUE
parse_input(yp_string_t *input, const char *filepath) {
yp_parser_t parser;
yp_parser_init(&parser, yp_string_source(input), yp_string_length(input), filepath);
- VALUE offsets;
- VALUE source;
-
- // If a block was given to the parse method, then we're going to register a
- // lex callback that will yield the tokens to the block. This means you can
- // get the lexer and the parser output in one method call instead of having
- // to parse twice.
- if (rb_block_given_p()) {
- offsets = rb_ary_new();
-
- VALUE source_argv[] = { rb_str_new(yp_string_source(input), yp_string_length(input)), offsets };
- source = rb_class_new_instance(2, source_argv, rb_cYARPSource);
-
- lex_data_t lex_data = {
- .source = source,
- .tokens = Qnil,
- .encoding = rb_utf8_encoding()
- };
-
- lex_data_t *data = &lex_data;
- yp_lex_callback_t lex_callback = (yp_lex_callback_t) {
- .data = (void *) data,
- .callback = parse_token,
- };
-
- parser.lex_callback = &lex_callback;
- yp_parser_register_encoding_changed_callback(&parser, lex_encoding_changed_callback);
- }
-
yp_node_t *node = yp_parse(&parser);
rb_encoding *encoding = rb_enc_find(parser.encoding.name);
- if (rb_block_given_p()) {
- // Here we need to update the source range to have the correct newline
- // offsets. We do it here because we've already created the object and
- // given it over to all of the tokens.
- for (size_t index = 0; index < parser.newline_list.size; index++) {
- rb_ary_push(offsets, INT2FIX(parser.newline_list.offsets[index]));
- }
- } else {
- // Since a block was not given, we can just create the source now the
- // regular way.
- source = yp_source_new(&parser);
- }
-
+ VALUE source = yp_source_new(&parser);
VALUE result_argv[] = {
yp_ast_new(&parser, node, encoding),
parser_comments(&parser, source),
@@ -431,6 +390,32 @@ parse_file(VALUE self, VALUE filepath) {
return value;
}
+// Parse and lex the given string and return a ParseResult instance.
+static VALUE
+parse_lex(int argc, VALUE *argv, VALUE self) {
+ VALUE string;
+ VALUE filepath;
+ rb_scan_args(argc, argv, "11", &string, &filepath);
+
+ yp_string_t input;
+ input_load_string(&input, string);
+ return parse_lex_input(&input, check_string(filepath), true);
+}
+
+// Parse and lex the given file and return a ParseResult instance.
+static VALUE
+parse_lex_file(VALUE self, VALUE filepath) {
+ yp_string_t input;
+
+ const char *checked = check_string(filepath);
+ if (!yp_string_mapped_init(&input, checked)) return Qnil;
+
+ VALUE value = parse_lex_input(&input, checked, true);
+ yp_string_free(&input);
+
+ return value;
+}
+
/******************************************************************************/
/* Utility functions exposed to make testing easier */
/******************************************************************************/
@@ -590,6 +575,8 @@ Init_yarp(void) {
rb_define_singleton_method(rb_cYARP, "lex_file", lex_file, 1);
rb_define_singleton_method(rb_cYARP, "parse", parse, -1);
rb_define_singleton_method(rb_cYARP, "parse_file", parse_file, 1);
+ rb_define_singleton_method(rb_cYARP, "parse_lex", parse_lex, -1);
+ rb_define_singleton_method(rb_cYARP, "parse_lex_file", parse_lex_file, 1);
// Next, the functions that will be called by the parser to perform various
// internal tasks. We expose these to make them easier to test.