aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: HASUMI Hitoshi <hasumikin@gmail.com> 2024-03-28 10:26:42 +0900
committer: Yuichiro Kaneko <spiketeika@gmail.com> 2024-04-15 20:51:54 +0900
commit: 9b1e97b211565b605b8eb7fab277efe117fe2604 (patch)
tree: 8d2b58dc312f564005ab377d2f32ac1354dcc769
parent: bb1c3418d0fd3235c678ad68f7b45d32f8183a3f (diff)
download: ruby-9b1e97b211565b605b8eb7fab277efe117fe2604.tar.gz
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines
This patch is part of the universal parser work.

## Summary

- Decouple VALUE from the members below:
  - `(struct parser_params *)->debug_lines`
  - `(rb_ast_t *)->body.script_lines`
- Instead, they are now `rb_parser_ary_t *`
  - They can also be a `(VALUE)FIXNUM`, as before, to hold the line count
- `ISEQ_BODY(iseq)->variable.script_lines` remains a VALUE
  - In order to do this,
    - Add a `VALUE script_lines` param to `rb_iseq_new_with_opt()`
    - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE`

## Other details

- Extend `rb_parser_ary_t *`. Previously it could only store `rb_parser_ast_token *`; now it can store script_lines, too
- Change the strategy for building the top-level `SCRIPT_LINES__` in `yycompile0()`
  - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]`
  - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` is built from `p->debug_lines`
- Remove the second parameter of `rb_parser_set_script_lines()` to simplify it
- Introduce `script_lines_free()`, which is called from `rb_ast_free()`, because the GC no longer takes care of the script_lines
- Introduce `rb_parser_string_deep_copy()` in parse.y to preserve script_lines when `rb_ruby_parser_free()` is called
  - With regard to this, please see *Future tasks* below

## Future tasks

- Decouple IMEMO from `rb_ast_t *`
  - This lifts the five-member restriction on Ruby objects,
  - so we will be able to move the ownership of `lex.string_buffer` from the parser to the AST
  - Then we can remove `rb_parser_string_deep_copy()` to simplify the whole thing
-rw-r--r-- ast.c | 12
-rw-r--r-- compile.c | 10
-rw-r--r-- imemo.c | 2
-rw-r--r-- internal/parse.h | 2
-rw-r--r-- internal/ruby_parser.h | 4
-rw-r--r-- iseq.c | 40
-rw-r--r-- mini_builtin.c | 2
-rw-r--r-- node.c | 17
-rw-r--r-- node.h | 1
-rw-r--r-- parse.y | 164
-rw-r--r-- ruby.c | 7
-rw-r--r-- ruby_parser.c | 59
-rw-r--r-- rubyparser.h | 14
-rw-r--r-- template/prelude.c.tmpl | 3
-rw-r--r-- vm.c | 2
-rw-r--r-- vm_core.h | 3
-rw-r--r-- vm_eval.c | 2
17 files changed, 234 insertions, 110 deletions
diff --git a/ast.c b/ast.c
index 70f298c7f8..a4c57b898b 100644
--- a/ast.c
+++ b/ast.c
@@ -97,7 +97,7 @@ rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE
StringValue(str);
VALUE vparser = ast_parse_new();
- if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
+ if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
ast = rb_parser_compile_string_path(vparser, Qnil, str, 1);
@@ -120,7 +120,7 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VAL
f = rb_file_open_str(path, "r");
rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-"));
VALUE vparser = ast_parse_new();
- if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
+ if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
ast = rb_parser_compile_file_path(vparser, Qnil, f, 1);
@@ -148,7 +148,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, V
array = rb_check_array_type(array);
VALUE vparser = ast_parse_new();
- if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
+ if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
@@ -806,9 +806,9 @@ ast_node_script_lines(rb_execution_context_t *ec, VALUE self)
{
struct ASTNodeData *data;
TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
- VALUE ret = data->ast->body.script_lines;
- if (!RB_TYPE_P(ret, T_ARRAY)) return Qnil;
- return ret;
+ rb_parser_ary_t *ret = data->ast->body.script_lines;
+ if (!ret || FIXNUM_P((VALUE)ret)) return Qnil;
+ return rb_parser_build_script_lines_from(ret);
}
#include "ast.rbinc"
diff --git a/compile.c b/compile.c
index 0fb3b85583..d5fa6a5c74 100644
--- a/compile.c
+++ b/compile.c
@@ -1483,7 +1483,7 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node,
ast.root = node;
ast.frozen_string_literal = -1;
ast.coverage_enabled = -1;
- ast.script_lines = ISEQ_BODY(iseq)->variable.script_lines;
+ ast.script_lines = NULL;
debugs("[new_child_iseq]> ---------------------------------------\n");
int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth;
@@ -1491,7 +1491,8 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node,
rb_iseq_path(iseq), rb_iseq_realpath(iseq),
line_no, parent,
isolated_depth ? isolated_depth + 1 : 0,
- type, ISEQ_COMPILE_DATA(iseq)->option);
+ type, ISEQ_COMPILE_DATA(iseq)->option,
+ ISEQ_BODY(iseq)->variable.script_lines);
debugs("[new_child_iseq]< ---------------------------------------\n");
return ret_iseq;
}
@@ -8740,14 +8741,15 @@ compile_builtin_mandatory_only_method(rb_iseq_t *iseq, const NODE *node, const N
.root = RNODE(&scope_node),
.frozen_string_literal = -1,
.coverage_enabled = -1,
- .script_lines = ISEQ_BODY(iseq)->variable.script_lines,
+ .script_lines = NULL
};
ISEQ_BODY(iseq)->mandatory_only_iseq =
rb_iseq_new_with_opt(&ast, rb_iseq_base_label(iseq),
rb_iseq_path(iseq), rb_iseq_realpath(iseq),
nd_line(line_node), NULL, 0,
- ISEQ_TYPE_METHOD, ISEQ_COMPILE_DATA(iseq)->option);
+ ISEQ_TYPE_METHOD, ISEQ_COMPILE_DATA(iseq)->option,
+ ISEQ_BODY(iseq)->variable.script_lines);
ALLOCV_END(idtmp);
return COMPILE_OK;
diff --git a/imemo.c b/imemo.c
index 0031b3322c..8403859146 100644
--- a/imemo.c
+++ b/imemo.c
@@ -274,7 +274,7 @@ rb_imemo_mark_and_move(VALUE obj, bool reference_updating)
{
switch (imemo_type(obj)) {
case imemo_ast:
- rb_ast_mark_and_move((rb_ast_t *)obj, reference_updating);
+ // TODO: Make AST decoupled from IMEMO
break;
case imemo_callcache: {
diff --git a/internal/parse.h b/internal/parse.h
index 20367730d1..80328686c1 100644
--- a/internal/parse.h
+++ b/internal/parse.h
@@ -51,7 +51,7 @@ size_t rb_ruby_parser_memsize(const void *ptr);
void rb_ruby_parser_set_options(rb_parser_t *p, int print, int loop, int chomp, int split);
rb_parser_t *rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, int main);
-void rb_ruby_parser_set_script_lines(rb_parser_t *p, VALUE lines_array);
+void rb_ruby_parser_set_script_lines(rb_parser_t *p);
void rb_ruby_parser_error_tolerant(rb_parser_t *p);
rb_ast_t* rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start);
void rb_ruby_parser_keep_tokens(rb_parser_t *p);
diff --git a/internal/ruby_parser.h b/internal/ruby_parser.h
index 0a00075211..f0cec86668 100644
--- a/internal/ruby_parser.h
+++ b/internal/ruby_parser.h
@@ -39,9 +39,11 @@ RUBY_SYMBOL_EXPORT_END
VALUE rb_parser_end_seen_p(VALUE);
VALUE rb_parser_encoding(VALUE);
VALUE rb_parser_set_yydebug(VALUE, VALUE);
+VALUE rb_parser_build_script_lines_from(rb_parser_ary_t *script_lines);
+void rb_parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *script_lines);
void rb_parser_set_options(VALUE, int, int, int, int);
void *rb_parser_load_file(VALUE parser, VALUE name);
-void rb_parser_set_script_lines(VALUE vparser, VALUE lines_array);
+void rb_parser_set_script_lines(VALUE vparser);
void rb_parser_error_tolerant(VALUE vparser);
void rb_parser_keep_tokens(VALUE vparser);
diff --git a/iseq.c b/iseq.c
index 4d4006777e..6d4fa5bd33 100644
--- a/iseq.c
+++ b/iseq.c
@@ -839,20 +839,21 @@ rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
const rb_iseq_t *parent, enum rb_iseq_type type)
{
return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent,
- 0, type, &COMPILE_OPTION_DEFAULT);
+ 0, type, &COMPILE_OPTION_DEFAULT,
+ Qnil);
}
static int
ast_line_count(const rb_ast_body_t *ast)
{
- if (ast->script_lines == Qfalse) {
+ if (ast->script_lines == NULL) {
// this occurs when failed to parse the source code with a syntax error
return 0;
}
- if (RB_TYPE_P(ast->script_lines, T_ARRAY)){
- return (int)RARRAY_LEN(ast->script_lines);
+ if (!FIXNUM_P((VALUE)ast->script_lines)) {
+ return (int)ast->script_lines->len;
}
- return FIX2INT(ast->script_lines);
+ return FIX2INT((VALUE)ast->script_lines);
}
static VALUE
@@ -888,7 +889,8 @@ rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath
iseq_new_setup_coverage(path, ast, 0);
return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent, 0,
- ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT);
+ ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT,
+ Qnil);
}
/**
@@ -910,7 +912,8 @@ rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_
return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"),
path, realpath, 0,
- parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE);
+ parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE,
+ Qnil);
}
/**
@@ -938,7 +941,8 @@ rb_iseq_new_eval(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpat
}
return rb_iseq_new_with_opt(ast, name, path, realpath, first_lineno,
- parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT);
+ parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT,
+ Qnil);
}
rb_iseq_t *
@@ -966,7 +970,8 @@ iseq_translate(rb_iseq_t *iseq)
rb_iseq_t *
rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
int first_lineno, const rb_iseq_t *parent, int isolated_depth,
- enum rb_iseq_type type, const rb_compile_option_t *option)
+ enum rb_iseq_type type, const rb_compile_option_t *option,
+ VALUE script_lines)
{
const NODE *node = ast ? ast->root : 0;
/* TODO: argument check */
@@ -979,10 +984,11 @@ rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE rea
option = set_compile_option_from_ast(&new_opt, ast);
}
- VALUE script_lines = Qnil;
-
- if (ast && !FIXNUM_P(ast->script_lines) && ast->script_lines) {
- script_lines = ast->script_lines;
+ if (!NIL_P(script_lines)) {
+ // noop
+ }
+ else if (ast && !FIXNUM_P((VALUE)ast->script_lines) && ast->script_lines) {
+ script_lines = rb_parser_build_script_lines_from(ast->script_lines);
}
else if (parent) {
script_lines = ISEQ_BODY(parent)->variable.script_lines;
@@ -1225,7 +1231,7 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V
const rb_iseq_t *outer_scope = rb_iseq_new(NULL, name, name, Qnil, 0, ISEQ_TYPE_TOP);
VALUE outer_scope_v = (VALUE)outer_scope;
rb_parser_set_context(parser, outer_scope, FALSE);
- rb_parser_set_script_lines(parser, RBOOL(ruby_vm_keep_script_lines));
+ if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
RB_GC_GUARD(outer_scope_v);
ast = (*parse)(parser, file, src, ln);
}
@@ -1236,7 +1242,8 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V
}
else {
iseq = rb_iseq_new_with_opt(&ast->body, name, file, realpath, ln,
- NULL, 0, ISEQ_TYPE_TOP, &option);
+ NULL, 0, ISEQ_TYPE_TOP, &option,
+ Qnil);
rb_ast_dispose(ast);
}
@@ -1627,7 +1634,8 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"),
file,
rb_realpath_internal(Qnil, file, 1),
- 1, NULL, 0, ISEQ_TYPE_TOP, &option));
+ 1, NULL, 0, ISEQ_TYPE_TOP, &option,
+ Qnil));
rb_ast_dispose(ast);
rb_vm_pop_frame(ec);
diff --git a/mini_builtin.c b/mini_builtin.c
index dce822a86c..38b0ca8d81 100644
--- a/mini_builtin.c
+++ b/mini_builtin.c
@@ -39,7 +39,7 @@ builtin_iseq_load(const char *feature_name, const struct rb_builtin_function *ta
.coverage_enabled = FALSE,
.debug_level = 0,
};
- const rb_iseq_t *iseq = rb_iseq_new_with_opt(&ast->body, name_str, name_str, Qnil, 0, NULL, 0, ISEQ_TYPE_TOP, &optimization);
+ const rb_iseq_t *iseq = rb_iseq_new_with_opt(&ast->body, name_str, name_str, Qnil, 0, NULL, 0, ISEQ_TYPE_TOP, &optimization, Qnil);
GET_VM()->builtin_function_table = NULL;
rb_ast_dispose(ast);
diff --git a/node.c b/node.c
index 79520f0a1e..8a6b55b0b5 100644
--- a/node.c
+++ b/node.c
@@ -20,12 +20,13 @@
#include "internal.h"
#include "internal/hash.h"
-#include "internal/variable.h"
#include "ruby/ruby.h"
#include "vm_core.h"
#endif
+#include "internal/variable.h"
+
#define NODE_BUF_DEFAULT_SIZE (sizeof(struct RNode) * 16)
static void
@@ -344,18 +345,24 @@ iterate_node_values(rb_ast_t *ast, node_buffer_list_t *nb, node_itr_t * func, vo
}
}
-void
-rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating)
+static void
+script_lines_free(rb_ast_t *ast, rb_parser_ary_t *script_lines)
{
- if (ast->node_buffer) {
- if (ast->body.script_lines) rb_gc_mark_and_move(&ast->body.script_lines);
+ for (long i = 0; i < script_lines->len; i++) {
+ parser_string_free(ast, (rb_parser_string_t *)script_lines->data[i]);
}
+ xfree(script_lines->data);
+ xfree(script_lines);
}
void
rb_ast_free(rb_ast_t *ast)
{
if (ast->node_buffer) {
+ if (ast->body.script_lines && !FIXNUM_P((VALUE)ast->body.script_lines)) {
+ script_lines_free(ast, ast->body.script_lines);
+ ast->body.script_lines = NULL;
+ }
rb_node_buffer_free(ast, ast->node_buffer);
ast->node_buffer = 0;
}
diff --git a/node.h b/node.h
index d5522c82ec..bcc7e451d2 100644
--- a/node.h
+++ b/node.h
@@ -56,7 +56,6 @@ void rb_ast_dispose(rb_ast_t*);
const char *ruby_node_name(int node);
void rb_node_init(NODE *n, enum node_type type);
-void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating);
void rb_ast_update_references(rb_ast_t*);
void rb_ast_free(rb_ast_t*);
NODE *rb_ast_newnode(rb_ast_t*, enum node_type type, size_t size, size_t alignment);
diff --git a/parse.y b/parse.y
index 1a052d4757..a87be73e3c 100644
--- a/parse.y
+++ b/parse.y
@@ -86,6 +86,10 @@ VALUE rb_io_gets_internal(VALUE io);
static int rb_parser_string_hash_cmp(rb_parser_string_t *str1, rb_parser_string_t *str2);
+#ifndef RIPPER
+static rb_parser_string_t *rb_parser_string_deep_copy(struct parser_params *p, const rb_parser_string_t *original);
+#endif
+
static int
node_integer_cmp(rb_node_integer_t *n1, rb_node_integer_t *n2)
{
@@ -582,7 +586,7 @@ struct parser_params {
unsigned int keep_tokens: 1;
VALUE error_buffer;
- VALUE debug_lines;
+ rb_parser_ary_t *debug_lines;
/*
* Store specific keyword locations to generate dummy end token.
* Refer to the tail of list element.
@@ -2559,15 +2563,19 @@ rb_parser_ary_extend(rb_parser_t *p, rb_parser_ary_t *ary, long len)
long i;
if (ary->capa < len) {
ary->capa = len;
- ary->data = xrealloc(ary->data, sizeof(void *) * len);
+ ary->data = (rb_parser_ary_data *)xrealloc(ary->data, sizeof(rb_parser_ary_data) * len);
for (i = ary->len; i < len; i++) {
ary->data[i] = 0;
}
}
}
+/*
+ * Do not call this directly.
+ * Use rb_parser_ary_new_capa_for_script_line() or rb_parser_ary_new_capa_for_ast_token() instead.
+ */
static rb_parser_ary_t *
-rb_parser_ary_new_capa(rb_parser_t *p, long len)
+parser_ary_new_capa(rb_parser_t *p, long len)
{
if (len < 0) {
rb_bug("negative array size (or size too big): %ld", len);
@@ -2576,17 +2584,36 @@ rb_parser_ary_new_capa(rb_parser_t *p, long len)
ary->len = 0;
ary->capa = len;
if (0 < len) {
- ary->data = (rb_parser_ast_token_t **)xcalloc(len, sizeof(rb_parser_ast_token_t *));
+ ary->data = (rb_parser_ary_data *)xcalloc(len, sizeof(rb_parser_ary_data));
}
else {
ary->data = NULL;
}
return ary;
}
-#define rb_parser_ary_new2 rb_parser_ary_new_capa
static rb_parser_ary_t *
-rb_parser_ary_push(rb_parser_t *p, rb_parser_ary_t *ary, rb_parser_ast_token_t *val)
+rb_parser_ary_new_capa_for_script_line(rb_parser_t *p, long len)
+{
+ rb_parser_ary_t *ary = parser_ary_new_capa(p, len);
+ ary->data_type = PARSER_ARY_DATA_SCRIPT_LINE;
+ return ary;
+}
+
+static rb_parser_ary_t *
+rb_parser_ary_new_capa_for_ast_token(rb_parser_t *p, long len)
+{
+ rb_parser_ary_t *ary = parser_ary_new_capa(p, len);
+ ary->data_type = PARSER_ARY_DATA_AST_TOKEN;
+ return ary;
+}
+
+/*
+ * Do not call this directly.
+ * Use rb_parser_ary_push_script_line() or rb_parser_ary_push_ast_token() instead.
+ */
+static rb_parser_ary_t *
+parser_ary_push(rb_parser_t *p, rb_parser_ary_t *ary, rb_parser_ary_data val)
{
if (ary->len == ary->capa) {
rb_parser_ary_extend(p, ary, ary->len == 0 ? 1 : ary->len * 2);
@@ -2595,6 +2622,24 @@ rb_parser_ary_push(rb_parser_t *p, rb_parser_ary_t *ary, rb_parser_ast_token_t *
return ary;
}
+static rb_parser_ary_t *
+rb_parser_ary_push_ast_token(rb_parser_t *p, rb_parser_ary_t *ary, rb_parser_ast_token_t *val)
+{
+ if (ary->data_type != PARSER_ARY_DATA_AST_TOKEN) {
+ rb_bug("unexpected rb_parser_ary_data_type: %d", ary->data_type);
+ }
+ return parser_ary_push(p, ary, val);
+}
+
+static rb_parser_ary_t *
+rb_parser_ary_push_script_line(rb_parser_t *p, rb_parser_ary_t *ary, rb_parser_string_t *val)
+{
+ if (ary->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
+ rb_bug("unexpected rb_parser_ary_data_type: %d", ary->data_type);
+ }
+ return parser_ary_push(p, ary, val);
+}
+
static void
rb_parser_ast_token_free(rb_parser_t *p, rb_parser_ast_token_t *token)
{
@@ -2604,12 +2649,24 @@ rb_parser_ast_token_free(rb_parser_t *p, rb_parser_ast_token_t *token)
}
static void
-rb_parser_tokens_free(rb_parser_t *p, rb_parser_ary_t *tokens)
+rb_parser_ary_free(rb_parser_t *p, rb_parser_ary_t *ary)
{
- for (long i = 0; i < tokens->len; i++) {
- rb_parser_ast_token_free(p, tokens->data[i]);
+ void (*free_func)(rb_parser_t *, rb_parser_ary_data) = NULL;
+ switch (ary->data_type) {
+ case PARSER_ARY_DATA_AST_TOKEN:
+ free_func = (void (*)(rb_parser_t *, rb_parser_ary_data))rb_parser_ast_token_free;
+ break;
+ case PARSER_ARY_DATA_SCRIPT_LINE:
+ free_func = (void (*)(rb_parser_t *, rb_parser_ary_data))rb_parser_string_free;
+ break;
+ default:
+ rb_bug("unexpected rb_parser_ary_data_type: %d", ary->data_type);
+ break;
}
- xfree(tokens);
+ for (long i = 0; i < ary->len; i++) {
+ free_func(p, ary->data[i]);
+ }
+ xfree(ary);
}
#endif /* !RIPPER */
@@ -7144,7 +7201,7 @@ parser_append_tokens(struct parser_params *p, rb_parser_string_t *str, enum yyto
token->str = str;
token->loc.beg_pos = p->yylloc->beg_pos;
token->loc.end_pos = p->yylloc->end_pos;
- rb_parser_ary_push(p, p->tokens, token);
+ rb_parser_ary_push_ast_token(p, p->tokens, token);
p->token_id++;
if (p->debug) {
@@ -7656,22 +7713,12 @@ yycompile0(VALUE arg)
struct parser_params *p = (struct parser_params *)arg;
int cov = FALSE;
- if (!compile_for_eval && !NIL_P(p->ruby_sourcefile_string)) {
- if (p->debug_lines && p->ruby_sourceline > 0) {
- VALUE str = rb_default_rs;
- n = p->ruby_sourceline;
- do {
- rb_ary_push(p->debug_lines, str);
- } while (--n);
- }
-
- if (!e_option_supplied(p)) {
- cov = TRUE;
- }
+ if (!compile_for_eval && !NIL_P(p->ruby_sourcefile_string) && !e_option_supplied(p)) {
+ cov = TRUE;
}
if (p->debug_lines) {
- RB_OBJ_WRITE(p->ast, &p->ast->body.script_lines, p->debug_lines);
+ p->ast->body.script_lines = p->debug_lines;
}
parser_prepare(p);
@@ -7682,6 +7729,8 @@ yycompile0(VALUE arg)
RUBY_DTRACE_PARSE_HOOK(BEGIN);
n = yyparse(p);
RUBY_DTRACE_PARSE_HOOK(END);
+
+ rb_parser_aset_script_lines_for(p->ruby_sourcefile_string, p->debug_lines);
p->debug_lines = 0;
xfree(p->lex.strterm);
@@ -7715,7 +7764,7 @@ yycompile0(VALUE arg)
}
}
p->ast->body.root = tree;
- if (!p->ast->body.script_lines) p->ast->body.script_lines = INT2FIX(p->line_count);
+ if (!p->ast->body.script_lines) p->ast->body.script_lines = (rb_parser_ary_t *)INT2FIX(p->line_count);
return TRUE;
}
@@ -7975,9 +8024,9 @@ nextline(struct parser_params *p, int set_encoding)
}
#ifndef RIPPER
if (p->debug_lines) {
- VALUE v = rb_str_new_mutable_parser_string(str);
- if (set_encoding) rb_enc_associate(v, p->enc);
- rb_ary_push(p->debug_lines, v);
+ if (set_encoding) rb_parser_enc_associate(p, str, p->enc);
+ rb_parser_string_t *copy = rb_parser_string_deep_copy(p, str);
+ rb_parser_ary_push_script_line(p, p->debug_lines, copy);
}
#endif
p->cr_seen = FALSE;
@@ -9653,10 +9702,9 @@ parser_set_encode(struct parser_params *p, const char *name)
p->enc = enc;
#ifndef RIPPER
if (p->debug_lines) {
- VALUE lines = p->debug_lines;
- long i, n = RARRAY_LEN(lines);
- for (i = 0; i < n; ++i) {
- rb_enc_associate_index(RARRAY_AREF(lines, i), idx);
+ long i;
+ for (i = 0; i < p->debug_lines->len; i++) {
+ rb_parser_enc_associate(p, p->debug_lines->data[i], enc);
}
}
#endif
@@ -12870,6 +12918,19 @@ string_literal_head(struct parser_params *p, enum node_type htype, NODE *head)
return lit;
}
+#ifndef RIPPER
+static rb_parser_string_t *
+rb_parser_string_deep_copy(struct parser_params *p, const rb_parser_string_t *orig)
+{
+ rb_parser_string_t *copy;
+ if (!orig) return NULL;
+ copy = rb_parser_string_new(p, PARSER_STRING_PTR(orig), PARSER_STRING_LEN(orig));
+ copy->coderange = orig->coderange;
+ copy->enc = orig->enc;
+ return copy;
+}
+#endif
+
/* concat two string literals */
static NODE *
literal_concat(struct parser_params *p, NODE *head, NODE *tail, const YYLTYPE *loc)
@@ -15826,7 +15887,6 @@ rb_ruby_parser_mark(void *ptr)
rb_gc_mark(p->ruby_sourcefile_string);
rb_gc_mark((VALUE)p->ast);
#ifndef RIPPER
- rb_gc_mark(p->debug_lines);
rb_gc_mark(p->error_buffer);
#else
rb_gc_mark(p->value);
@@ -15848,7 +15908,7 @@ rb_ruby_parser_free(void *ptr)
#ifndef RIPPER
if (p->tokens) {
- rb_parser_tokens_free(p, p->tokens);
+ rb_parser_ary_free(p, p->tokens);
}
#endif
@@ -15948,19 +16008,9 @@ rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, in
}
void
-rb_ruby_parser_set_script_lines(rb_parser_t *p, VALUE lines)
+rb_ruby_parser_set_script_lines(rb_parser_t *p)
{
- if (!RTEST(lines)) {
- lines = Qfalse;
- }
- else if (lines == Qtrue) {
- lines = rb_ary_new();
- }
- else {
- Check_Type(lines, T_ARRAY);
- rb_ary_modify(lines);
- }
- p->debug_lines = lines;
+ p->debug_lines = rb_parser_ary_new_capa_for_script_line(p, 10);
}
void
@@ -15973,7 +16023,7 @@ void
rb_ruby_parser_keep_tokens(rb_parser_t *p)
{
p->keep_tokens = 1;
- p->tokens = rb_parser_ary_new_capa(p, 10);
+ p->tokens = rb_parser_ary_new_capa_for_ast_token(p, 10);
}
#ifndef UNIVERSAL_PARSER
@@ -16045,12 +16095,12 @@ rb_parser_error_tolerant(VALUE vparser)
}
void
-rb_parser_set_script_lines(VALUE vparser, VALUE lines)
+rb_parser_set_script_lines(VALUE vparser)
{
struct parser_params *p;
TypedData_Get_Struct(vparser, struct parser_params, &parser_data_type, p);
- rb_ruby_parser_set_script_lines(p, lines);
+ rb_ruby_parser_set_script_lines(p);
}
void
@@ -16100,6 +16150,22 @@ rb_parser_set_yydebug(VALUE self, VALUE flag)
rb_ruby_parser_set_yydebug(p, RTEST(flag));
return flag;
}
+
+void
+rb_set_script_lines_for(VALUE self, VALUE path)
+{
+ struct parser_params *p;
+ VALUE hash;
+ ID script_lines;
+ CONST_ID(script_lines, "SCRIPT_LINES__");
+ if (!rb_const_defined_at(rb_cObject, script_lines)) return;
+ hash = rb_const_get_at(rb_cObject, script_lines);
+ if (RB_TYPE_P(hash, T_HASH)) {
+ rb_hash_aset(hash, path, Qtrue);
+ TypedData_Get_Struct(self, struct parser_params, &parser_data_type, p);
+ rb_ruby_parser_set_script_lines(p);
+ }
+}
#endif /* !UNIVERSAL_PARSER */
VALUE
diff --git a/ruby.c b/ruby.c
index 3234460894..fb60551c3f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -2592,7 +2592,7 @@ struct load_file_arg {
VALUE f;
};
-VALUE rb_script_lines_for(VALUE path);
+void rb_set_script_lines_for(VALUE vparser, VALUE path);
static VALUE
load_file_internal(VALUE argp_v)
@@ -2697,10 +2697,7 @@ load_file_internal(VALUE argp_v)
rb_parser_set_options(parser, opt->do_print, opt->do_loop,
opt->do_line, opt->do_split);
- VALUE lines = rb_script_lines_for(orig_fname);
- if (!NIL_P(lines)) {
- rb_parser_set_script_lines(parser, lines);
- }
+ rb_set_script_lines_for(parser, orig_fname);
if (NIL_P(f)) {
f = rb_str_new(0, 0);
diff --git a/ruby_parser.c b/ruby_parser.c
index 1991735af4..5d9c6c938f 100644
--- a/ruby_parser.c
+++ b/ruby_parser.c
@@ -622,12 +622,12 @@ rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main
}
void
-rb_parser_set_script_lines(VALUE vparser, VALUE lines)
+rb_parser_set_script_lines(VALUE vparser)
{
struct ruby_parser *parser;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- rb_ruby_parser_set_script_lines(parser->parser_params, lines);
+ rb_ruby_parser_set_script_lines(parser->parser_params);
}
void
@@ -727,9 +727,40 @@ rb_parser_set_yydebug(VALUE vparser, VALUE flag)
rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
return flag;
}
+
+void
+rb_set_script_lines_for(VALUE vparser, VALUE path)
+{
+ struct ruby_parser *parser;
+ VALUE hash;
+ ID script_lines;
+ CONST_ID(script_lines, "SCRIPT_LINES__");
+ if (!rb_const_defined_at(rb_cObject, script_lines)) return;
+ hash = rb_const_get_at(rb_cObject, script_lines);
+ if (RB_TYPE_P(hash, T_HASH)) {
+ rb_hash_aset(hash, path, Qtrue);
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
+ rb_ruby_parser_set_script_lines(parser->parser_params);
+ }
+}
#endif
VALUE
+rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
+{
+ int i;
+ if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
+ rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
+ }
+ VALUE script_lines = rb_ary_new_capa(lines->len);
+ for (i = 0; i < lines->len; i++) {
+ rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
+ rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
+ }
+ return script_lines;
+}
+
+VALUE
rb_str_new_parser_string(rb_parser_string_t *str)
{
VALUE string = rb_enc_interned_str(str->ptr, str->len, str->enc);
@@ -935,15 +966,17 @@ rb_node_encoding_val(const NODE *node)
return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
}
-VALUE
-rb_script_lines_for(VALUE path)
-{
- VALUE hash, lines;
- ID script_lines;
- CONST_ID(script_lines, "SCRIPT_LINES__");
- if (!rb_const_defined_at(rb_cObject, script_lines)) return Qnil;
- hash = rb_const_get_at(rb_cObject, script_lines);
- if (!RB_TYPE_P(hash, T_HASH)) return Qnil;
- rb_hash_aset(hash, path, lines = rb_ary_new());
- return lines;
+void
+rb_parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
+{
+ VALUE hash, script_lines;
+ ID script_lines_id;
+ if (NIL_P(path) || !lines || FIXNUM_P((VALUE)lines)) return;
+ CONST_ID(script_lines_id, "SCRIPT_LINES__");
+ if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
+ hash = rb_const_get_at(rb_cObject, script_lines_id);
+ if (!RB_TYPE_P(hash, T_HASH)) return;
+ if (rb_hash_lookup(hash, path) == Qnil) return;
+ script_lines = rb_parser_build_script_lines_from(lines);
+ rb_hash_aset(hash, path, script_lines);
}
diff --git a/rubyparser.h b/rubyparser.h
index c51b9ee44a..d36e8dcede 100644
--- a/rubyparser.h
+++ b/rubyparser.h
@@ -219,8 +219,16 @@ typedef struct rb_parser_ast_token {
/*
* Array-like object for parser
*/
+typedef void* rb_parser_ary_data;
+
+enum rb_parser_ary_data_type {
+ PARSER_ARY_DATA_AST_TOKEN,
+ PARSER_ARY_DATA_SCRIPT_LINE
+};
+
typedef struct rb_parser_ary {
- rb_parser_ast_token_t **data;
+ enum rb_parser_ary_data_type data_type;
+ rb_parser_ary_data *data;
long len; // current size
long capa; // capacity
} rb_parser_ary_t;
@@ -1201,10 +1209,10 @@ typedef struct node_buffer_struct node_buffer_t;
/* T_IMEMO/ast */
typedef struct rb_ast_body_struct {
const NODE *root;
- VALUE script_lines;
+ rb_parser_ary_t *script_lines;
// script_lines is either:
// - a Fixnum that represents the line count of the original source, or
- // - an Array that contains the lines of the original source
+ // - an rb_parser_ary_t* that contains the lines of the original source
signed int frozen_string_literal:2; /* -1: not specified, 0: false, 1: true */
signed int coverage_enabled:2; /* -1: not specified, 0: false, 1: true */
} rb_ast_body_t;
diff --git a/template/prelude.c.tmpl b/template/prelude.c.tmpl
index 74f6c08da7..dc0a143004 100644
--- a/template/prelude.c.tmpl
+++ b/template/prelude.c.tmpl
@@ -198,7 +198,8 @@ prelude_eval(VALUE code, VALUE name, int line)
rb_ast_t *ast = prelude_ast(name, code, line);
rb_iseq_eval(rb_iseq_new_with_opt(&ast->body, name, name, Qnil, line,
- NULL, 0, ISEQ_TYPE_TOP, &optimization));
+ NULL, 0, ISEQ_TYPE_TOP, &optimization,
+ Qnil));
rb_ast_dispose(ast);
}
COMPILER_WARNING_POP
diff --git a/vm.c b/vm.c
index 328187c790..e7335aa1bd 100644
--- a/vm.c
+++ b/vm.c
@@ -1479,7 +1479,7 @@ rb_binding_add_dynavars(VALUE bindval, rb_binding_t *bind, int dyncount, const I
ast.root = RNODE(&tmp_node);
ast.frozen_string_literal = -1;
ast.coverage_enabled = -1;
- ast.script_lines = INT2FIX(-1);
+ ast.script_lines = (rb_parser_ary_t *)INT2FIX(-1);
if (base_iseq) {
iseq = rb_iseq_new(&ast, ISEQ_BODY(base_iseq)->location.label, path, realpath, base_iseq, ISEQ_TYPE_EVAL);
diff --git a/vm_core.h b/vm_core.h
index 57d90b343f..9873ada2d5 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -1201,7 +1201,8 @@ rb_iseq_t *rb_iseq_new_top (const rb_ast_body_t *ast, VALUE name, VALUE path
rb_iseq_t *rb_iseq_new_main (const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_iseq_t *parent, int opt);
rb_iseq_t *rb_iseq_new_eval (const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth);
rb_iseq_t *rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath, int first_lineno, const rb_iseq_t *parent, int isolated_depth,
- enum rb_iseq_type, const rb_compile_option_t*);
+ enum rb_iseq_type, const rb_compile_option_t*,
+ VALUE script_lines);
struct iseq_link_anchor;
struct rb_iseq_new_with_callback_callback_func {
diff --git a/vm_eval.c b/vm_eval.c
index d7447bd060..25fa28d828 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -1809,7 +1809,7 @@ eval_make_iseq(VALUE src, VALUE fname, int line,
}
rb_parser_set_context(parser, parent, FALSE);
- rb_parser_set_script_lines(parser, RBOOL(ruby_vm_keep_script_lines));
+ if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
ast = rb_parser_compile_string_path(parser, fname, src, line);
if (ast->body.root) {
ast->body.coverage_enabled = coverage_enabled;