aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKevin Newton <kddnewton@gmail.com>2023-09-28 15:19:56 -0400
committerKevin Newton <kddnewton@gmail.com>2023-09-29 09:51:23 -0400
commit2de5c57dce510e809e7af4bb0289bfa0f92aca98 (patch)
tree6ac8310b485f529ab2a6ecdc37f22d202067d40f
parent2e25289aee55e023ce0481ecd00ff20b4d8952c0 (diff)
downloadruby-2de5c57dce510e809e7af4bb0289bfa0f92aca98.tar.gz
Move more things through pm_static_literal_value
-rw-r--r--prism_compile.c207
1 files changed, 124 insertions, 83 deletions
diff --git a/prism_compile.c b/prism_compile.c
index 3d1679f4b2..28710f848c 100644
--- a/prism_compile.c
+++ b/prism_compile.c
@@ -174,39 +174,118 @@ pm_optimizable_range_item_p(pm_node_t *node)
}
/**
+ * Check the prism flags of a regular expression-like node and return the flags
+ * that are expected by the CRuby VM.
+ */
+static int
+pm_reg_flags(const pm_node_t *node) {
+ int flags = 0;
+
+ if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) {
+ flags |= ONIG_OPTION_IGNORECASE;
+ }
+
+ if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) {
+ flags |= ONIG_OPTION_MULTILINE;
+ }
+
+ if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) {
+ flags |= ONIG_OPTION_EXTEND;
+ }
+
+ return flags;
+}
+
+/**
* Certain nodes can be compiled literally, which can lead to further
* optimizations. These nodes will all have the PM_NODE_FLAG_STATIC_LITERAL flag
* set.
*/
static inline bool
-pm_static_node_literal_p(const pm_node_t *node)
+pm_static_literal_p(const pm_node_t *node)
{
return node->flags & PM_NODE_FLAG_STATIC_LITERAL;
}
+/**
+ * Certain nodes can be compiled literally. This function returns the literal
+ * value described by the given node. For example, an array node with all static
+ * literal values can be compiled into a literal array.
+ */
static inline VALUE
-pm_static_literal_value(pm_node_t *node)
+pm_static_literal_value(const pm_node_t *node, pm_compile_context_t *compile_context)
{
- switch(PM_NODE_TYPE(node)) {
+ // Every node that comes into this function should already be marked as
+ // static literal. If it's not, then we have a bug somewhere.
+ assert(pm_static_literal_p(node));
+
+ switch (PM_NODE_TYPE(node)) {
+ case PM_ARRAY_NODE: {
+ pm_array_node_t *cast = (pm_array_node_t *) node;
+ pm_node_list_t *elements = &cast->elements;
+
+ VALUE value = rb_ary_hidden_new(elements->size);
+ for (size_t index = 0; index < elements->size; index++) {
+ rb_ary_push(value, pm_static_literal_value(elements->nodes[index], compile_context));
+ }
+
+ OBJ_FREEZE(value);
+ return value;
+ }
case PM_FALSE_NODE:
return Qfalse;
case PM_FLOAT_NODE:
return parse_float(node);
+ case PM_HASH_NODE: {
+ pm_hash_node_t *cast = (pm_hash_node_t *) node;
+ pm_node_list_t *elements = &cast->elements;
+
+ VALUE array = rb_ary_hidden_new(elements->size * 2);
+ for (size_t index = 0; index < elements->size; index++) {
+ assert(PM_NODE_TYPE_P(elements->nodes[index], PM_ASSOC_NODE));
+ pm_assoc_node_t *cast = (pm_assoc_node_t *) elements->nodes[index];
+ VALUE pair[2] = { pm_static_literal_value(cast->key, compile_context), pm_static_literal_value(cast->value, compile_context) };
+ rb_ary_cat(array, pair, 2);
+ }
+
+ VALUE value = rb_hash_new_with_size(elements->size);
+ rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value);
+
+ value = rb_obj_hide(value);
+ OBJ_FREEZE(value);
+ return value;
+ }
case PM_IMAGINARY_NODE:
- return parse_imaginary((pm_imaginary_node_t *)node);
+ return parse_imaginary((pm_imaginary_node_t *) node);
case PM_INTEGER_NODE:
return parse_integer((pm_integer_node_t *) node);
case PM_NIL_NODE:
return Qnil;
case PM_RATIONAL_NODE:
return parse_rational(node);
+ case PM_REGULAR_EXPRESSION_NODE: {
+ pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node;
+
+ VALUE string = parse_string(&cast->unescaped);
+ return rb_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), pm_reg_flags(node));
+ }
+ case PM_SOURCE_ENCODING_NODE: {
+ rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(compile_context->parser->encoding.name));
+ if (!encoding) rb_bug("Encoding not found!");
+ return rb_enc_from_encoding(encoding);
+ }
+ case PM_SOURCE_FILE_NODE: {
+ pm_source_file_node_t *cast = (pm_source_file_node_t *)node;
+ return cast->filepath.length ? parse_string(&cast->filepath) : rb_fstring_lit("<compiled>");
+ }
+ case PM_SOURCE_LINE_NODE:
+ return INT2FIX((int) pm_newline_list_line_column(&compile_context->parser->newline_list, node->location.start).line);
case PM_STRING_NODE:
- return parse_string(&((pm_string_node_t *)node)->unescaped);
+ return parse_string(&((pm_string_node_t *) node)->unescaped);
case PM_SYMBOL_NODE:
- return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *)node)->unescaped));
+ return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped));
case PM_TRUE_NODE:
return Qtrue;
- // TODO: Implement this method for the other literal nodes described above
default:
rb_raise(rb_eArgError, "Don't have a literal value for this type");
return Qfalse;
@@ -589,29 +668,6 @@ pm_compile_multi_write_lhs(rb_iseq_t *iseq, NODE dummy_line_node, const pm_node_
}
/**
- * Check the prism flags of a regular expression-like node and return the flags
- * that are expected by the CRuby VM.
- */
-static int
-pm_reg_flags(const pm_node_t *node) {
- int flags = 0;
-
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) {
- flags |= ONIG_OPTION_IGNORECASE;
- }
-
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) {
- flags |= ONIG_OPTION_MULTILINE;
- }
-
- if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) {
- flags |= ONIG_OPTION_EXTEND;
- }
-
- return flags;
-}
-
-/**
* Compile a pattern matching expression.
*/
static int
@@ -837,24 +893,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
return;
}
case PM_ARRAY_NODE: {
- pm_array_node_t *cast = (pm_array_node_t *) node;
- pm_node_list_t *elements = &cast->elements;
-
// If every node in the array is static, then we can compile the entire
// array now instead of later.
- if (pm_static_node_literal_p(node)) {
+ if (pm_static_literal_p(node)) {
// We're only going to compile this node if it's not popped. If it
// is popped, then we know we don't need to do anything since it's
// statically known.
if (!popped) {
- VALUE array = rb_ary_hidden_new(elements->size);
- for (size_t index = 0; index < elements->size; index++) {
- rb_ary_push(array, pm_static_literal_value(elements->nodes[index]));
- }
-
- OBJ_FREEZE(array);
- ADD_INSN1(ret, &dummy_line_node, duparray, array);
- RB_OBJ_WRITTEN(iseq, Qundef, array);
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, duparray, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
} else {
// Here since we know there are possible side-effects inside the
@@ -865,6 +913,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// If this hash is popped, then this serves only to ensure we enact
// all side-effects (like method calls) that are contained within
// the hash contents.
+ pm_array_node_t *cast = (pm_array_node_t *) node;
+ pm_node_list_t *elements = &cast->elements;
+
for (size_t index = 0; index < elements->size; index++) {
PM_COMPILE(elements->nodes[index]);
}
@@ -1457,30 +1508,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
return;
}
case PM_HASH_NODE: {
- pm_hash_node_t *cast = (pm_hash_node_t *) node;
- pm_node_list_t elements = cast->elements;
-
// If every node in the hash is static, then we can compile the entire
// hash now instead of later.
- if (pm_static_node_literal_p(node)) {
+ if (pm_static_literal_p(node)) {
// We're only going to compile this node if it's not popped. If it
// is popped, then we know we don't need to do anything since it's
// statically known.
if (!popped) {
- VALUE array = rb_ary_hidden_new(elements.size * 2);
-
- for (size_t index = 0; index < elements.size; index++) {
- pm_assoc_node_t *cast = (pm_assoc_node_t *) elements.nodes[index];
- VALUE pair[2] = { pm_static_literal_value(cast->key), pm_static_literal_value(cast->value) };
- rb_ary_cat(array, pair, 2);
- }
-
- VALUE hash = rb_hash_new_with_size(elements.size);
- rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), hash);
-
- hash = rb_obj_hide(hash);
- OBJ_FREEZE(hash);
- ADD_INSN1(ret, &dummy_line_node, duphash, hash);
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, duphash, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
} else {
// Here since we know there are possible side-effects inside the
@@ -1491,12 +1528,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
// If this hash is popped, then this serves only to ensure we enact
// all side-effects (like method calls) that are contained within
// the hash contents.
- for (size_t index = 0; index < elements.size; index++) {
- PM_COMPILE(elements.nodes[index]);
+ pm_hash_node_t *cast = (pm_hash_node_t *) node;
+ pm_node_list_t *elements = &cast->elements;
+
+ for (size_t index = 0; index < elements->size; index++) {
+ PM_COMPILE(elements->nodes[index]);
}
if (!popped) {
- ADD_INSN1(ret, &dummy_line_node, newhash, INT2FIX(elements.size * 2));
+ ADD_INSN1(ret, &dummy_line_node, newhash, INT2FIX(elements->size * 2));
}
}
@@ -2305,35 +2345,33 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
return;
}
case PM_SOURCE_ENCODING_NODE: {
- const char *encoding = compile_context->parser->encoding.name;
+ // Source encoding nodes are generated by the __ENCODING__ syntax. They
+ // reference the encoding object corresponding to the encoding of the
+ // source file, and can be changed by a magic encoding comment.
if (!popped) {
- rb_encoding *enc = rb_find_encoding(rb_str_new_cstr(encoding));
- if (!enc) {
- rb_bug("Encoding not found!");
- }
- ADD_INSN1(ret, &dummy_line_node, putobject, rb_enc_from_encoding(enc));
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, putobject, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
return;
}
case PM_SOURCE_FILE_NODE: {
- pm_source_file_node_t *source_file_node = (pm_source_file_node_t *)node;
-
+ // Source file nodes are generated by the __FILE__ syntax. They
+ // reference the file name of the source file.
if (!popped) {
- VALUE filepath;
- if (source_file_node->filepath.length == 0) {
- filepath = rb_fstring_lit("<compiled>");
- }
- else {
- filepath = parse_string(&source_file_node->filepath);
- }
-
- ADD_INSN1(ret, &dummy_line_node, putstring, filepath);
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, putstring, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
return;
}
case PM_SOURCE_LINE_NODE: {
+ // Source line nodes are generated by the __LINE__ syntax. They
+ // reference the line number where they occur in the source file.
if (!popped) {
- ADD_INSN1(ret, &dummy_line_node, putobject, INT2FIX(lineno));
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, putobject, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
return;
}
@@ -2374,9 +2412,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret,
return;
}
case PM_SYMBOL_NODE: {
- pm_symbol_node_t *symbol_node = (pm_symbol_node_t *) node;
+ // Symbol nodes are symbol literals with no interpolation. They are
+ // always marked as static literals.
if (!popped) {
- ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_string_symbol(&symbol_node->unescaped)));
+ VALUE value = pm_static_literal_value(node, compile_context);
+ ADD_INSN1(ret, &dummy_line_node, putobject, value);
+ RB_OBJ_WRITTEN(iseq, Qundef, value);
}
return;
}