diff options
author | Kevin Newton <kddnewton@gmail.com> | 2023-09-28 15:19:56 -0400 |
---|---|---|
committer | Kevin Newton <kddnewton@gmail.com> | 2023-09-29 09:51:23 -0400 |
commit | 2de5c57dce510e809e7af4bb0289bfa0f92aca98 (patch) | |
tree | 6ac8310b485f529ab2a6ecdc37f22d202067d40f | |
parent | 2e25289aee55e023ce0481ecd00ff20b4d8952c0 (diff) | |
download | ruby-2de5c57dce510e809e7af4bb0289bfa0f92aca98.tar.gz |
Move more things through pm_static_literal_value
-rw-r--r-- | prism_compile.c | 207 |
1 file changed, 124 insertions, 83 deletions
diff --git a/prism_compile.c b/prism_compile.c index 3d1679f4b2..28710f848c 100644 --- a/prism_compile.c +++ b/prism_compile.c @@ -174,39 +174,118 @@ pm_optimizable_range_item_p(pm_node_t *node) } /** + * Check the prism flags of a regular expression-like node and return the flags + * that are expected by the CRuby VM. + */ +static int +pm_reg_flags(const pm_node_t *node) { + int flags = 0; + + if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) { + flags |= ONIG_OPTION_IGNORECASE; + } + + if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) { + flags |= ONIG_OPTION_MULTILINE; + } + + if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) { + flags |= ONIG_OPTION_EXTEND; + } + + return flags; +} + +/** * Certain nodes can be compiled literally, which can lead to further * optimizations. These nodes will all have the PM_NODE_FLAG_STATIC_LITERAL flag * set. */ static inline bool -pm_static_node_literal_p(const pm_node_t *node) +pm_static_literal_p(const pm_node_t *node) { return node->flags & PM_NODE_FLAG_STATIC_LITERAL; } +/** + * Certain nodes can be compiled literally. This function returns the literal + * value described by the given node. For example, an array node with all static + * literal values can be compiled into a literal array. + */ static inline VALUE -pm_static_literal_value(pm_node_t *node) +pm_static_literal_value(const pm_node_t *node, pm_compile_context_t *compile_context) { - switch(PM_NODE_TYPE(node)) { + // Every node that comes into this function should already be marked as + // static literal. If it's not, then we have a bug somewhere. 
+ assert(pm_static_literal_p(node)); + + switch (PM_NODE_TYPE(node)) { + case PM_ARRAY_NODE: { + pm_array_node_t *cast = (pm_array_node_t *) node; + pm_node_list_t *elements = &cast->elements; + + VALUE value = rb_ary_hidden_new(elements->size); + for (size_t index = 0; index < elements->size; index++) { + rb_ary_push(value, pm_static_literal_value(elements->nodes[index], compile_context)); + } + + OBJ_FREEZE(value); + return value; + } case PM_FALSE_NODE: return Qfalse; case PM_FLOAT_NODE: return parse_float(node); + case PM_HASH_NODE: { + pm_hash_node_t *cast = (pm_hash_node_t *) node; + pm_node_list_t *elements = &cast->elements; + + VALUE array = rb_ary_hidden_new(elements->size * 2); + for (size_t index = 0; index < elements->size; index++) { + assert(PM_NODE_TYPE_P(elements->nodes[index], PM_ASSOC_NODE)); + pm_assoc_node_t *cast = (pm_assoc_node_t *) elements->nodes[index]; + VALUE pair[2] = { pm_static_literal_value(cast->key, compile_context), pm_static_literal_value(cast->value, compile_context) }; + rb_ary_cat(array, pair, 2); + } + + VALUE value = rb_hash_new_with_size(elements->size); + rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), value); + + value = rb_obj_hide(value); + OBJ_FREEZE(value); + return value; + } case PM_IMAGINARY_NODE: - return parse_imaginary((pm_imaginary_node_t *)node); + return parse_imaginary((pm_imaginary_node_t *) node); case PM_INTEGER_NODE: return parse_integer((pm_integer_node_t *) node); case PM_NIL_NODE: return Qnil; case PM_RATIONAL_NODE: return parse_rational(node); + case PM_REGULAR_EXPRESSION_NODE: { + pm_regular_expression_node_t *cast = (pm_regular_expression_node_t *) node; + + VALUE string = parse_string(&cast->unescaped); + return rb_reg_new(RSTRING_PTR(string), RSTRING_LEN(string), pm_reg_flags(node)); + } + case PM_SOURCE_ENCODING_NODE: { + rb_encoding *encoding = rb_find_encoding(rb_str_new_cstr(compile_context->parser->encoding.name)); + if (!encoding) rb_bug("Encoding not found!"); + return 
rb_enc_from_encoding(encoding); + } + case PM_SOURCE_FILE_NODE: { + pm_source_file_node_t *cast = (pm_source_file_node_t *)node; + return cast->filepath.length ? parse_string(&cast->filepath) : rb_fstring_lit("<compiled>"); + } + case PM_SOURCE_LINE_NODE: + return INT2FIX((int) pm_newline_list_line_column(&compile_context->parser->newline_list, node->location.start).line); case PM_STRING_NODE: - return parse_string(&((pm_string_node_t *)node)->unescaped); + return parse_string(&((pm_string_node_t *) node)->unescaped); case PM_SYMBOL_NODE: - return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *)node)->unescaped)); + return ID2SYM(parse_string_symbol(&((pm_symbol_node_t *) node)->unescaped)); case PM_TRUE_NODE: return Qtrue; - // TODO: Implement this method for the other literal nodes described above default: rb_raise(rb_eArgError, "Don't have a literal value for this type"); return Qfalse; @@ -589,29 +668,6 @@ pm_compile_multi_write_lhs(rb_iseq_t *iseq, NODE dummy_line_node, const pm_node_ } /** - * Check the prism flags of a regular expression-like node and return the flags - * that are expected by the CRuby VM. - */ -static int -pm_reg_flags(const pm_node_t *node) { - int flags = 0; - - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_IGNORE_CASE) { - flags |= ONIG_OPTION_IGNORECASE; - } - - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_MULTI_LINE) { - flags |= ONIG_OPTION_MULTILINE; - } - - if (node->flags & PM_REGULAR_EXPRESSION_FLAGS_EXTENDED) { - flags |= ONIG_OPTION_EXTEND; - } - - return flags; -} - -/** * Compile a pattern matching expression. */ static int @@ -837,24 +893,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case PM_ARRAY_NODE: { - pm_array_node_t *cast = (pm_array_node_t *) node; - pm_node_list_t *elements = &cast->elements; - // If every node in the array is static, then we can compile the entire // array now instead of later. 
- if (pm_static_node_literal_p(node)) { + if (pm_static_literal_p(node)) { // We're only going to compile this node if it's not popped. If it // is popped, then we know we don't need to do anything since it's // statically known. if (!popped) { - VALUE array = rb_ary_hidden_new(elements->size); - for (size_t index = 0; index < elements->size; index++) { - rb_ary_push(array, pm_static_literal_value(elements->nodes[index])); - } - - OBJ_FREEZE(array); - ADD_INSN1(ret, &dummy_line_node, duparray, array); - RB_OBJ_WRITTEN(iseq, Qundef, array); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, duparray, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } } else { // Here since we know there are possible side-effects inside the @@ -865,6 +913,9 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // If this hash is popped, then this serves only to ensure we enact // all side-effects (like method calls) that are contained within // the hash contents. + pm_array_node_t *cast = (pm_array_node_t *) node; + pm_node_list_t *elements = &cast->elements; + for (size_t index = 0; index < elements->size; index++) { PM_COMPILE(elements->nodes[index]); } @@ -1457,30 +1508,16 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case PM_HASH_NODE: { - pm_hash_node_t *cast = (pm_hash_node_t *) node; - pm_node_list_t elements = cast->elements; - // If every node in the hash is static, then we can compile the entire // hash now instead of later. - if (pm_static_node_literal_p(node)) { + if (pm_static_literal_p(node)) { // We're only going to compile this node if it's not popped. If it // is popped, then we know we don't need to do anything since it's // statically known. 
if (!popped) { - VALUE array = rb_ary_hidden_new(elements.size * 2); - - for (size_t index = 0; index < elements.size; index++) { - pm_assoc_node_t *cast = (pm_assoc_node_t *) elements.nodes[index]; - VALUE pair[2] = { pm_static_literal_value(cast->key), pm_static_literal_value(cast->value) }; - rb_ary_cat(array, pair, 2); - } - - VALUE hash = rb_hash_new_with_size(elements.size); - rb_hash_bulk_insert(RARRAY_LEN(array), RARRAY_CONST_PTR(array), hash); - - hash = rb_obj_hide(hash); - OBJ_FREEZE(hash); - ADD_INSN1(ret, &dummy_line_node, duphash, hash); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, duphash, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } } else { // Here since we know there are possible side-effects inside the @@ -1491,12 +1528,15 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, // If this hash is popped, then this serves only to ensure we enact // all side-effects (like method calls) that are contained within // the hash contents. - for (size_t index = 0; index < elements.size; index++) { - PM_COMPILE(elements.nodes[index]); + pm_hash_node_t *cast = (pm_hash_node_t *) node; + pm_node_list_t *elements = &cast->elements; + + for (size_t index = 0; index < elements->size; index++) { + PM_COMPILE(elements->nodes[index]); } if (!popped) { - ADD_INSN1(ret, &dummy_line_node, newhash, INT2FIX(elements.size * 2)); + ADD_INSN1(ret, &dummy_line_node, newhash, INT2FIX(elements->size * 2)); } } @@ -2305,35 +2345,33 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case PM_SOURCE_ENCODING_NODE: { - const char *encoding = compile_context->parser->encoding.name; + // Source encoding nodes are generated by the __ENCODING__ syntax. They + // reference the encoding object corresponding to the encoding of the + // source file, and can be changed by a magic encoding comment. 
if (!popped) { - rb_encoding *enc = rb_find_encoding(rb_str_new_cstr(encoding)); - if (!enc) { - rb_bug("Encoding not found!"); - } - ADD_INSN1(ret, &dummy_line_node, putobject, rb_enc_from_encoding(enc)); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, putobject, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } return; } case PM_SOURCE_FILE_NODE: { - pm_source_file_node_t *source_file_node = (pm_source_file_node_t *)node; - + // Source file nodes are generated by the __FILE__ syntax. They + // reference the file name of the source file. if (!popped) { - VALUE filepath; - if (source_file_node->filepath.length == 0) { - filepath = rb_fstring_lit("<compiled>"); - } - else { - filepath = parse_string(&source_file_node->filepath); - } - - ADD_INSN1(ret, &dummy_line_node, putstring, filepath); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, putstring, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } return; } case PM_SOURCE_LINE_NODE: { + // Source line nodes are generated by the __LINE__ syntax. They + // reference the line number where they occur in the source file. if (!popped) { - ADD_INSN1(ret, &dummy_line_node, putobject, INT2FIX(lineno)); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, putobject, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } return; } @@ -2374,9 +2412,12 @@ pm_compile_node(rb_iseq_t *iseq, const pm_node_t *node, LINK_ANCHOR *const ret, return; } case PM_SYMBOL_NODE: { - pm_symbol_node_t *symbol_node = (pm_symbol_node_t *) node; + // Symbols nodes are symbol literals with no interpolation. They are + // always marked as static literals. 
if (!popped) { - ADD_INSN1(ret, &dummy_line_node, putobject, ID2SYM(parse_string_symbol(&symbol_node->unescaped))); + VALUE value = pm_static_literal_value(node, compile_context); + ADD_INSN1(ret, &dummy_line_node, putobject, value); + RB_OBJ_WRITTEN(iseq, Qundef, value); } return; } |