path: root/mjit_compile.c
author    k0kubun <k0kubun@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>  2019-04-16 17:02:16 +0000
committer k0kubun <k0kubun@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>  2019-04-16 17:02:16 +0000
commit    d71b78575b08f55d35194b2c091719b2c4463ea3 (patch)
tree      dd0dba7549e3bc80ddc70c24c7e10120b5acf54e /mjit_compile.c
parent    b914166fcdba3faf5d4cb5e3a18d49691546ce21 (diff)
Introduce frame-omitted method inlining
for ISeqs including only leaf and no-handles_sp insns, except `leave`.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@67574 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
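The idea in isolation: the callee body is compiled straight into the caller's JIT-ed code and no control frame is pushed for it; only when the speculation is cancelled is the omitted frame materialized so the interpreter can take over. A minimal, self-contained sketch of that shape follows (toy code, not MJIT code; every name and the cancel condition are hypothetical):

/* Toy model of frame-omitted inlining with a lazy frame push on cancel. */
#include <stdbool.h>
#include <stdio.h>

struct toy_frame { const char *name; };
static struct toy_frame toy_stack[16];
static int toy_depth;

static void toy_push_frame(const char *name) { toy_stack[toy_depth++] = (struct toy_frame){ name }; }

/* Ordinary path: run the callee with a real frame on the stack. */
static int interpret_callee(int x) { return x + 1; }

/* Frame-omitted path: callee body inlined, no frame pushed. `cancel`
 * stands for whatever runtime check invalidates the speculation. */
static int inlined_callee(int x, bool cancel)
{
    if (!cancel)
        return x + 1;               /* callee body, no frame */
    toy_push_frame("callee");       /* lazily push the omitted frame */
    return interpret_callee(x);     /* fall back to the ordinary path */
}

int main(void)
{
    toy_push_frame("caller");
    printf("fast path: %d, frames=%d\n", inlined_callee(41, false), toy_depth);
    printf("cancelled: %d, frames=%d\n", inlined_callee(41, true), toy_depth);
    return 0;
}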
Diffstat (limited to 'mjit_compile.c')
-rw-r--r--  mjit_compile.c  127
1 file changed, 109 insertions, 18 deletions
diff --git a/mjit_compile.c b/mjit_compile.c
index 4f80bfb0da..e2ff5bb954 100644
--- a/mjit_compile.c
+++ b/mjit_compile.c
@@ -25,6 +25,14 @@
#define NOT_COMPILED_STACK_SIZE -1
#define ALREADY_COMPILED_P(status, pos) (status->stack_size_for_pos[pos] != NOT_COMPILED_STACK_SIZE)
+// For propagating information needed for lazily pushing a frame.
+struct inlined_call_context {
+ int orig_argc; // ci->orig_argc
+ VALUE me; // cc->me
+ int param_size; // def_iseq_ptr(cc->me->def)->body->param.size
+ int local_size; // def_iseq_ptr(cc->me->def)->body->local_table_size
+};
+
// Storage to keep compiler's status. This should have information
// which is global during one `mjit_compile` call. Ones conditional
// in each branch should be stored in `compile_branch`.
@@ -39,8 +47,9 @@ struct compile_status {
struct rb_call_cache *cc_entries;
// Mutated optimization levels
struct rb_mjit_compile_info *compile_info;
- // If `iseq_for_pos[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
- const struct rb_iseq_constant_body **iseq_for_pos;
+ // If `inlined_iseqs[pos]` is not NULL, `mjit_compile_body` tries to inline ISeq there.
+ const struct rb_iseq_constant_body **inlined_iseqs;
+ struct inlined_call_context inline_context;
};
// Storage to keep data which is consistent in each conditional branch.
@@ -66,10 +75,10 @@ has_valid_method_type(CALL_CACHE cc)
&& mjit_valid_class_serial_p(cc->class_serial) && cc->me;
}
-// Returns true if iseq is inlinable, otherwise NULL. This becomes true in the same condition
+// Returns true if iseq can use the fastpath for setup, otherwise false. This becomes true in the same condition
// as CC_SET_FASTPATH (in vm_callee_setup_arg) is called from vm_call_iseq_setup.
static bool
-inlinable_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq)
+fastpath_applied_iseq_p(const CALL_INFO ci, const CALL_CACHE cc, const rb_iseq_t *iseq)
{
extern bool rb_simple_iseq_p(const rb_iseq_t *iseq);
return iseq != NULL
@@ -187,11 +196,48 @@ compile_insns(FILE *f, const struct rb_iseq_constant_body *body, unsigned int st
}
}
+// Print the block to cancel an inlined method call. Only `opt_send_without_block` is supported for now.
+static void
+compile_inlined_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct inlined_call_context *inline_context)
+{
+ fprintf(f, "\ncancel:\n");
+ fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n");
+
+ // Swap pc/sp set on cancel with original pc/sp.
+ fprintf(f, " const VALUE current_pc = reg_cfp->pc;\n");
+ fprintf(f, " const VALUE current_sp = reg_cfp->sp;\n");
+ fprintf(f, " reg_cfp->pc = orig_pc;\n");
+ fprintf(f, " reg_cfp->sp = orig_sp;\n\n");
+
+ // Lazily push the current call frame.
+ fprintf(f, " struct rb_calling_info calling;\n");
+ fprintf(f, " calling.block_handler = VM_BLOCK_HANDLER_NONE;\n"); // assumes `opt_send_without_block`
+ fprintf(f, " calling.argc = %d;\n", inline_context->orig_argc);
+ fprintf(f, " calling.recv = reg_cfp->self;\n");
+ fprintf(f, " reg_cfp->self = orig_self;\n");
+ fprintf(f, " vm_call_iseq_setup_normal(ec, reg_cfp, &calling, (const rb_callable_method_entry_t *)0x%"PRIxVALUE", 0, %d, %d);\n\n",
+ inline_context->me, inline_context->param_size, inline_context->local_size); // fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
+
+ // Start usual cancel from here.
+ fprintf(f, " reg_cfp = ec->cfp;\n"); // work on the new frame
+ fprintf(f, " reg_cfp->pc = current_pc;\n");
+ fprintf(f, " reg_cfp->sp = current_sp;\n");
+ for (unsigned int i = 0; i < body->stack_max; i++) { // `status->local_stack_p` should always be true here
+ fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i);
+ }
+ // We're not just returning Qundef here so that caller's normal cancel handler can
+ // push back `stack` to `cfp->sp`.
+ fprintf(f, " return vm_exec(ec, ec->cfp);\n");
+}
+
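For a hypothetical call site (argc 1, a made-up method entry address, param and local size 1, stack_max 1), the function above writes roughly the following block into the generated C file. It compiles only inside the MJIT compilation unit, where `ec`, `reg_cfp`, `stack`, `orig_pc`, `orig_sp` and `orig_self` are in scope:

cancel:
    RB_DEBUG_COUNTER_INC(mjit_cancel);
    const VALUE *current_pc = reg_cfp->pc;
    VALUE *current_sp = reg_cfp->sp;
    reg_cfp->pc = orig_pc;
    reg_cfp->sp = orig_sp;

    struct rb_calling_info calling;
    calling.block_handler = VM_BLOCK_HANDLER_NONE;
    calling.argc = 1;                      /* inline_context->orig_argc (made up) */
    calling.recv = reg_cfp->self;
    reg_cfp->self = orig_self;
    vm_call_iseq_setup_normal(ec, reg_cfp, &calling,
        (const rb_callable_method_entry_t *)0x5570d8a61234 /* made-up cc->me */, 0, 1, 1);

    reg_cfp = ec->cfp;                     /* work on the newly pushed frame */
    reg_cfp->pc = current_pc;
    reg_cfp->sp = current_sp;
    *(vm_base_ptr(reg_cfp) + 0) = stack[0];
    return vm_exec(ec, ec->cfp);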
// Print the block to cancel JIT execution.
static void
compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status)
{
- unsigned int i;
+ if (status->inlined_iseqs == NULL) { // the current ISeq is being inlined
+ compile_inlined_cancel_handler(f, body, &status->inline_context);
+ return;
+ }
fprintf(f, "\nsend_cancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel_send_inline);\n");
@@ -208,7 +254,7 @@ compile_cancel_handler(FILE *f, const struct rb_iseq_constant_body *body, struct
fprintf(f, "\ncancel:\n");
fprintf(f, " RB_DEBUG_COUNTER_INC(mjit_cancel);\n");
if (status->local_stack_p) {
- for (i = 0; i < body->stack_max; i++) {
+ for (unsigned int i = 0; i < body->stack_max; i++) {
fprintf(f, " *(vm_base_ptr(reg_cfp) + %d) = stack[%d];\n", i, i);
}
}
@@ -253,11 +299,45 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
return status->success;
}
+// Return true if the ISeq can be inlined without pushing a new control frame.
+static bool
+inlinable_iseq_p(const struct rb_iseq_constant_body *body)
+{
+ // 1) If catch_except_p, caller frame should be preserved when callee catches an exception.
+ // Then we need to wrap `vm_exec()` but then we can't inline the call inside it.
+ //
+ // 2) If `body->catch_except_p` is false and `handles_sp?` of an insn is false,
+ // sp is not moved as we assume `status->local_stack_p = !body->catch_except_p`.
+ //
+ // 3) If `body->catch_except_p` is false and `always_leaf?` of an insn is true,
+ // pc is not moved.
+ if (body->catch_except_p)
+ return false;
+
+ unsigned int pos = 0;
+ while (pos < body->iseq_size) {
+#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
+ int insn = rb_vm_insn_addr2insn((void *)body->iseq_encoded[pos]);
+#else
+ int insn = (int)body->iseq_encoded[pos];
+#endif
+ // All insns in the ISeq except `leave` (to be overridden in the inlined code)
+ // should meet the following strong assumptions:
+ // * Do not require `cfp->sp` motion
+ // * Do not move `cfp->pc`
+ // * Do not read any `cfp->pc`
+ if (insn != BIN(leave) && insn_may_depend_on_sp_or_pc(insn, body->iseq_encoded + (pos + 1)))
+ return false;
+ pos += insn_len(insn);
+ }
+ return true;
+}
+
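A self-contained model of the scan above, with invented opcode numbers, lengths, and predicate (the real check walks the ISeq's encoded buffer using `insn_len()` and `insn_may_depend_on_sp_or_pc()`):

/* Walk a variable-length instruction stream and reject the sequence if any
 * insn other than the terminator fails a per-insn predicate. */
#include <stdbool.h>
#include <stdio.h>

enum { OP_PUSH, OP_ADD, OP_CALL, OP_LEAVE };
static const int insn_len_tbl[] = { 2, 1, 2, 1 };   /* opcode + operand count */

static bool insn_is_frame_friendly(int insn) { return insn != OP_CALL; }

static bool
sequence_inlinable_p(const int *code, unsigned int size)
{
    unsigned int pos = 0;
    while (pos < size) {
        int insn = code[pos];
        if (insn != OP_LEAVE && !insn_is_frame_friendly(insn))
            return false;
        pos += insn_len_tbl[insn];          /* skip opcode and its operands */
    }
    return true;
}

int main(void)
{
    const int ok[]  = { OP_PUSH, 1, OP_ADD, OP_LEAVE };
    const int bad[] = { OP_PUSH, 1, OP_CALL, 0, OP_LEAVE };
    printf("%d %d\n", sequence_inlinable_p(ok, 4), sequence_inlinable_p(bad, 5));
    return 0;
}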
// This needs to be a macro instead of a function because it uses `alloca`.
#define INIT_COMPILE_STATUS(status, body, compile_root_p) do { \
status = (struct compile_status){ \
.stack_size_for_pos = (int *)alloca(sizeof(int) * body->iseq_size), \
- .iseq_for_pos = compile_root_p ? \
+ .inlined_iseqs = compile_root_p ? \
alloca(sizeof(const struct rb_iseq_constant_body *) * body->iseq_size) : NULL, \
.cc_entries = (body->ci_size + body->ci_kw_size) > 0 ? \
alloca(sizeof(struct rb_call_cache) * (body->ci_size + body->ci_kw_size)) : NULL, \
@@ -268,15 +348,16 @@ mjit_compile_body(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
}; \
memset(status.stack_size_for_pos, NOT_COMPILED_STACK_SIZE, sizeof(int) * body->iseq_size); \
if (compile_root_p) \
- memset(status.iseq_for_pos, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \
+ memset(status.inlined_iseqs, 0, sizeof(const struct rb_iseq_constant_body *) * body->iseq_size); \
else \
memset(status.compile_info, 0, sizeof(struct rb_mjit_compile_info)); \
} while (0)
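A general C illustration (nothing MJIT-specific) of why the initializer above must be a macro: memory obtained with `alloca` lives only until the function that called `alloca` returns, so the buffers have to be carved out in the frame that will actually use them; a helper function would hand back dangling pointers the moment it returned:

#include <alloca.h>   /* glibc; on some platforms alloca() is declared in <stdlib.h> */
#include <stdio.h>
#include <string.h>

#define INIT_BUF(buf, n) do { \
    (buf) = alloca(n);        /* allocated in the expanding function's frame */ \
    memset((buf), 0, (n));    \
} while (0)

int main(void)
{
    char *buf;
    INIT_BUF(buf, 16);   /* stays valid until main() returns */
    strcpy(buf, "ok");
    puts(buf);
    return 0;
}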
// Compile inlinable ISeqs to C code in `f`. It returns true if it succeeds in compiling them.
static bool
-precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, struct compile_status *status)
+precompile_inlinable_iseqs(FILE *f, const rb_iseq_t *iseq, struct compile_status *status)
{
+ const struct rb_iseq_constant_body *body = iseq->body;
unsigned int pos = 0;
while (pos < body->iseq_size) {
#if OPT_DIRECT_THREADED_CODE || OPT_CALL_THREADED_CODE
@@ -285,30 +366,40 @@ precompile_inlinable_iseqs(FILE *f, const struct rb_iseq_constant_body *body, st
int insn = (int)body->iseq_encoded[pos];
#endif
- if (insn == BIN(opt_send_without_block) || insn == BIN(send)) {
+ if (insn == BIN(opt_send_without_block)) { // `compile_inlined_cancel_handler` supports only `opt_send_without_block`
CALL_INFO ci = (CALL_INFO)body->iseq_encoded[pos + 1];
CALL_CACHE cc_copy = status->cc_entries + ((CALL_CACHE)body->iseq_encoded[pos + 2] - body->cc_entries); // use copy to avoid race condition
const rb_iseq_t *child_iseq;
if (has_valid_method_type(cc_copy) &&
!(ci->flag & VM_CALL_TAILCALL) && // inlining only non-tailcall path
- cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && inlinable_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg
- !child_iseq->body->catch_except_p && // if catch_except_p, caller frame should be preserved when callee catches an exception.
- mjit_target_iseq_p(child_iseq->body)) {
- status->iseq_for_pos[pos] = child_iseq->body;
+ cc_copy->me->def->type == VM_METHOD_TYPE_ISEQ && fastpath_applied_iseq_p(ci, cc_copy, child_iseq = def_iseq_ptr(cc_copy->me->def)) && // CC_SET_FASTPATH in vm_callee_setup_arg
+ inlinable_iseq_p(child_iseq->body)) {
+ status->inlined_iseqs[pos] = child_iseq->body;
if (mjit_opts.verbose >= 1) // print beforehand because ISeq may be GCed during copy job.
- fprintf(stderr, "JIT inline: %s@%s:%d\n", RSTRING_PTR(child_iseq->body->location.label),
+ fprintf(stderr, "JIT inline: %s@%s:%d => %s@%s:%d\n",
+ RSTRING_PTR(iseq->body->location.label),
+ RSTRING_PTR(rb_iseq_path(iseq)), FIX2INT(iseq->body->location.first_lineno),
+ RSTRING_PTR(child_iseq->body->location.label),
RSTRING_PTR(rb_iseq_path(child_iseq)), FIX2INT(child_iseq->body->location.first_lineno));
struct compile_status child_status;
INIT_COMPILE_STATUS(child_status, child_iseq->body, false);
+ child_status.inline_context = (struct inlined_call_context){
+ .orig_argc = ci->orig_argc,
+ .me = (VALUE)cc_copy->me,
+ .param_size = child_iseq->body->param.size,
+ .local_size = child_iseq->body->local_table_size
+ };
if ((child_status.cc_entries != NULL || child_status.is_entries != NULL)
&& !mjit_copy_cache_from_main_thread(child_iseq, child_status.cc_entries, child_status.is_entries))
return false;
- fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp));\n", pos);
- fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp)\n{\n", pos);
+ fprintf(f, "ALWAYS_INLINE(static VALUE _mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self));\n", pos);
+ fprintf(f, "static inline VALUE\n_mjit_inlined_%d(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, const VALUE orig_self)\n{\n", pos);
+ fprintf(f, " const VALUE *orig_pc = reg_cfp->pc;\n");
+ fprintf(f, " const VALUE *orig_sp = reg_cfp->sp;\n");
bool success = mjit_compile_body(f, child_iseq, &child_status);
fprintf(f, "\n} /* end of _mjit_inlined_%d */\n\n", pos);
@@ -337,7 +428,7 @@ mjit_compile(FILE *f, const rb_iseq_t *iseq, const char *funcname)
&& !mjit_copy_cache_from_main_thread(iseq, status.cc_entries, status.is_entries))
return false;
- bool success = precompile_inlinable_iseqs(f, iseq->body, &status);
+ bool success = precompile_inlinable_iseqs(f, iseq, &status);
if (!success)
return false;