aboutsummaryrefslogtreecommitdiffstats
path: root/vm_insnhelper.c
diff options
context:
space:
mode:
authorJeremy Evans <code@jeremyevans.net>2023-04-01 09:19:35 -0700
committerJeremy Evans <code@jeremyevans.net>2023-04-25 08:06:16 -0700
commitaf2da6419aba1e242e851664b4e6816aeb27f8cb (patch)
tree6c9a085aa6ad05f1134a0c52fd6e58f5c3bf39fd /vm_insnhelper.c
parentf6254f77f7a7c4d1f11180b3b382680868bd9ee4 (diff)
downloadruby-af2da6419aba1e242e851664b4e6816aeb27f8cb.tar.gz
Optimize cfunc calls for f(*a) and f(*a, **kw) if kw is empty
This optimizes the following calls:

* ~10-15% for f(*a) when a does not end with a flagged keywords hash
* ~10-15% for f(*a) when a ends with an empty flagged keywords hash
* ~35-40% for f(*a, **kw) if kw is empty

This still copies the array contents to the VM stack, but avoids some overhead. It would be faster to use the array pointer directly, but that could cause problems if the array was modified during the call to the function. You could do that optimization for frozen arrays, but as splatting frozen arrays is uncommon, and the speedup is minimal (<5%), it doesn't seem worth it.

The vm_send_cfunc benchmark has been updated to test additional cfunc call types, and the numbers above were taken from the benchmark results.
Diffstat (limited to 'vm_insnhelper.c')
-rw-r--r--vm_insnhelper.c85
1 file changed, 83 insertions, 2 deletions
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 264dedecb5..d66f1e833a 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -3462,10 +3462,10 @@ vm_call_cfunc_with_frame(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp
}
static VALUE
-vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
+vm_call_cfunc_other(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
{
const struct rb_callinfo *ci = calling->ci;
- RB_DEBUG_COUNTER_INC(ccf_cfunc);
+ RB_DEBUG_COUNTER_INC(ccf_cfunc_other);
CALLER_SETUP_ARG(reg_cfp, calling, ci, ALLOW_HEAP_ARGV_KEEP_KWSPLAT);
VALUE argv_ary;
@@ -3488,6 +3488,87 @@ vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb
}
}
+// Shared fast path for cfunc calls whose positional arguments are already
+// collected in a single Ruby Array sitting on the VM stack (from a splat).
+// Copies the array elements onto the VM stack and calls the cfunc directly,
+// avoiding the generic heap-argv machinery.
+//
+// stack_offset: slots on the stack above the splat array (0 for f(*a);
+//               1 for f(*a, **kw), where the kw hash sits on top).
+// argc_offset:  trailing array elements to drop (1 when the array ends
+//               with an empty flagged keywords hash, else 0).
+static inline VALUE
+vm_call_cfunc_array_argv(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling, int stack_offset, int argc_offset)
+{
+ VALUE argv_ary = reg_cfp->sp[-1 - stack_offset];
+ int argc = RARRAY_LENINT(argv_ary) - argc_offset;
+
+ // Too many arguments to fit the stack-based calling convention; punt to
+ // the generic implementation.
+ if (UNLIKELY(argc > VM_ARGC_STACK_MAX)) {
+ return vm_call_cfunc_other(ec, reg_cfp, calling);
+ }
+
+ VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary);
+ // Keywords (if any) were either absent or an empty hash that has been
+ // dropped via argc_offset, so the call proceeds with no kw splat.
+ calling->kw_splat = 0;
+ int i;
+ // NOTE(review): stack_bottom appears to point at the receiver slot
+ // (sp - 2 - stack_offset, i.e. just below the splat array); the copied
+ // arguments overwrite the array slot and anything above it — confirm
+ // against vm_call_cfunc_with_frame_'s expectations.
+ VALUE *stack_bottom = reg_cfp->sp - 2 - stack_offset;
+ VALUE *sp = stack_bottom;
+ CHECK_VM_STACK_OVERFLOW(reg_cfp, argc);
+ for(i = 0; i < argc; i++) {
+ *++sp = argv[i];
+ }
+ // Leave sp one past the last copied argument.
+ reg_cfp->sp = sp+1;
+
+ return vm_call_cfunc_with_frame_(ec, reg_cfp, calling, argc, stack_bottom+1, stack_bottom);
+}
+
+// cfunc fast path for f(*a): exactly one positional splat, no **kw.
+// If the array's last element is a hash flagged RHASH_PASS_AS_KEYWORDS:
+//   - non-empty: real keywords are being passed, fall back to the generic
+//     path (vm_call_cfunc_other);
+//   - empty: drop it (argc_offset = 1) and take the fast path.
+static inline VALUE
+vm_call_cfunc_only_splat(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
+{
+ RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat);
+ VALUE argv_ary = reg_cfp->sp[-1];
+ int argc = RARRAY_LENINT(argv_ary);
+ VALUE *argv = (VALUE *)RARRAY_CONST_PTR(argv_ary);
+ VALUE last_hash;
+ int argc_offset = 0;
+
+ // Check the flag directly on the RHash rather than through the public
+ // accessor; only relevant when the array is non-empty.
+ if (UNLIKELY(argc > 0 &&
+ RB_TYPE_P((last_hash = argv[argc-1]), T_HASH) &&
+ (((struct RHash *)last_hash)->basic.flags & RHASH_PASS_AS_KEYWORDS))) {
+ if (!RHASH_EMPTY_P(last_hash)) {
+ return vm_call_cfunc_other(ec, reg_cfp, calling);
+ }
+ // Empty flagged keywords hash: exclude it from the argument count.
+ argc_offset++;
+ }
+ return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 0, argc_offset);
+}
+
+// cfunc fast path for f(*a, **kw): a positional splat plus a keyword
+// splat.  The kw hash sits on top of the stack (sp[-1]) with the splat
+// array just below it (stack_offset = 1).  Only taken when kw is an empty
+// hash — the hash is then ignored entirely; otherwise fall back to the
+// generic path.
+static inline VALUE
+vm_call_cfunc_only_splat_kw(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
+{
+ RB_DEBUG_COUNTER_INC(ccf_cfunc_only_splat_kw);
+ VALUE keyword_hash = reg_cfp->sp[-1];
+
+ if (RB_TYPE_P(keyword_hash, T_HASH) && RHASH_EMPTY_P(keyword_hash)) {
+ return vm_call_cfunc_array_argv(ec, reg_cfp, calling, 1, 0);
+ }
+
+ return vm_call_cfunc_other(ec, reg_cfp, calling);
+}
+
+// Entry point for cfunc method calls.  Inspects the call site's argument
+// shape (from the callinfo) once, installs the matching specialized
+// handler via CC_SET_FASTPATH so subsequent calls through this call cache
+// skip the dispatch, and invokes that handler for the current call.
+static VALUE
+vm_call_cfunc(rb_execution_context_t *ec, rb_control_frame_t *reg_cfp, struct rb_calling_info *calling)
+{
+ const struct rb_callinfo *ci = calling->ci;
+ RB_DEBUG_COUNTER_INC(ccf_cfunc);
+
+ if (IS_ARGS_SPLAT(ci)) {
+ // argc == 1 means the splat array is the only argument expression.
+ if (!IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 1) {
+ // f(*a)
+ CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat, TRUE);
+ return vm_call_cfunc_only_splat(ec, reg_cfp, calling);
+ }
+ // argc == 2: the splat array plus the keyword-splat hash.
+ if (IS_ARGS_KW_SPLAT(ci) && vm_ci_argc(ci) == 2) {
+ // f(*a, **kw)
+ CC_SET_FASTPATH(calling->cc, vm_call_cfunc_only_splat_kw, TRUE);
+ return vm_call_cfunc_only_splat_kw(ec, reg_cfp, calling);
+ }
+ }
+
+ // Everything else (no splat, or more complex shapes) goes through the
+ // generic implementation.
+ CC_SET_FASTPATH(calling->cc, vm_call_cfunc_other, TRUE);
+ return vm_call_cfunc_other(ec, reg_cfp, calling);
+}
+
static VALUE
vm_call_ivar(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
{