From 583e9d24d419023bc1123190768297a468113613 Mon Sep 17 00:00:00 2001
From: Jeremy Evans
Date: Sun, 2 Apr 2023 11:06:13 -0700
Subject: Optimize symproc calls

Similar to the bmethod/send optimization, this avoids using
CALLER_ARG_SPLAT if it is not necessary. As long as the receiver
argument can be shifted off, the remaining arguments are passed
through as-is.

This optimizes the following types of calls:

* symproc.(recv) ~5%
* symproc.(recv, *args) ~65% for args.length == 200
* symproc.(recv, *args, **kw) ~45% for args.length == 200
* symproc.(recv, **kw) ~30%
* symproc.(recv, kw: 1) ~100%

Note that empty argument splats do get slower with this approach, by
about 2-3%. This is probably because iseq argument setup is slower for
empty argument splats than CALLER_SETUP_ARG is. Apart from the empty
argument splat case, argument splats are faster, with the speedup
depending on the number of arguments.

The following types of calls are not optimized:

* symproc.(*args)
* symproc.(*args, **kw)

This is because you cannot shift the receiver argument off without
first splatting the argument array.
---
 benchmark/vm_call_symproc.yml | 83 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)
 create mode 100644 benchmark/vm_call_symproc.yml

diff --git a/benchmark/vm_call_symproc.yml b/benchmark/vm_call_symproc.yml
new file mode 100644
index 0000000000..16e0ac579e
--- /dev/null
+++ b/benchmark/vm_call_symproc.yml
@@ -0,0 +1,83 @@
+prelude: |
+  def self.a0; end
+  def self.a1(a) a; end
+  def self.s(*a) a; end
+  def self.b(kw: 1) kw end
+  def self.sb(*a, kw: 1) kw end
+
+  t0 = 0.times.to_a
+  t1 = 1.times.to_a
+  t10 = 10.times.to_a
+  t200 = 200.times.to_a
+
+  a0_t0 = [self, *t0]
+  a1_t1 = [self, *t1]
+  s_t0 = [self, *t0]
+  s_t1 = [self, *t1]
+  s_t10 = [self, *t10]
+  s_t200 = [self, *t200]
+  sb_t0 = [self, *t0]
+  sb_t1 = [self, *t1]
+  sb_t10 = [self, *t10]
+  sb_t200 = [self, *t200]
+  kw = {kw: 2}
+
+  a0 = :a0.to_proc
+  a1 = :a1.to_proc
+  s = :s.to_proc
+  b = :b.to_proc
+  sb = :sb.to_proc
+benchmark:
+  symproc_simple_0: |
+    a0.(self)
+  symproc_simple_1: |
+    a1.(self, 1)
+  symproc_simple_0_splat: |
+    a0.(self, *t0)
+  symproc_simple_1_splat: |
+    a1.(self, *t1)
+  symproc_simple_0_splat_comb: |
+    a0.(*a0_t0)
+  symproc_simple_1_splat_comb: |
+    a1.(*a1_t1)
+  symproc_no_splat: |
+    s.(self)
+  symproc_0_splat: |
+    s.(self, *t0)
+  symproc_1_splat: |
+    s.(self, *t1)
+  symproc_10_splat: |
+    s.(self, *t10)
+  symproc_200_splat: |
+    s.(self, *t200)
+  symproc_0_splat_comb: |
+    s.(*s_t0)
+  symproc_1_splat_comb: |
+    s.(*s_t1)
+  symproc_10_splat_comb: |
+    s.(*s_t10)
+  symproc_200_splat_comb: |
+    s.(*s_t200)
+  symproc_kw: |
+    b.(self, kw: 1)
+  symproc_no_kw: |
+    b.(self)
+  symproc_kw_splat: |
+    b.(self, **kw)
+  symproc_0_splat_kw: |
+    sb.(self, *t0, **kw)
+  symproc_1_splat_kw: |
+    sb.(self, *t1, **kw)
+  symproc_10_splat_kw: |
+    sb.(self, *t10, **kw)
+  symproc_200_splat_kw: |
+    sb.(self, *t200, **kw)
+  symproc_0_splat_comb_kw: |
+    sb.(*sb_t0, **kw)
+  symproc_1_splat_comb_kw: |
+    sb.(*sb_t1, **kw)
+  symproc_10_splat_comb_kw: |
+    sb.(*sb_t10, **kw)
+  symproc_200_splat_comb_kw: |
+    sb.(*sb_t200, **kw)
+loop_count: 1000000
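For readers unfamiliar with the call shapes above, here is a minimal
Ruby sketch (not part of the patch) of the Symbol#to_proc semantics the
commit message relies on: the proc's first argument is the receiver, so
it can only be shifted off when it appears as a distinct leading
argument.

  # Sketch only, not from the patch.
  add = :+.to_proc

  add.(1, 2)     # => 3, same as 1.+(2); the receiver is a distinct
                 # leading argument (optimized shape)
  add.(1, *[2])  # receiver still explicit, trailing splat can be
                 # passed through as-is (optimized shape)
  add.(*[1, 2])  # => 3, but the receiver is buried inside the splat,
                 # so the array must be expanded before the receiver
                 # can be shifted off (unoptimized shape)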
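The new file follows the benchmark_driver YAML format used by the other
files under benchmark/ in the Ruby source tree. Assuming the
benchmark_driver gem is installed, it should be runnable standalone with
something like:

  $ gem install benchmark_driver
  $ benchmark-driver benchmark/vm_call_symproc.yml

or, from a configured Ruby build tree, via the make target documented in
benchmark/README.md:

  $ make benchmark ITEM=vm_call_symproc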