aboutsummaryrefslogtreecommitdiffstats
path: root/internal.h
diff options
context:
space:
mode:
author卜部昌平 <shyouhei@ruby-lang.org>2019-10-07 12:59:57 +0900
committer卜部昌平 <shyouhei@ruby-lang.org>2019-11-07 17:41:30 +0900
commitd45a013a1a3bcc860e6f7f303220b3297e2abdbc (patch)
tree9dd459d6b41542cdfcd75cede71a96e06981e3e0 /internal.h
parent3c252651e1ee28d015dbe1648dfdf0140232b733 (diff)
downloadruby-d45a013a1a3bcc860e6f7f303220b3297e2abdbc.tar.gz
extend rb_call_cache
Prior to this changeset, majority of inline cache mishits resulted into the same method entry when rb_callable_method_entry() resolves a method search. Let's not call the function at the first place on such situations. In doing so we extend the struct rb_call_cache from 44 bytes (in case of 64 bit machine) to 64 bytes, and fill the gap with secondary class serial(s). Call cache's class serials now behavies as a LRU cache. Calculating ------------------------------------- ours 2.7 2.6 vm2_poly_same_method 2.339M 1.744M 1.369M i/s - 6.000M times in 2.565086s 3.441329s 4.381386s Comparison: vm2_poly_same_method ours: 2339103.0 i/s 2.7: 1743512.3 i/s - 1.34x slower 2.6: 1369429.8 i/s - 1.71x slower
Diffstat (limited to 'internal.h')
-rw-r--r--internal.h25
1 file changed, 24 insertions, 1 deletion
diff --git a/internal.h b/internal.h
index 703dd57699..1b27df0009 100644
--- a/internal.h
+++ b/internal.h
@@ -2357,10 +2357,32 @@ struct rb_execution_context_struct;
struct rb_control_frame_struct;
struct rb_calling_info;
struct rb_call_data;
+/* I have several reasons to choose 64 here:
+ *
+ * - A cache line must be a power-of-two size.
+ * - Setting this to anything less than or equal to 32 gains nothing.
+ * - I have never seen an architecture that has 128 byte L1 cache line.
+ * - I know Intel Core and Sparc T4, at least, use 64.
+ * - I know jemalloc internally has this exact same `#define CACHE_LINE 64`.
+ * https://github.com/jemalloc/jemalloc/blob/dev/include/jemalloc/internal/jemalloc_internal_types.h
+ */
+#define CACHELINE 64
struct rb_call_cache {
/* inline cache: keys */
rb_serial_t method_state;
- rb_serial_t class_serial;
+ rb_serial_t class_serial[
+ (CACHELINE
+ - sizeof(rb_serial_t) /* method_state */
+ - sizeof(struct rb_callable_method_entry_struct *) /* me */
+ - sizeof(struct rb_callable_method_definition_struct *) /* def */
+ - sizeof(enum method_missing_reason) /* aux */
+ - sizeof(VALUE (*)( /* call */
+ struct rb_execution_context_struct *e,
+ struct rb_control_frame_struct *,
+ struct rb_calling_info *,
+ const struct rb_call_data *)))
+ / sizeof(rb_serial_t)
+ ];
/* inline cache: values */
const struct rb_callable_method_entry_struct *me;
@@ -2377,6 +2399,7 @@ struct rb_call_cache {
int inc_sp; /* used by cfunc */
} aux;
};
+STATIC_ASSERT(cachelined, sizeof(struct rb_call_cache) <= CACHELINE);
struct rb_call_info {
/* fixed at compile time */
ID mid;