aboutsummaryrefslogtreecommitdiffstats
path: root/hash.c
diff options
context:
space:
mode:
authorko1 <ko1@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-11-07 00:45:00 +0000
committerko1 <ko1@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-11-07 00:45:00 +0000
commite49a472aad43e92e7b71ed38d6c7c091645a90f6 (patch)
tree311920522c27ab515ddfda5d1f45d1d3f68f142a /hash.c
parent599de5be6e006743739834da0c5d484b47988b2f (diff)
downloadruby-e49a472aad43e92e7b71ed38d6c7c091645a90f6.tar.gz
Introduce table improvement by Vladimir Makarov <vmakarov@redhat.com>.
[Feature #12142] See header of st.c for improvment details. You can see all of code history here: <https://github.com/vnmakarov/ruby/tree/hash_tables_with_open_addressing> This improvement is discussed at <https://bugs.ruby-lang.org/issues/12142> with many people, especially with Yura Sokolov. * st.c: improve st_table. * include/ruby/st.h: ditto. * internal.h, numeric.c, hash.c (rb_dbl_long_hash): extract a function. * ext/-test-/st/foreach/foreach.c: catch up this change. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56650 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'hash.c')
-rw-r--r--hash.c108
1 files changed, 77 insertions, 31 deletions
diff --git a/hash.c b/hash.c
index 8ed8c48ebb..4615c47b82 100644
--- a/hash.c
+++ b/hash.c
@@ -145,14 +145,36 @@ rb_hash(VALUE obj)
long rb_objid_hash(st_index_t index);
-static st_index_t
-any_hash(VALUE a, st_index_t (*other_func)(VALUE))
+long
+rb_dbl_long_hash(double d)
+{
+ /* normalize -0.0 to 0.0 */
+ if (d == 0.0) d = 0.0;
+#if SIZEOF_INT == SIZEOF_VOIDP
+ return rb_memhash(&d, sizeof(d));
+#else
+ {
+ union {double d; uint64_t i;} u;
+
+ u.d = d;
+ return rb_objid_hash(u.i);
+ }
+#endif
+}
+
+#if SIZEOF_INT == SIZEOF_VOIDP
+static const st_index_t str_seed = 0xfa835867;
+#else
+static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
+#endif
+
+static inline st_index_t
+any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
{
VALUE hval;
st_index_t hnum;
if (SPECIAL_CONST_P(a)) {
- if (a == Qundef) return 0;
if (STATIC_SYM_P(a)) {
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
goto out;
@@ -164,7 +186,9 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
hnum = rb_objid_hash((st_index_t)a);
}
else if (BUILTIN_TYPE(a) == T_STRING) {
- hnum = rb_str_hash(a);
+ hnum = (strong_p
+ ? rb_str_hash(a)
+ : st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
}
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval;
@@ -175,8 +199,7 @@ any_hash(VALUE a, st_index_t (*other_func)(VALUE))
}
else if (BUILTIN_TYPE(a) == T_FLOAT) {
flt:
- hval = rb_dbl_hash(rb_float_value(a));
- hnum = FIX2LONG(hval);
+ hnum = rb_dbl_long_hash(rb_float_value(a));
}
else {
hnum = other_func(a);
@@ -193,40 +216,62 @@ obj_any_hash(VALUE obj)
return FIX2LONG(obj);
}
+static inline st_index_t
+any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
+ return any_hash_general(a, FALSE, other_func);
+}
+
static st_index_t
-rb_any_hash(VALUE a)
-{
- return any_hash(a, obj_any_hash);
+rb_any_hash_weak(VALUE a) {
+ return any_hash_weak(a, obj_any_hash);
+}
+
+static inline st_index_t
+any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
+ return any_hash_general(a, TRUE, other_func);
}
static st_index_t
-rb_num_hash_start(st_index_t n)
+rb_any_hash(VALUE a) {
+ return any_hash(a, obj_any_hash);
+}
+
+/* Here is a hash function for 64-bit key. It is about 5 times faster
+ (2 times faster when uint128 type is absent) on Haswell than
+ tailored Spooky or City hash function can be. */
+
+/* Here we two primes with random bit generation. */
+static const uint64_t prime1 = 0x2e0bb864e9ea7df5ULL;
+static const uint64_t prime2 = 0xcdb32970830fcaa1ULL;
+
+
+static inline uint64_t
+mult_and_mix (uint64_t m1, uint64_t m2)
{
- /*
- * This hash function is lightly-tuned for Ruby. Further tuning
- * should be possible. Notes:
- *
- * - (n >> 3) alone is great for heap objects and OK for fixnum,
- * however symbols perform poorly.
- * - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
- * n.b.: +3 to remove most ID scope, +1 worked well initially, too
- * n.b.: +1 (instead of 3) worked well initially, too
- * - (n << 16) was finally added to avoid losing bits for fixnums
- * - avoid expensive modulo instructions, it is currently only
- * shifts and bitmask operations.
- */
- return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+ __uint128_t r = (__uint128_t) m1 * (__uint128_t) m2;
+ return (uint64_t) (r >> 64) ^ (uint64_t) r;
+#else
+ uint64_t hm1 = m1 >> 32, hm2 = m2 >> 32;
+ uint64_t lm1 = m1, lm2 = m2;
+ uint64_t v64_128 = hm1 * hm2;
+ uint64_t v32_96 = hm1 * lm2 + lm1 * hm2;
+ uint64_t v1_32 = lm1 * lm2;
+
+ return (v64_128 + (v32_96 >> 32)) ^ ((v32_96 << 32) + v1_32);
+#endif
+}
+
+static inline uint64_t
+key64_hash (uint64_t key, uint32_t seed)
+{
+ return mult_and_mix(key + seed, prime1);
}
long
rb_objid_hash(st_index_t index)
{
- st_index_t hnum = rb_num_hash_start(index);
-
- hnum = rb_hash_start(hnum);
- hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
- hnum = rb_hash_end(hnum);
- return hnum;
+ return key64_hash(index, (uint32_t) prime2);
}
static st_index_t
@@ -250,6 +295,7 @@ rb_hash_iter_lev(VALUE h)
static const struct st_hash_type objhash = {
rb_any_cmp,
+ rb_any_hash_weak,
rb_any_hash,
};
@@ -269,7 +315,7 @@ rb_ident_hash(st_data_t n)
}
#endif
- return (st_index_t)rb_num_hash_start((st_index_t)n);
+ return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
}
static const struct st_hash_type identhash = {