aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-12-06 04:43:48 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2016-12-06 04:43:48 +0000
commit4793e6f30f37084e9518b1ad1b73274642c1e72f (patch)
tree3e6602e764c51d275207b3b5eaf6ab98acd04963
parentf12bd41728cf23c33ebf34bf971b194ebf46715a (diff)
downloadruby-4793e6f30f37084e9518b1ad1b73274642c1e72f.tar.gz
switching hash removal
* st.h (struct st_hash_type): Remove strong_hash. (struct st_table): Remove inside_rebuild_p and curr_hash. * st.c (do_hash): Use type->hash instead of curr_hash. (make_tab_empty): Remove setting up curr_hash. (st_init_table_with_size): Remove setting up inside_rebuild_p. (rebuild_table): Remove clearing inside_rebuild_p. (reset_entry_hashes, HIT_THRESHOULD_FOR_STRONG_HASH): Remove code recognizing a denial attack and switching to strong hash. * hash.c (rb_dbl_long_hash, rb_objid_hash, rb_ident_hash): Use rb_hash_start to randomize the hash. (str_seed): Remove. (any_hash): Remove strong_p and use always rb_str_hash for strings. (any_hash_weak, rb_any_hash_weak): Remove. (st_hash_type objhash): Remove rb_any_hash_weak. based on the patch by Vladimir N Makarov <vmakarov@redhat.com> at [ruby-core:78490]. [Bug #13002] * test/ruby/test_hash.rb (test_wrapper): objects other than special constants should be able to be wrapped. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@56992 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--hash.c38
-rw-r--r--include/ruby/st.h9
-rw-r--r--st.c44
-rw-r--r--test/ruby/test_hash.rb41
-rw-r--r--test/ruby/test_string.rb12
5 files changed, 46 insertions, 98 deletions
diff --git a/hash.c b/hash.c
index 388446efad..afbc5220fb 100644
--- a/hash.c
+++ b/hash.c
@@ -157,19 +157,13 @@ rb_dbl_long_hash(double d)
union {double d; uint64_t i;} u;
u.d = d;
- return rb_objid_hash(u.i);
+ return rb_objid_hash(rb_hash_start(u.i));
}
#endif
}
-#if SIZEOF_INT == SIZEOF_VOIDP
-static const st_index_t str_seed = 0xfa835867;
-#else
-static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
-#endif
-
static inline st_index_t
-any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
+any_hash(VALUE a, st_index_t (*other_func)(VALUE))
{
VALUE hval;
st_index_t hnum;
@@ -177,6 +171,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
if (SPECIAL_CONST_P(a)) {
if (STATIC_SYM_P(a)) {
hnum = a >> (RUBY_SPECIAL_SHIFT + ID_SCOPE_SHIFT);
+ hnum = rb_hash_start(hnum);
goto out;
}
else if (FLONUM_P(a)) {
@@ -186,9 +181,7 @@ any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
hnum = rb_objid_hash((st_index_t)a);
}
else if (BUILTIN_TYPE(a) == T_STRING) {
- hnum = (strong_p
- ? rb_str_hash(a)
- : st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
+ hnum = rb_str_hash(a);
}
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
hnum = RSYMBOL(a)->hashval;
@@ -216,24 +209,6 @@ obj_any_hash(VALUE obj)
return FIX2LONG(obj);
}
-static inline st_index_t
-any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE))
-{
- return any_hash_general(a, FALSE, other_func);
-}
-
-static st_index_t
-rb_any_hash_weak(VALUE a)
-{
- return any_hash_weak(a, obj_any_hash);
-}
-
-static inline st_index_t
-any_hash(VALUE a, st_index_t (*other_func)(VALUE))
-{
- return any_hash_general(a, TRUE, other_func);
-}
-
static st_index_t
rb_any_hash(VALUE a)
{
@@ -275,7 +250,7 @@ key64_hash(uint64_t key, uint32_t seed)
long
rb_objid_hash(st_index_t index)
{
- return (long)key64_hash(index, (uint32_t)prime2);
+ return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2);
}
static st_index_t
@@ -299,7 +274,6 @@ rb_hash_iter_lev(VALUE h)
static const struct st_hash_type objhash = {
rb_any_cmp,
- rb_any_hash_weak,
rb_any_hash,
};
@@ -319,7 +293,7 @@ rb_ident_hash(st_data_t n)
}
#endif
- return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2);
+ return (st_index_t)key64_hash(rb_hash_start((st_index_t)n), (uint32_t)prime2);
}
static const struct st_hash_type identhash = {
diff --git a/include/ruby/st.h b/include/ruby/st.h
index ede75458c4..41f2913b80 100644
--- a/include/ruby/st.h
+++ b/include/ruby/st.h
@@ -61,11 +61,6 @@ typedef char st_check_for_sizeof_st_index_t[SIZEOF_VOIDP == (int)sizeof(st_index
struct st_hash_type {
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
- /* The following is an optional func for stronger hash. When we
- have many different keys with the same hash we can switch to
- use it to prevent a denial attack with usage of hash table
- collisions. */
- st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
};
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
@@ -82,12 +77,8 @@ struct st_table_entry; /* defined in st.c */
struct st_table {
/* Cached features of the table -- see st.c for more details. */
unsigned char entry_power, bin_power, size_ind;
- /* True when we are rebuilding the table. */
- unsigned char inside_rebuild_p;
/* How many times the table was rebuilt. */
unsigned int rebuilds_num;
- /* Currently used hash function. */
- st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
const struct st_hash_type *type;
/* Number of entries currently in the table. */
st_index_t num_entries;
diff --git a/st.c b/st.c
index 89dc248969..887b81bc03 100644
--- a/st.c
+++ b/st.c
@@ -461,7 +461,6 @@ initialize_bins(st_table *tab)
static void
make_tab_empty(st_table *tab)
{
- tab->curr_hash = tab->type->hash;
tab->num_entries = 0;
tab->entries_start = tab->entries_bound = 0;
if (tab->bins != NULL)
@@ -575,7 +574,6 @@ st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
tab->entry_power = n;
tab->bin_power = features[n].bin_power;
tab->size_ind = features[n].size_ind;
- tab->inside_rebuild_p = FALSE;
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
tab->bins = NULL;
else
@@ -744,7 +742,6 @@ rebuild_table(st_table *tab)
st_assert(tab != NULL);
bound = tab->entries_bound;
entries = tab->entries;
- tab->inside_rebuild_p = TRUE;
if ((2 * tab->num_entries <= get_allocated_entries(tab)
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|| tab->num_entries < (1 << MINIMAL_POWER2)) {
@@ -758,8 +755,6 @@ rebuild_table(st_table *tab)
else {
new_tab = st_init_table_with_size(tab->type,
2 * tab->num_entries - 1);
- st_assert(new_tab->curr_hash == new_tab->type->hash);
- new_tab->curr_hash = tab->curr_hash;
new_entries = new_tab->entries;
}
ni = 0;
@@ -798,7 +793,6 @@ rebuild_table(st_table *tab)
tab->entries_start = 0;
tab->entries_bound = tab->num_entries;
tab->rebuilds_num++;
- tab->inside_rebuild_p = FALSE;
#ifdef ST_DEBUG
st_check(tab);
#endif
@@ -966,28 +960,6 @@ find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
}
}
-/* Recalculate hashes of entries in table TAB. */
-static void
-reset_entry_hashes (st_table *tab)
-{
- st_index_t i, bound;
- st_table_entry *entries, *curr_entry_ptr;
-
- bound = tab->entries_bound;
- entries = tab->entries;
- for (i = tab->entries_start; i < bound; i++) {
- curr_entry_ptr = &entries[i];
- if (! DELETED_ENTRY_P(curr_entry_ptr))
- curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
- }
-}
-
-/* If we have the following number of collisions with different keys
- but with the same hash during finding a bin for new entry
- inclusions, possibly a denial attack is going on. Start to use a
- stronger hash. */
-#define HIT_THRESHOULD_FOR_STRONG_HASH 10
-
/* Return index of table TAB bin for HASH_VALUE and KEY through
BIN_IND and the pointed value as the function result. Reserve the
bin for inclusion of the corresponding entry into the table if it
@@ -1009,12 +981,10 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
st_index_t entry_index;
st_index_t first_deleted_bin_ind;
st_table_entry *entries;
- int hit;
st_assert(tab != NULL && tab->bins != NULL
&& tab->entries_bound <= get_allocated_entries(tab)
&& tab->entries_start <= tab->entries_bound);
- repeat:
ind = hash_bin(curr_hash_value, tab);
#ifdef QUADRATIC_PROBE
d = 1;
@@ -1024,7 +994,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
FOUND_BIN;
first_deleted_bin_ind = UNDEFINED_BIN_IND;
entries = tab->entries;
- hit = 0;
for (;;) {
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
if (EMPTY_BIN_P(entry_index)) {
@@ -1039,19 +1008,6 @@ find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
} else if (! DELETED_BIN_P(entry_index)) {
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
break;
- if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
- hit++;
- if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
- && tab->curr_hash != tab->type->strong_hash
- && tab->type->strong_hash != NULL
- && ! tab->inside_rebuild_p) {
- tab->curr_hash = tab->type->strong_hash;
- *hash_value = curr_hash_value = do_hash(key, tab);
- reset_entry_hashes(tab);
- rebuild_table(tab);
- goto repeat;
- }
- }
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
first_deleted_bin_ind = ind;
#ifdef QUADRATIC_PROBE
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index fdb65ae6ca..679bd2702a 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -1302,7 +1302,7 @@ class TestHash < Test::Unit::TestCase
assert_no_memory_leak([], prepare, code, bug9187)
end
- def test_wrapper_of_special_const
+ def test_wrapper
bug9381 = '[ruby-core:59638] [Bug #9381]'
wrapper = Class.new do
@@ -1323,6 +1323,7 @@ class TestHash < Test::Unit::TestCase
5, true, false, nil,
0.0, 1.72723e-77,
:foo, "dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym,
+ "str",
].select do |x|
hash = {x => bug9381}
hash[wrapper.new(x)] != bug9381
@@ -1330,6 +1331,44 @@ class TestHash < Test::Unit::TestCase
assert_empty(bad, bug9381)
end
+ def assert_hash_random(obj, dump = obj.inspect)
+ a = [obj.hash.to_s]
+ 3.times {
+ assert_in_out_err(["-e", "print #{dump}.hash"], "") do |r, e|
+ a += r
+ assert_equal([], e)
+ end
+ }
+ assert_not_equal([obj.hash.to_s], a.uniq)
+ assert_operator(a.uniq.size, :>, 2, proc {a.inspect})
+ end
+
+ def test_string_hash_random
+ assert_hash_random('abc')
+ end
+
+ def test_symbol_hash_random
+ assert_hash_random(:-)
+ assert_hash_random(:foo)
+ assert_hash_random("dsym_#{self.object_id.to_s(16)}_#{Time.now.to_i.to_s(16)}".to_sym)
+ end
+
+ def test_integer_hash_random
+ assert_hash_random(0)
+ assert_hash_random(+1)
+ assert_hash_random(-1)
+ assert_hash_random(+(1<<100))
+ assert_hash_random(-(1<<100))
+ end
+
+ def test_float_hash_random
+ assert_hash_random(0.0)
+ assert_hash_random(+1.0)
+ assert_hash_random(-1.0)
+ assert_hash_random(1.72723e-77)
+ assert_hash_random(Float::INFINITY, "Float::INFINITY")
+ end
+
def test_label_syntax
return unless @cls == Hash
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index 7ee7fa7e97..2bd35b1703 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -979,18 +979,6 @@ CODE
assert_not_equal(S("sub-setter").hash, S("discover").hash, bug9172)
end
- def test_hash_random
- str = 'abc'
- a = [str.hash.to_s]
- 3.times {
- assert_in_out_err(["-e", "print #{str.dump}.hash"], "") do |r, e|
- a += r
- assert_equal([], e)
- end
- }
- assert_not_equal([str.hash.to_s], a.uniq)
- end
-
def test_hex
assert_equal(255, S("0xff").hex)
assert_equal(-255, S("-0xff").hex)