aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--hash.c8
-rw-r--r--insns.def8
-rw-r--r--internal.h3
-rw-r--r--st.c165
-rw-r--r--test/ruby/test_hash.rb45
-rw-r--r--test/ruby/test_literal.rb29
-rw-r--r--vm.c6
7 files changed, 245 insertions, 19 deletions
diff --git a/hash.c b/hash.c
index b583390819..3d16b4da9b 100644
--- a/hash.c
+++ b/hash.c
@@ -650,7 +650,6 @@ static VALUE
rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
{
VALUE hash, tmp;
- int i;
if (argc == 1) {
tmp = rb_hash_s_try_convert(Qnil, argv[0]);
@@ -704,12 +703,7 @@ rb_hash_s_create(int argc, VALUE *argv, VALUE klass)
}
hash = hash_alloc(klass);
- if (argc > 0) {
- RHASH(hash)->ntbl = st_init_table_with_size(&objhash, argc / 2);
- }
- for (i=0; i<argc; i+=2) {
- rb_hash_aset(hash, argv[i], argv[i + 1]);
- }
+ rb_hash_bulk_insert(argc, argv, hash);
return hash;
}
diff --git a/insns.def b/insns.def
index a056b221d5..5f1ff62d1a 100644
--- a/insns.def
+++ b/insns.def
@@ -492,16 +492,12 @@ newhash
(...)
(VALUE val) // inc += 1 - num;
{
- rb_num_t i;
-
RUBY_DTRACE_CREATE_HOOK(HASH, num);
val = rb_hash_new();
- for (i = num; i > 0; i -= 2) {
- const VALUE v = TOPN(i - 2);
- const VALUE k = TOPN(i - 1);
- rb_hash_aset(val, k, v);
+ if (num) {
+ rb_hash_bulk_insert(num, STACK_ADDR_FROM_TOP(num), val);
}
POPN(num);
}
diff --git a/internal.h b/internal.h
index 0988f301dd..53f31b7804 100644
--- a/internal.h
+++ b/internal.h
@@ -1550,6 +1550,9 @@ extern int ruby_enable_coredump;
int rb_get_next_signal(void);
int rb_sigaltstack_size(void);
+/* st.c */
+extern void rb_hash_bulk_insert(long, const VALUE *, VALUE);
+
/* strftime.c */
#ifdef RUBY_ENCODING_H
VALUE rb_strftime_timespec(const char *format, size_t format_len, rb_encoding *enc,
diff --git a/st.c b/st.c
index 528c528572..a8c9a44096 100644
--- a/st.c
+++ b/st.c
@@ -1955,3 +1955,168 @@ st_numhash(st_data_t n)
enum {s1 = 11, s2 = 3};
return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
}
+
+/* Expand TAB to be suitable for holding SIZ entries in total.
+ Pre-existing entries remain not deleted inside of TAB, but its bins
+ are cleared to expect future reconstruction. See rehash below. */
+static void
+st_expand_table(st_table *tab, st_index_t siz)
+{
+ st_table *tmp;
+ st_index_t n;
+
+ if (siz <= get_allocated_entries(tab))
+ return; /* enough room already */
+
+ tmp = st_init_table_with_size(tab->type, siz);
+ n = get_allocated_entries(tab);
+ MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
+ free(tab->entries);
+ if (tab->bins != NULL)
+ free(tab->bins);
+ if (tmp->bins != NULL)
+ free(tmp->bins);
+ tab->entry_power = tmp->entry_power;
+ tab->bin_power = tmp->bin_power;
+ tab->size_ind = tmp->size_ind;
+ tab->entries = tmp->entries;
+ tab->bins = NULL;
+ tab->rebuilds_num++;
+ free(tmp);
+}
+
+/* Rehash using linear search. */
+static void
+st_rehash_linear(st_table *tab)
+{
+ st_index_t i, j;
+ st_table_entry *p, *q;
+ if (tab->bins) {
+ free(tab->bins);
+ tab->bins = NULL;
+ }
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ p = &tab->entries[i];
+ if (DELETED_ENTRY_P(p))
+ continue;
+ for (j = i + 1; j < tab->entries_bound; j++) {
+ q = &tab->entries[j];
+ if (DELETED_ENTRY_P(q))
+ continue;
+ if (PTR_EQUAL(tab, p, q->hash, q->key)) {
+ st_assert(p < q);
+ *p = *q;
+ MARK_ENTRY_DELETED(q);
+ tab->num_entries--;
+ update_range_for_deleted(tab, j);
+ }
+ }
+ }
+}
+
+/* Rehash using index */
+static void
+st_rehash_indexed(st_table *tab)
+{
+ st_index_t i;
+ st_index_t const n = bins_size(tab);
+ unsigned int const size_ind = get_size_ind(tab);
+ st_index_t *bins = realloc(tab->bins, n);
+ st_assert(bins != NULL);
+ tab->bins = bins;
+ initialize_bins(tab);
+ for (i = tab->entries_start; i < tab->entries_bound; i++) {
+ st_table_entry *p = &tab->entries[i];
+ st_index_t ind;
+#ifdef QUADRATIC_PROBE
+ st_index_t d = 1;
+#else
+ st_index_t peterb = p->hash;
+#endif
+
+ if (DELETED_ENTRY_P(p))
+ continue;
+
+ ind = hash_bin(p->hash, tab);
+ for(;;) {
+ st_index_t bin = get_bin(bins, size_ind, ind);
+ st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
+ if (EMPTY_OR_DELETED_BIN_P(bin)) {
+ /* ok, new room */
+ set_bin(bins, size_ind, ind, i + ENTRY_BASE);
+ break;
+ }
+ else if (PTR_EQUAL(tab, q, p->hash, p->key)) {
+ /* duplicated key; delete it */
+ st_assert(q < p);
+ q->record = p->record;
+ MARK_ENTRY_DELETED(p);
+ tab->num_entries--;
+ update_range_for_deleted(tab, bin);
+ break;
+ }
+ else {
+ /* hash collision; skip it */
+#ifdef QUADRATIC_PROBE
+ ind = hash_bin(ind + d, tab);
+ d++;
+#else
+ ind = secondary_hash(ind, tab, &peterb);
+#endif
+ }
+ }
+ }
+}
+
+/* Reconstruct TAB's bins according to TAB's entries. This function
+ permits conflicting keys inside of entries. No errors are reported
+ then. All but one of them are discarded silently. */
+static void
+st_rehash(st_table *tab)
+{
+ if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
+ st_rehash_linear(tab);
+ else
+ st_rehash_indexed(tab);
+}
+
+#ifdef RUBY
+/* Mimics ruby's { foo => bar } syntax. This function is placed here
+ because it touches table internals and write barriers at once. */
+void
+rb_hash_bulk_insert(long argc, const VALUE *argv, VALUE hash)
+{
+ int i;
+ st_table *tab;
+
+ st_assert(argc % 2);
+ if (! argc)
+ return;
+ if (! RHASH(hash)->ntbl)
+ rb_hash_tbl_raw(hash);
+ tab = RHASH(hash)->ntbl;
+
+ /* make room */
+ st_expand_table(tab, tab->num_entries + argc);
+
+ /* push elems */
+ for (i = 0; i < argc; /* */) {
+ VALUE key = argv[i++];
+ VALUE val = argv[i++];
+ st_data_t k = (rb_obj_class(key) == rb_cString) ?
+ rb_str_new_frozen(key) : key;
+ st_table_entry e;
+ e.hash = do_hash(k, tab);
+ e.key = k;
+ e.record = val;
+
+ tab->entries[tab->entries_bound++] = e;
+ tab->num_entries++;
+ RB_OBJ_WRITTEN(hash, Qundef, k);
+ RB_OBJ_WRITTEN(hash, Qundef, val);
+ }
+
+ /* reindex */
+ st_rehash(tab);
+}
+#endif
diff --git a/test/ruby/test_hash.rb b/test/ruby/test_hash.rb
index 040d25e144..798bd2fe60 100644
--- a/test/ruby/test_hash.rb
+++ b/test/ruby/test_hash.rb
@@ -133,6 +133,50 @@ class TestHash < Test::Unit::TestCase
assert_equal(100, h['a'])
assert_equal(200, h['b'])
assert_nil(h['c'])
+
+ h = @cls["a", 100, "b", 200]
+ assert_equal(100, h['a'])
+ assert_equal(200, h['b'])
+ assert_nil(h['c'])
+
+ h = @cls[[["a", 100], ["b", 200]]]
+ assert_equal(100, h['a'])
+ assert_equal(200, h['b'])
+ assert_nil(h['c'])
+
+ h = @cls[[["a", 100], ["b"], ["c", 300]]]
+ assert_equal(100, h['a'])
+ assert_equal(nil, h['b'])
+ assert_equal(300, h['c'])
+
+ h = @cls[[["a", 100], "b", ["c", 300]]]
+ assert_equal(100, h['a'])
+ assert_equal(nil, h['b'])
+ assert_equal(300, h['c'])
+ end
+
+ def test_s_AREF_duplicated_key
+ alist = [["a", 100], ["b", 200], ["a", 300], ["a", 400]]
+ h = @cls[alist]
+ assert_equal(2, h.size)
+ assert_equal(400, h['a'])
+ assert_equal(200, h['b'])
+ assert_nil(h['c'])
+ assert_equal(nil, h.key('300'))
+ end
+
+ def test_s_AREF_frozen_key_id
+ key = "a".freeze
+ h = @cls[key, 100]
+ assert_equal(100, h['a'])
+ assert_same(key, *h.keys)
+ end
+
+ def test_s_AREF_key_tampering
+ key = "a".dup
+ h = @cls[key, 100]
+ key.upcase!
+ assert_equal(100, h['a'])
end
def test_s_new
@@ -145,7 +189,6 @@ class TestHash < Test::Unit::TestCase
assert_instance_of(@cls, h)
assert_equal('default', h.default)
assert_equal('default', h['spurious'])
-
end
def test_try_convert
diff --git a/test/ruby/test_literal.rb b/test/ruby/test_literal.rb
index 28290fb19d..4a447d59fc 100644
--- a/test/ruby/test_literal.rb
+++ b/test/ruby/test_literal.rb
@@ -394,6 +394,35 @@ class TestRubyLiteral < Test::Unit::TestCase
end;
end
+ def test_hash_duplicated_key
+ h = EnvUtil.suppress_warning do
+ eval <<~end
+ # This is a syntax that renders warning at very early stage.
+ # eval used to delay warning, to be suppressible by EnvUtil.
+ {"a" => 100, "b" => 200, "a" => 300, "a" => 400}
+ end
+ end
+ assert_equal(2, h.size)
+ assert_equal(400, h['a'])
+ assert_equal(200, h['b'])
+ assert_nil(h['c'])
+ assert_equal(nil, h.key('300'))
+ end
+
+ def test_hash_frozen_key_id
+ key = "a".freeze
+ h = {key => 100}
+ assert_equal(100, h['a'])
+ assert_same(key, *h.keys)
+ end
+
+ def test_hash_key_tampering
+ key = "a"
+ h = {key => 100}
+ key.upcase!
+ assert_equal(100, h['a'])
+ end
+
def test_range
assert_instance_of Range, (1..2)
assert_equal(1..2, 1..2)
diff --git a/vm.c b/vm.c
index e64c072fdc..de2cee9485 100644
--- a/vm.c
+++ b/vm.c
@@ -2653,13 +2653,9 @@ static VALUE core_hash_merge_kwd(int argc, VALUE *argv);
static VALUE
core_hash_merge(VALUE hash, long argc, const VALUE *argv)
{
- long i;
-
Check_Type(hash, T_HASH);
VM_ASSERT(argc % 2 == 0);
- for (i=0; i<argc; i+=2) {
- rb_hash_aset(hash, argv[i], argv[i+1]);
- }
+ rb_hash_bulk_insert(argc, argv, hash);
return hash;
}