aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean Boussier <byroot@ruby-lang.org>2022-02-18 12:54:42 +0100
committerJean Boussier <jean.boussier@gmail.com>2023-02-23 16:01:57 +0100
commit2a5354e59324cb296a423c73ec15ff9191086964 (patch)
tree4f05a09a431ee431f1312eef8eb23da4cab7e23a
parent9406245dbcaa324ce9ff0aae0f28b64beacc0836 (diff)
downloadruby-2a5354e59324cb296a423c73ec15ff9191086964.tar.gz
Implement ObjectSpace::WeakKeyMap basic allocator
[Feature #18498]
-rw-r--r--NEWS.md6
-rw-r--r--gc.c325
-rw-r--r--hash.c4
-rw-r--r--internal/hash.h2
-rw-r--r--test/ruby/weakkeymap.rb112
5 files changed, 447 insertions, 2 deletions
diff --git a/NEWS.md b/NEWS.md
index 55925c6dcf..e655d615f4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -20,6 +20,12 @@ Note: We're only listing outstanding class updates.
* `String#unpack` now raises ArgumentError for unknown directives. [[Bug #19150]]
* `String#bytesplice` now accepts new arguments index/length or range of the source string to be copied. [[Feature #19314]]
+* ObjectSpace::WeakKeyMap
+
+ * New core class to build collections with weak references.
+ The class use equality semantic to lookup keys like a regular hash,
+ but it doesn't hold strong references on the keys. [[Feature #18498]]
+
## Stdlib updates
The following default gems are updated.
diff --git a/gc.c b/gc.c
index 70ea3c80b9..fe67887826 100644
--- a/gc.c
+++ b/gc.c
@@ -13306,6 +13306,311 @@ wmap_size(VALUE self)
#endif
}
+
+/*
+ ------------------------------ WeakKeyMap ------------------------------
+*/
+
+struct weakkeymap_entry {
+ VALUE obj;
+ st_index_t hash;
+} typedef weakkeymap_entry_t;
+
+struct weakkeymap {
+ st_table *map;
+ st_table *obj2hash;
+ VALUE final;
+};
+
+static int
+weakkeymap_cmp_entry(st_data_t a, st_data_t b)
+{
+ struct weakkeymap_entry *entry_a = (struct weakkeymap_entry *)a;
+ struct weakkeymap_entry *entry_b = (struct weakkeymap_entry *)b;
+ if (entry_a == entry_b) {
+ return 0;
+ }
+ else {
+ return rb_any_cmp(entry_a->obj, entry_b->obj);
+ }
+}
+
+static st_index_t
+weakkeymap_hash_entry(st_data_t a)
+{
+ struct weakkeymap_entry *entry_a = (struct weakkeymap_entry *)a;
+ return entry_a->hash;
+}
+
+static const struct st_hash_type weakkeymap_hash = {
+ weakkeymap_cmp_entry,
+ weakkeymap_hash_entry,
+};
+
+static void
+wkmap_compact(void *ptr)
+{
+ struct weakkeymap *w = ptr;
+ if (w->map) rb_gc_update_tbl_refs(w->map);
+ w->final = rb_gc_location(w->final);
+}
+
+static void
+wkmap_mark(void *ptr)
+{
+ struct weakkeymap *w = ptr;
+ rb_mark_tbl_no_pin(w->map);
+ rb_gc_mark_movable(w->final);
+}
+
+static void
+wkmap_free(void *ptr)
+{
+ struct weakkeymap *w = ptr;
+ st_free_table(w->map);
+ st_free_table(w->obj2hash);
+ xfree(w);
+}
+
+static size_t
+wkmap_memsize(const void *ptr)
+{
+ const struct weakkeymap *w = ptr;
+ return sizeof(struct weakkeymap) + st_memsize(w->map) + st_memsize(w->obj2hash);
+}
+
+static const rb_data_type_t weakkeymap_type = {
+ "weakkeymap",
+ {
+ wkmap_mark,
+ wkmap_free,
+ wkmap_memsize,
+ wkmap_compact,
+ },
+ 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+};
+
+static VALUE
+wkmap_finalize(RB_BLOCK_CALL_FUNC_ARGLIST(objid, self))
+{
+ struct weakkeymap *w;
+ VALUE key;
+
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+
+ /* Get reference from object id. */
+ if ((key = id2ref_obj_tbl(&rb_objspace, objid)) == Qundef) {
+ rb_bug("wkmap_finalize: objid is not found.");
+ }
+
+ st_index_t hash;
+ if (st_delete(w->obj2hash, (st_data_t *)key, &hash)) {
+ weakkeymap_entry_t lookup_entry = {key, hash};
+ weakkeymap_entry_t *deleted_entry = NULL;
+ if (st_get_key(w->map, (st_data_t)&lookup_entry, (st_data_t *)deleted_entry)) {
+ st_data_t deleted_value;
+ st_delete(w->map, (st_data_t *)deleted_entry, &deleted_value);
+ xfree(deleted_entry);
+ }
+ }
+
+ return self;
+}
+
+static VALUE
+wkmap_allocate(VALUE klass)
+{
+ struct weakkeymap *w;
+ VALUE obj = TypedData_Make_Struct(klass, struct weakkeymap, &weakkeymap_type, w);
+ w->map = st_init_table(&weakkeymap_hash);
+ w->obj2hash = rb_init_identtable();
+ w->final = rb_func_lambda_new(wkmap_finalize, obj, 1, 1);
+ return obj;
+}
+
+static st_index_t
+wkmap_lookup_hash(struct weakkeymap *w, VALUE key)
+{
+ st_index_t hash;
+ if (!st_lookup(w->obj2hash, (st_data_t)key, &hash)) {
+ hash = rb_any_hash(key);
+ }
+ return hash;
+}
+
+static weakkeymap_entry_t*
+wkmap_lookup_entry(struct weakkeymap *w, VALUE key, st_index_t hash)
+{
+ st_data_t data;
+ weakkeymap_entry_t lookup_entry = {key, hash};
+
+ if (st_get_key(w->map, (st_data_t)&lookup_entry, &data)) {
+ return (weakkeymap_entry_t *)data;
+ }
+
+ return NULL;
+}
+
+static VALUE
+wkmap_lookup(VALUE self, VALUE key)
+{
+ st_data_t data;
+ struct weakkeymap *w;
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+
+ st_index_t hash = rb_any_hash(key);
+ weakkeymap_entry_t lookup_entry = {key, hash};
+
+ if (st_lookup(w->map, (st_data_t)&lookup_entry, &data)) {
+ return (VALUE)data;
+ }
+ return Qundef;
+}
+
+/*
+ * call-seq:
+ * map[key] -> value
+ *
+ * Returns the value associated with the given +key+ if found.
+ *
+ * If +key+ is not found, returns +nil+.
+ */
+static VALUE
+wkmap_aref(VALUE self, VALUE key)
+{
+ VALUE obj = wkmap_lookup(self, key);
+ return obj != Qundef ? obj : Qnil;
+}
+
+/*
+ * call-seq:
+ * map[key] = value -> value
+ *
+ * Associates the given +value+ with the given +key+; returns +value+.
+ *
+ * The reference to +key+ is weak, so when there is no other reference
+ * to +key+ it may be garbage collected.
+ *
+ * If the given +key+ exists, replaces its value with the given +value+;
+ * the ordering is not affected
+ */
+static VALUE
+wkmap_aset(VALUE self, VALUE key, VALUE value)
+{
+ struct weakkeymap *w;
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+
+ if (!(FL_ABLE(key) && !SYMBOL_P(key) && !RB_BIGNUM_TYPE_P(key))) {
+ rb_raise(rb_eArgError, "WeakKeyMap must be garbage collectable");
+ }
+
+ st_index_t hash = wkmap_lookup_hash(w, key);
+ weakkeymap_entry_t *key_entry = wkmap_lookup_entry(w, key, hash);
+
+ if (!key_entry) {
+ key_entry = ALLOC(weakkeymap_entry_t);
+ key_entry->obj = key;
+ key_entry->hash = hash;
+ }
+
+ if (!st_insert(w->map, (st_data_t)key_entry, (st_data_t)value)) {
+ st_insert(w->obj2hash, (st_data_t)key, (st_data_t)hash);
+ define_final0(key, w->final);
+ }
+
+ return value;
+}
+
+/*
+ * call-seq:
+ * map.getkey(key) -> existing_key or nil
+ *
+ * Returns the existing equal key if it exists, otherwise returns +nil+.
+ */
+static VALUE
+wkmap_getkey(VALUE self, VALUE key)
+{
+ struct weakkeymap *w;
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+
+ st_index_t hash = rb_any_hash(key);
+ weakkeymap_entry_t lookup_entry = {key, hash};
+
+ weakkeymap_entry_t *key_entry = NULL;
+ if (st_get_key(w->map, (st_data_t)&lookup_entry, (st_data_t *)&key_entry)) {
+ GC_ASSERT(key_entry != NULL);
+
+ VALUE obj = key_entry->obj;
+ if (wmap_live_p(&rb_objspace, obj)) {
+ return obj;
+ }
+ }
+ return Qnil;
+}
+
+/*
+ * call-seq:
+ * hash.key?(key) -> true or false
+ *
+ * Returns +true+ if +key+ is a key in +self+, otherwise +false+.
+ */
+static VALUE
+wkmap_has_key(VALUE self, VALUE key)
+{
+ return RBOOL(wkmap_lookup(self, key) != Qundef);
+}
+
+/*
+ * call-seq:
+ * map.clear -> self
+ *
+ * Removes all map entries; returns +self+.
+ */
+static VALUE
+wkmap_clear(VALUE self) {
+ struct weakkeymap *w;
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+ if (w->map) {
+ st_clear(w->map);
+ }
+ if (w->obj2hash) {
+ st_clear(w->obj2hash);
+ }
+ return self;
+}
+
+/*
+ * call-seq:
+ * map.inspect -> new_string
+ *
+ * Returns a new \String containing informations about the map:
+
+ * m = ObjectSpace::WeakKeyMap.new
+ * m[key] = value
+ * m.inspect # => "#<ObjectSpace::WeakKeyMap:0x00000001028dcba8 size=1>"
+ *
+ */
+static VALUE
+wkmap_inspect(VALUE self)
+{
+ struct weakkeymap *w;
+ TypedData_Get_Struct(self, struct weakkeymap, &weakkeymap_type, w);
+
+ st_index_t n = 0;
+ if (w->map) {
+ n = w->map->num_entries;
+ }
+
+#if SIZEOF_ST_INDEX_T <= SIZEOF_LONG
+ const char * format = "#<%"PRIsVALUE":%p size=%lu>";
+#else
+ const char * format = "#<%"PRIsVALUE":%p size=%llu>";
+#endif
+
+ VALUE str = rb_sprintf(format, rb_class_name(CLASS_OF(self)), (void *)self, n);
+ return str;
+}
+
/*
------------------------------ GC profiler ------------------------------
*/
@@ -14484,6 +14789,15 @@ rb_gcdebug_remove_stress_to_class(int argc, VALUE *argv, VALUE self)
* +lib/weakref.rb+ for the public interface.
*/
+/*
+ * Document-class: ObjectSpace::WeakKeyMap
+ *
+ * An ObjectSpace::WeakKeyMap object holds references to
+ * any objects, but objects uses as keys can be garbage collected.
+ *
+ * Objects used as values can't be garbage collected until the key is.
+ */
+
/* Document-class: GC::Profiler
*
* The GC profiler provides access to information on GC runs including time,
@@ -14593,6 +14907,17 @@ Init_GC(void)
rb_include_module(rb_cWeakMap, rb_mEnumerable);
}
+ {
+ VALUE rb_cWeakKeyMap = rb_define_class_under(rb_mObjSpace, "WeakKeyMap", rb_cObject);
+ rb_define_alloc_func(rb_cWeakKeyMap, wkmap_allocate);
+ rb_define_method(rb_cWeakKeyMap, "[]=", wkmap_aset, 2);
+ rb_define_method(rb_cWeakKeyMap, "[]", wkmap_aref, 1);
+ rb_define_method(rb_cWeakKeyMap, "getkey", wkmap_getkey, 1);
+ rb_define_method(rb_cWeakKeyMap, "key?", wkmap_has_key, 1);
+ rb_define_method(rb_cWeakKeyMap, "clear", wkmap_clear, 0);
+ rb_define_method(rb_cWeakKeyMap, "inspect", wkmap_inspect, 0);
+ }
+
/* internal methods */
rb_define_singleton_method(rb_mGC, "verify_internal_consistency", gc_verify_internal_consistency_m, 0);
rb_define_singleton_method(rb_mGC, "verify_transient_heap_internal_consistency", gc_verify_transient_heap_internal_consistency, 0);
diff --git a/hash.c b/hash.c
index 9c57a96812..b6651c9489 100644
--- a/hash.c
+++ b/hash.c
@@ -106,7 +106,7 @@ rb_hash_set_ifnone(VALUE hash, VALUE ifnone)
return hash;
}
-static int
+int
rb_any_cmp(VALUE a, VALUE b)
{
if (a == b) return 0;
@@ -221,7 +221,7 @@ obj_any_hash(VALUE obj)
return FIX2LONG(hval);
}
-static st_index_t
+st_index_t
rb_any_hash(VALUE a)
{
return any_hash(a, obj_any_hash);
diff --git a/internal/hash.h b/internal/hash.h
index 5545ecb855..dff421137a 100644
--- a/internal/hash.h
+++ b/internal/hash.h
@@ -73,6 +73,8 @@ VALUE rb_hash_default_value(VALUE hash, VALUE key);
VALUE rb_hash_set_default_proc(VALUE hash, VALUE proc);
long rb_dbl_long_hash(double d);
st_table *rb_init_identtable(void);
+st_index_t rb_any_hash(VALUE a);
+int rb_any_cmp(VALUE a, VALUE b);
VALUE rb_to_hash_type(VALUE obj);
VALUE rb_hash_key_str(VALUE);
VALUE rb_hash_values(VALUE hash);
diff --git a/test/ruby/weakkeymap.rb b/test/ruby/weakkeymap.rb
new file mode 100644
index 0000000000..20bfe5ec2c
--- /dev/null
+++ b/test/ruby/weakkeymap.rb
@@ -0,0 +1,112 @@
+# frozen_string_literal: false
+require 'test/unit'
+
+class TestWeakKeyMap < Test::Unit::TestCase
+ def setup
+ @wm = ObjectSpace::WeakKeyMap.new
+ end
+
+ def test_map
+ x = Object.new
+ k = "foo"
+ @wm[k] = x
+ assert_same(x, @wm[k])
+ assert_same(x, @wm["FOO".downcase])
+ end
+
+ def test_aset_const
+ x = Object.new
+ assert_raise(ArgumentError) { @wm[true] = x }
+ assert_raise(ArgumentError) { @wm[false] = x }
+ assert_raise(ArgumentError) { @wm[nil] = x }
+ assert_raise(ArgumentError) { @wm[42] = x }
+ assert_raise(ArgumentError) { @wm[2**128] = x }
+ assert_raise(ArgumentError) { @wm[1.23] = x }
+ assert_raise(ArgumentError) { @wm[:foo] = x }
+ assert_raise(ArgumentError) { @wm["foo#{rand}".to_sym] = x }
+ end
+
+ def test_getkey
+ k = "foo"
+ @wm[k] = true
+ assert_same(k, @wm.getkey("FOO".downcase))
+ end
+
+ def test_key?
+ 1.times do
+ assert_weak_include(:key?, "foo")
+ end
+ GC.start
+ assert_not_send([@wm, :key?, "FOO".downcase])
+ end
+
+ def test_clear
+ k = "foo"
+ @wm[k] = true
+ assert @wm[k]
+ assert_same @wm, @wm.clear
+ refute @wm[k]
+ end
+
+ def test_inspect
+ x = Object.new
+ k = Object.new
+ @wm[k] = x
+ assert_match(/\A\#<#{@wm.class.name}:[\dxa-f]+ size=\d+>\z/, @wm.inspect)
+
+ 1000.times do |i|
+ @wm[i.to_s] = Object.new
+ @wm.inspect
+ end
+ assert_match(/\A\#<#{@wm.class.name}:[\dxa-f]+ size=\d+>\z/, @wm.inspect)
+ end
+
+ def test_no_hash_method
+ k = BasicObject.new
+ assert_raise NoMethodError do
+ @wm[k] = 42
+ end
+ end
+
+ def test_frozen_object
+ o = Object.new.freeze
+ assert_nothing_raised(FrozenError) {@wm[o] = 'foo'}
+ assert_nothing_raised(FrozenError) {@wm['foo'] = o}
+ end
+
+ def test_inconsistent_hash_key
+ assert_no_memory_leak [], '', <<~RUBY
+ class BadHash
+ def initialize
+ @hash = 0
+ end
+
+ def hash
+ @hash += 1
+ end
+ end
+
+ k = BadHash.new
+ wm = ObjectSpace::WeakKeyMap.new
+
+ 100_000.times do |i|
+ wm[k] = i
+ end
+ RUBY
+ end
+
+ private
+
+ def assert_weak_include(m, k, n = 100)
+ if n > 0
+ return assert_weak_include(m, k, n-1)
+ end
+ 1.times do
+ x = Object.new
+ @wm[k] = x
+ assert_send([@wm, m, k])
+ assert_send([@wm, m, "FOO".downcase])
+ x = Object.new
+ end
+ end
+end