From c35ff11ae516421809e0d03c278576a70fda45c4 Mon Sep 17 00:00:00 2001 From: ko1 Date: Wed, 12 Aug 2015 08:43:55 +0000 Subject: * id_table.h: introduce ID key table. [Feature #11420] This table only manage ID->VALUE table to reduce overhead of st. Some functions prefixed rb_id_table_* are provided. * id_table.c: implement rb_id_table_*. There are several algorithms to implement it. Now, there are roughly 4 types: * st * array * hash (implemented by Yura Sokolov) * mix of array and hash The macro ID_TABLE_IMPL can choose implementation. You can see detailes about them at the head of id_table.c. At the default, I choose 34 (mix of list and hash). This is not final decision. Please report your suitable parameters or your data structure. * symbol.c: introduce rb_id_serial_t and rb_id_to_serial() to represent ID by serial number. * internal.h: use id_table for method tables. * class.c, gc.c, marshal.c, vm.c, vm_method.c: ditto. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@51541 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 35 ++ class.c | 63 ++- common.mk | 8 +- gc.c | 30 +- id_table.c | 1527 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ id_table.h | 23 + internal.h | 10 +- marshal.c | 3 +- symbol.c | 34 +- symbol.h | 33 +- vm.c | 8 +- vm_method.c | 32 +- 12 files changed, 1707 insertions(+), 99 deletions(-) create mode 100644 id_table.c create mode 100644 id_table.h diff --git a/ChangeLog b/ChangeLog index 19d65445c7..9d122e2cae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,38 @@ +Wed Aug 12 17:05:36 2015 Koichi Sasada + + * id_table.h: introduce ID key table. + [Feature #11420] + + This table only manage ID->VALUE table to reduce overhead of st. + + Some functions prefixed rb_id_table_* are provided. + + * id_table.c: implement rb_id_table_*. + + There are several algorithms to implement it. + + Now, there are roughly 4 types: + + * st + * array + * hash (implemented by Yura Sokolov) + * mix of array and hash + + The macro ID_TABLE_IMPL can choose implementation. + You can see detailes about them at the head of id_table.c. + + At the default, I choose 34 (mix of list and hash). + This is not final decision. + Please report your suitable parameters or + your data structure. + + * symbol.c: introduce rb_id_serial_t and rb_id_to_serial() + to represent ID by serial number. + + * internal.h: use id_table for method tables. + + * class.c, gc.c, marshal.c, vm.c, vm_method.c: ditto. + Wed Aug 12 05:19:11 2015 Eric Wong * parse.y (rb_parser_compile_cstr): remove volatile arg diff --git a/class.c b/class.c index 15f5592204..aeba7b794e 100644 --- a/class.c +++ b/class.c @@ -27,6 +27,7 @@ #include "ruby/st.h" #include "constant.h" #include "vm_core.h" +#include "id_table.h" #include #define id_attached id__attached__ @@ -181,6 +182,11 @@ class_alloc(VALUE flags, VALUE klass) return (VALUE)obj; } +static void +RCLASS_M_TBL_INIT(VALUE c) +{ + RCLASS_M_TBL(c) = rb_id_table_create(0); +} /*! * A utility function that wraps class_alloc. @@ -258,11 +264,11 @@ struct clone_method_arg { VALUE old_klass; }; -static int -clone_method_i(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +clone_method_i(ID key, VALUE value, void *data) { const struct clone_method_arg *arg = (struct clone_method_arg *)data; - clone_method(arg->old_klass, arg->new_klass, (ID)key, (const rb_method_entry_t *)value); + clone_method(arg->old_klass, arg->new_klass, key, (const rb_method_entry_t *)value); return ST_CONTINUE; } @@ -350,7 +356,7 @@ rb_mod_init_copy(VALUE clone, VALUE orig) arg.old_klass = orig; arg.new_klass = clone; RCLASS_M_TBL_INIT(clone); - st_foreach(RCLASS_M_TBL(orig), clone_method_i, (st_data_t)&arg); + rb_id_table_foreach(RCLASS_M_TBL(orig), clone_method_i, &arg); } return clone; @@ -400,7 +406,7 @@ rb_singleton_class_clone_and_attach(VALUE obj, VALUE attach) struct clone_method_arg arg; arg.old_klass = klass; arg.new_klass = clone; - st_foreach(RCLASS_M_TBL(klass), clone_method_i, (st_data_t)&arg); + rb_id_table_foreach(RCLASS_M_TBL(klass), clone_method_i, &arg); } rb_singleton_class_attached(RBASIC(clone)->klass, clone); FL_SET(clone, FL_SINGLETON); @@ -836,10 +842,10 @@ rb_include_module(VALUE klass, VALUE module) rb_raise(rb_eArgError, "cyclic include detected"); } -static int -add_refined_method_entry_i(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +add_refined_method_entry_i(ID key, VALUE value, void *data) { - rb_add_refined_method_entry((VALUE) data, (ID) key); + rb_add_refined_method_entry((VALUE)data, key); return ST_CONTINUE; } @@ -848,7 +854,7 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module, int search_super) { VALUE p, iclass; int method_changed = 0, constant_changed = 0; - const st_table *const klass_m_tbl = RCLASS_M_TBL(RCLASS_ORIGIN(klass)); + struct rb_id_table *const klass_m_tbl = RCLASS_M_TBL(RCLASS_ORIGIN(klass)); while (module) { int superclass_seen = FALSE; @@ -886,14 +892,11 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module, int search_super) VALUE refined_class = rb_refinement_module_get_refined_class(klass); - st_foreach(RMODULE_M_TBL(module), add_refined_method_entry_i, - (st_data_t) refined_class); + rb_id_table_foreach(RMODULE_M_TBL(module), add_refined_method_entry_i, (void *)refined_class); FL_SET(c, RMODULE_INCLUDED_INTO_REFINEMENT); } - if (RMODULE_M_TBL(module) && RMODULE_M_TBL(module)->num_entries) - method_changed = 1; - if (RMODULE_CONST_TBL(module) && RMODULE_CONST_TBL(module)->num_entries) - constant_changed = 1; + if (RMODULE_M_TBL(module) && rb_id_table_size(RMODULE_M_TBL(module))) method_changed = 1; + if (RMODULE_CONST_TBL(module) && RMODULE_CONST_TBL(module)->num_entries) constant_changed = 1; skip: module = RCLASS_SUPER(module); } @@ -904,23 +907,23 @@ include_modules_at(const VALUE klass, VALUE c, VALUE module, int search_super) return method_changed; } -static int -move_refined_method(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +move_refined_method(ID key, VALUE value, void *data) { rb_method_entry_t *me = (rb_method_entry_t *) value; - st_table *tbl = (st_table *) data; + struct rb_id_table *tbl = (struct rb_id_table *) data; if (me->def->type == VM_METHOD_TYPE_REFINED) { if (me->def->body.refined.orig_me) { const rb_method_entry_t *orig_me = me->def->body.refined.orig_me, *new_me; RB_OBJ_WRITE(me, &me->def->body.refined.orig_me, NULL); new_me = rb_method_entry_clone(me); - st_add_direct(tbl, key, (st_data_t) new_me); + rb_id_table_insert(tbl, key, (VALUE)new_me); rb_method_entry_copy(me, orig_me); return ST_CONTINUE; } else { - st_add_direct(tbl, key, (st_data_t) me); + rb_id_table_insert(tbl, key, (VALUE)me); return ST_DELETE; } } @@ -950,8 +953,7 @@ rb_prepend_module(VALUE klass, VALUE module) RCLASS_SET_ORIGIN(klass, origin); RCLASS_M_TBL(origin) = RCLASS_M_TBL(klass); RCLASS_M_TBL_INIT(klass); - st_foreach(RCLASS_M_TBL(origin), move_refined_method, - (st_data_t) RCLASS_M_TBL(klass)); + rb_id_table_foreach(RCLASS_M_TBL(origin), move_refined_method, (void *)RCLASS_M_TBL(klass)); } changed = include_modules_at(klass, klass, module, FALSE); if (changed < 0) @@ -1109,8 +1111,8 @@ struct method_entry_arg { int recur; }; -static int -method_entry_i(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +method_entry_i(ID key, VALUE value, void *data) { const rb_method_entry_t *me = (const rb_method_entry_t *)value; struct method_entry_arg *arg = (struct method_entry_arg *)data; @@ -1158,7 +1160,7 @@ class_instance_method_list(int argc, const VALUE *argv, VALUE mod, int obj, int me_arg.list = st_init_numtable(); me_arg.recur = recur; for (; mod; mod = RCLASS_SUPER(mod)) { - if (RCLASS_M_TBL(mod)) st_foreach(RCLASS_M_TBL(mod), method_entry_i, (st_data_t)&me_arg); + if (RCLASS_M_TBL(mod)) rb_id_table_foreach(RCLASS_M_TBL(mod), method_entry_i, &me_arg); if (BUILTIN_TYPE(mod) == T_ICLASS && !prepended) continue; if (obj && FL_TEST(mod, FL_SINGLETON)) continue; if (!recur) break; @@ -1379,7 +1381,7 @@ rb_obj_singleton_methods(int argc, const VALUE *argv, VALUE obj) { VALUE recur, ary, klass, origin; struct method_entry_arg me_arg; - st_table *mtbl; + struct rb_id_table *mtbl; if (argc == 0) { recur = Qtrue; @@ -1392,14 +1394,12 @@ rb_obj_singleton_methods(int argc, const VALUE *argv, VALUE obj) me_arg.list = st_init_numtable(); me_arg.recur = RTEST(recur); if (klass && FL_TEST(klass, FL_SINGLETON)) { - if ((mtbl = RCLASS_M_TBL(origin)) != 0) - st_foreach(mtbl, method_entry_i, (st_data_t)&me_arg); + if ((mtbl = RCLASS_M_TBL(origin)) != 0) rb_id_table_foreach(mtbl, method_entry_i, &me_arg); klass = RCLASS_SUPER(klass); } if (RTEST(recur)) { while (klass && (FL_TEST(klass, FL_SINGLETON) || RB_TYPE_P(klass, T_ICLASS))) { - if (klass != origin && (mtbl = RCLASS_M_TBL(klass)) != 0) - st_foreach(mtbl, method_entry_i, (st_data_t)&me_arg); + if (klass != origin && (mtbl = RCLASS_M_TBL(klass)) != 0) rb_id_table_foreach(mtbl, method_entry_i, &me_arg); klass = RCLASS_SUPER(klass); } } @@ -1989,8 +1989,7 @@ rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, V int rb_class_has_methods(VALUE c) { - st_table *mtbl = RCLASS_M_TBL(c); - return mtbl && mtbl->num_entries ? TRUE : FALSE; + return rb_id_table_size(RCLASS_M_TBL(c)) == 0 ? FALSE : TRUE; } /*! diff --git a/common.mk b/common.mk index 82a56ed346..f359abd68e 100644 --- a/common.mk +++ b/common.mk @@ -1117,6 +1117,7 @@ class.$(OBJEXT): {$(VPATH)}constant.h class.$(OBJEXT): {$(VPATH)}defines.h class.$(OBJEXT): {$(VPATH)}encoding.h class.$(OBJEXT): {$(VPATH)}id.h +class.$(OBJEXT): {$(VPATH)}id_table.h class.$(OBJEXT): {$(VPATH)}intern.h class.$(OBJEXT): {$(VPATH)}internal.h class.$(OBJEXT): {$(VPATH)}io.h @@ -1456,6 +1457,7 @@ gc.$(OBJEXT): {$(VPATH)}eval_intern.h gc.$(OBJEXT): {$(VPATH)}gc.c gc.$(OBJEXT): {$(VPATH)}gc.h gc.$(OBJEXT): {$(VPATH)}id.h +gc.$(OBJEXT): {$(VPATH)}id_table.h gc.$(OBJEXT): {$(VPATH)}intern.h gc.$(OBJEXT): {$(VPATH)}internal.h gc.$(OBJEXT): {$(VPATH)}io.h @@ -1668,7 +1670,8 @@ marshal.$(OBJEXT): $(top_srcdir)/include/ruby.h marshal.$(OBJEXT): {$(VPATH)}config.h marshal.$(OBJEXT): {$(VPATH)}defines.h marshal.$(OBJEXT): {$(VPATH)}encoding.h -marshal.$(OBJEXT): {$(VPATH)}intern.h +marshal.$(OBJEXT): {$(VPATH)}id_table.h +marshal.$(OBJEXT): {$(VPATH)}intern.h marshal.$(OBJEXT): {$(VPATH)}internal.h marshal.$(OBJEXT): {$(VPATH)}io.h marshal.$(OBJEXT): {$(VPATH)}marshal.c @@ -2215,6 +2218,8 @@ symbol.$(OBJEXT): {$(VPATH)}probes.h symbol.$(OBJEXT): {$(VPATH)}st.h symbol.$(OBJEXT): {$(VPATH)}subst.h symbol.$(OBJEXT): {$(VPATH)}symbol.c +symbol.$(OBJEXT): {$(VPATH)}id_table.c +symbol.$(OBJEXT): {$(VPATH)}id_table.h symbol.$(OBJEXT): {$(VPATH)}symbol.h symbol.$(OBJEXT): {$(VPATH)}vm_opts.h thread.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h @@ -2330,6 +2335,7 @@ vm.$(OBJEXT): {$(VPATH)}encoding.h vm.$(OBJEXT): {$(VPATH)}eval_intern.h vm.$(OBJEXT): {$(VPATH)}gc.h vm.$(OBJEXT): {$(VPATH)}id.h +vm.$(OBJEXT): {$(VPATH)}id_table.h vm.$(OBJEXT): {$(VPATH)}insns.def vm.$(OBJEXT): {$(VPATH)}insns.inc vm.$(OBJEXT): {$(VPATH)}intern.h diff --git a/gc.c b/gc.c index cd8682547a..bc3e901a98 100644 --- a/gc.c +++ b/gc.c @@ -27,6 +27,7 @@ #include "constant.h" #include "ruby_atomic.h" #include "probes.h" +#include "id_table.h" #include #include #include @@ -1928,14 +1929,6 @@ is_pointer_to_heap(rb_objspace_t *objspace, void *ptr) return FALSE; } -static void -rb_free_m_tbl(st_table *tbl) -{ - if (tbl) { - st_free_table(tbl); - } -} - static int free_const_entry_i(st_data_t key, st_data_t value, st_data_t data) { @@ -2010,7 +2003,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) break; case T_MODULE: case T_CLASS: - rb_free_m_tbl(RCLASS_M_TBL(obj)); + rb_id_table_free(RCLASS_M_TBL(obj)); if (RCLASS_IV_TBL(obj)) { st_free_table(RCLASS_IV_TBL(obj)); } @@ -2104,9 +2097,11 @@ obj_free(rb_objspace_t *objspace, VALUE obj) case T_ICLASS: /* Basically , T_ICLASS shares table with the module */ if (FL_TEST(obj, RICLASS_IS_ORIGIN)) { - rb_free_m_tbl(RCLASS_M_TBL(obj)); + rb_id_table_free(RCLASS_M_TBL(obj)); + } + if (RCLASS_CALLABLE_M_TBL(obj) != NULL) { + rb_id_table_free(RCLASS_CALLABLE_M_TBL(obj)); } - rb_free_m_tbl(RCLASS_CALLABLE_M_TBL(obj)); if (RCLASS_EXT(obj)->subclasses) { rb_class_detach_subclasses(obj); RCLASS_EXT(obj)->subclasses = NULL; @@ -3001,7 +2996,7 @@ obj_memsize_of(VALUE obj, int use_all_types) case T_MODULE: case T_CLASS: if (RCLASS_M_TBL(obj)) { - size += st_memsize(RCLASS_M_TBL(obj)); + size += rb_id_table_memsize(RCLASS_M_TBL(obj)); } if (RCLASS_EXT(obj)) { if (RCLASS_IV_TBL(obj)) { @@ -3022,7 +3017,7 @@ obj_memsize_of(VALUE obj, int use_all_types) case T_ICLASS: if (FL_TEST(obj, RICLASS_IS_ORIGIN)) { if (RCLASS_M_TBL(obj)) { - size += st_memsize(RCLASS_M_TBL(obj)); + size += rb_id_table_memsize(RCLASS_M_TBL(obj)); } } break; @@ -3966,10 +3961,9 @@ mark_method_entry(rb_objspace_t *objspace, const rb_method_entry_t *me) } } -static int -mark_method_entry_i(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +mark_method_entry_i(VALUE me, void *data) { - VALUE me = (VALUE)value; rb_objspace_t *objspace = (rb_objspace_t *)data; gc_mark(objspace, me); @@ -3977,10 +3971,10 @@ mark_method_entry_i(st_data_t key, st_data_t value, st_data_t data) } static void -mark_m_tbl(rb_objspace_t *objspace, struct st_table *tbl) +mark_m_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl) { if (tbl) { - st_foreach(tbl, mark_method_entry_i, (st_data_t)objspace); + rb_id_table_foreach_values(tbl, mark_method_entry_i, objspace); } } diff --git a/id_table.c b/id_table.c new file mode 100644 index 0000000000..ad1df059bc --- /dev/null +++ b/id_table.c @@ -0,0 +1,1527 @@ +/* This file is included by symbol.c */ + +#include "id_table.h" + +#ifndef ID_TABLE_DEBUG +#define ID_TABLE_DEBUG 0 +#endif + +#if ID_TABLE_DEBUG == 0 +#define NDEBUG +#endif +#include + +/* + * st + * 0: using st with debug information. + * 1: using st. + * array + * 11: simple array. ids = [ID1, ID2, ...], values = [val1, val2, ...] + * 12: simple array, and use rb_id_serial_t instead of ID. + * 13: simple array, and use rb_id_serial_t instead of ID. Swap recent access. + * 14: sorted array, and use rb_id_serial_t instead of ID. + * 15: sorted array, and use rb_id_serial_t instead of ID, linear small part. + * hash + * 21: funny falcon's Coalesced Hashing implementation [Feature #6962] + * 22: simple open addressing with quadratic probing. + * mix (array + hash) + * 31: array(12) (capa <= 32) + hash(22) + * 32: array(14) (capa <= 32) + hash(22) + * 33: array(12) (capa <= 64) + hash(22) + * 34: array(14) (capa <= 64) + hash(22) + * 34: array(15) (capa <= 64) + hash(22) + */ + +#ifndef ID_TABLE_IMPL +#define ID_TABLE_IMPL 34 +#endif + +#if ID_TABLE_IMPL == 0 +#define ID_TABLE_NAME st +#define ID_TABLE_IMPL_TYPE struct st_id_table + +#define ID_TABLE_USE_ST 1 +#define ID_TABLE_USE_ST_DEBUG 1 + +#elif ID_TABLE_IMPL == 1 +#define ID_TABLE_NAME st +#define ID_TABLE_IMPL_TYPE struct st_id_table + +#define ID_TABLE_USE_ST 1 +#define ID_TABLE_USE_ST_DEBUG 0 + +#elif ID_TABLE_IMPL == 11 +#define ID_TABLE_NAME list +#define ID_TABLE_IMPL_TYPE struct list_id_table + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 + +#elif ID_TABLE_IMPL == 12 +#define ID_TABLE_NAME list +#define ID_TABLE_IMPL_TYPE struct list_id_table + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_ID_SERIAL 1 + +#elif ID_TABLE_IMPL == 13 +#define ID_TABLE_NAME list +#define ID_TABLE_IMPL_TYPE struct list_id_table + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_ID_SERIAL 1 +#define ID_TABLE_SWAP_RECENT_ACCESS 1 + +#elif ID_TABLE_IMPL == 14 +#define ID_TABLE_NAME list +#define ID_TABLE_IMPL_TYPE struct list_id_table + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_ID_SERIAL 1 +#define ID_TABLE_USE_LIST_SORTED 1 + +#elif ID_TABLE_IMPL == 15 +#define ID_TABLE_NAME list +#define ID_TABLE_IMPL_TYPE struct list_id_table + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_ID_SERIAL 1 +#define ID_TABLE_USE_LIST_SORTED 1 +#define ID_TABLE_USE_LIST_SORTED_LINEAR_SMALL_RANGE 1 + +#elif ID_TABLE_IMPL == 21 +#define ID_TABLE_NAME hash +#define ID_TABLE_IMPL_TYPE sa_table + +#define ID_TABLE_USE_COALESCED_HASHING 1 +#define ID_TABLE_USE_ID_SERIAL 1 + +#elif ID_TABLE_IMPL == 22 +#define ID_TABLE_NAME hash +#define ID_TABLE_IMPL_TYPE struct hash_id_table + +#define ID_TABLE_USE_SMALL_HASH 1 +#define ID_TABLE_USE_ID_SERIAL 1 + +#elif ID_TABLE_IMPL == 31 +#define ID_TABLE_NAME mix +#define ID_TABLE_IMPL_TYPE struct mix_id_table + +#define ID_TABLE_USE_MIX 1 +#define ID_TABLE_USE_MIX_LIST_MAX_CAPA 32 + +#define ID_TABLE_USE_ID_SERIAL 1 + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_SMALL_HASH 1 + +#elif ID_TABLE_IMPL == 32 +#define ID_TABLE_NAME mix +#define ID_TABLE_IMPL_TYPE struct mix_id_table + +#define ID_TABLE_USE_MIX 1 +#define ID_TABLE_USE_MIX_LIST_MAX_CAPA 32 + +#define ID_TABLE_USE_ID_SERIAL 1 + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_LIST_SORTED 1 + +#define ID_TABLE_USE_SMALL_HASH 1 + +#elif ID_TABLE_IMPL == 33 +#define ID_TABLE_NAME mix +#define ID_TABLE_IMPL_TYPE struct mix_id_table + +#define ID_TABLE_USE_MIX 1 +#define ID_TABLE_USE_MIX_LIST_MAX_CAPA 64 + +#define ID_TABLE_USE_ID_SERIAL 1 + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_SMALL_HASH 1 + +#elif ID_TABLE_IMPL == 34 +#define ID_TABLE_NAME mix +#define ID_TABLE_IMPL_TYPE struct mix_id_table + +#define ID_TABLE_USE_MIX 1 +#define ID_TABLE_USE_MIX_LIST_MAX_CAPA 64 + +#define ID_TABLE_USE_ID_SERIAL 1 + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_LIST_SORTED 1 + +#define ID_TABLE_USE_SMALL_HASH 1 + +#elif ID_TABLE_IMPL == 35 +#define ID_TABLE_NAME mix +#define ID_TABLE_IMPL_TYPE struct mix_id_table + +#define ID_TABLE_USE_MIX 1 +#define ID_TABLE_USE_MIX_LIST_MAX_CAPA 64 + +#define ID_TABLE_USE_ID_SERIAL 1 + +#define ID_TABLE_USE_LIST 1 +#define ID_TABLE_USE_CALC_VALUES 1 +#define ID_TABLE_USE_LIST_SORTED 1 +#define ID_TABLE_USE_LIST_SORTED_LINEAR_SMALL_RANGE 1 + +#define ID_TABLE_USE_SMALL_HASH 1 + +#else +#error +#endif + +#if ID_TABLE_SWAP_RECENT_ACCESS && ID_TABLE_USE_LIST_SORTED +#error +#endif + +#if ID_TABLE_USE_ID_SERIAL +typedef rb_id_serial_t id_key_t; +static inline ID +key2id(id_key_t key) +{ + return rb_id_serial_to_id(key); +} + +static inline id_key_t +id2key(ID id) +{ + return rb_id_to_serial(id); +} +#else /* ID_TABLE_USE_ID_SERIAL */ + +typedef ID id_key_t; +#define key2id(key) key +#define id2key(id) id + +#endif /* ID_TABLE_USE_ID_SERIAL */ + +/*************************************************************** + * 0: using st with debug information. + * 1: using st. + ***************************************************************/ +#if ID_TABLE_USE_ST +#if ID_TABLE_USE_ST_DEBUG +#define ID_TABLE_MARK 0x12345678 + +struct st_id_table { + struct st_table *st; + unsigned int check; +}; + +static struct st_table * +tbl2st(struct st_id_table *tbl) { + if (tbl->check != ID_TABLE_MARK) rb_bug("tbl2st: check error %x", tbl->check); + return tbl->st; +} + +static struct st_id_table * +st_id_table_create(size_t size) +{ + struct st_id_table *tbl = ALLOC(struct st_id_table); + tbl->st = st_init_numtable_with_size(size); + tbl->check = ID_TABLE_MARK; + return tbl; +} + +static void +st_id_table_free(struct st_id_table *tbl) +{ + st_free_table(tbl->st); + xfree(tbl); +} + +#else /* ID_TABLE_USE_ST_DEBUG */ + +struct st_id_table { + struct st_table st; +}; + +static struct st_table * +tbl2st(struct st_id_table *tbl) { + return (struct st_table *)tbl; +} + +static struct st_id_table * +st_id_table_create(size_t size) +{ + return (struct st_id_table *)st_init_numtable_with_size(size); +} + +static void +st_id_table_free(struct st_id_table *tbl) +{ + st_free_table((struct st_table*)tbl); +} + +#endif /* ID_TABLE_USE_ST_DEBUG */ + +static void +st_id_table_clear(struct st_id_table *tbl) +{ + st_clear(tbl2st(tbl)); +} + +static size_t +st_id_table_size(struct st_id_table *tbl) +{ + return tbl2st(tbl)->num_entries; +} + +static size_t +st_id_table_memsize(struct st_id_table *tbl) +{ + size_t header_size = ID_TABLE_USE_ST_DEBUG ? sizeof(struct st_id_table) : 0; + return header_size + st_memsize(tbl2st(tbl)); +} + +static int +st_id_table_lookup(struct st_id_table *tbl, ID id, VALUE *val) +{ + return st_lookup(tbl2st(tbl), (st_data_t)id, (st_data_t *)val); +} + +static int +st_id_table_insert(struct st_id_table *tbl, ID id, VALUE val) +{ + return st_insert(tbl2st(tbl), id, val); +} + +static int +st_id_table_delete(struct st_id_table *tbl, ID id) +{ + return st_delete(tbl2st(tbl), (st_data_t *)&id, NULL); +} + +static void +st_id_table_foreach(struct st_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data) +{ + st_foreach(tbl2st(tbl), (int (*)(ANYARGS))func, (st_data_t)data); +} + +struct values_iter_data { + enum rb_id_table_iterator_result (*values_i)(VALUE val, void *data); + void *data; +}; + +static int +each_values(st_data_t key, st_data_t val, st_data_t ptr) +{ + struct values_iter_data *values_iter_data = (struct values_iter_data *)ptr; + return values_iter_data->values_i(val, values_iter_data->data); +} + +static void +st_id_table_foreach_values(struct st_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data) +{ + struct values_iter_data values_iter_data; + values_iter_data.values_i = func; + values_iter_data.data = data; + st_foreach(tbl2st(tbl), each_values, (st_data_t)&values_iter_data); +} +#endif /* ID_TABLE_USE_ST */ + +#if ID_TABLE_USE_LIST + +#define LIST_MIN_CAPA 4 + +struct list_id_table { + int capa; + int num; + id_key_t *keys; +#if ID_TABLE_USE_CALC_VALUES == 0 + VALUE *values_; +#endif +}; + +#if ID_TABLE_USE_CALC_VALUES +#define TABLE_VALUES(tbl) ((VALUE *)((tbl)->keys + (tbl)->capa)) +#else +#define TABLE_VALUES(tbl) (tbl)->values_ +#endif + +static struct list_id_table * +list_id_table_init(struct list_id_table *tbl, size_t capa) +{ + if (capa > 0) { + tbl->capa = (int)capa; +#if ID_TABLE_USE_CALC_VALUES + tbl->keys = (id_key_t *)xmalloc(sizeof(id_key_t) * capa + sizeof(VALUE) * capa); +#else + tbl->keys = ALLOC_N(id_key_t, capa); + tbl->values_ = ALLOC_N(VALUE, capa); +#endif + } + return tbl; +} + +#ifndef ID_TABLE_USE_MIX +static struct list_id_table * +list_id_table_create(size_t capa) +{ + struct list_id_table *tbl = ZALLOC(struct list_id_table); + return list_id_table_init(tbl, capa); +} +#endif + +static void +list_id_table_free(struct list_id_table *tbl) +{ + xfree(tbl->keys); +#if ID_TABLE_USE_CALC_VALUES == 0 + xfree(tbl->values_); +#endif + xfree(tbl); +} + +static void +list_id_table_clear(struct list_id_table *tbl) +{ + tbl->num = 0; +} + +static size_t +list_id_table_size(struct list_id_table *tbl) +{ + return (size_t)tbl->num; +} + +static size_t +list_id_table_memsize(struct list_id_table *tbl) +{ + return (sizeof(id_key_t) + sizeof(VALUE)) * tbl->capa + sizeof(struct list_id_table); +} + +static void +list_table_extend(struct list_id_table *tbl) +{ + if (tbl->capa == tbl->num) { + const int capa = tbl->capa == 0 ? LIST_MIN_CAPA : (tbl->capa * 2); + +#if ID_TABLE_USE_CALC_VALUES + { + VALUE *old_values, *new_values; + VALUE *debug_values = NULL; + const int num = tbl->num; + const int size = sizeof(id_key_t) * capa + sizeof(VALUE) * capa; + int i; + + if (num > 0) { + VALUE *orig_values = (VALUE *)(tbl->keys + num); + debug_values = ALLOC_N(VALUE, num); + + for (i=0; ikeys[i]; + size_t j; + fprintf(stderr, ">> %3d | %p - ", i, cs); + for (j=0; jkeys = (id_key_t *)xrealloc(tbl->keys, size); + old_values = (VALUE *)(tbl->keys + num); + new_values = (VALUE *)(tbl->keys + capa); + + /* [ keys (num) ] [ values (num) ] + * ^ old_values + * realloc => + * [ keys (capa = num * 2) ] [ values (capa = num * 2) ] + * ^ new_values + */ + + /* memmove */ + // fprintf(stderr, "memmove: %p -> %p (%d, capa: %d)\n", old_values, new_values, num, capa); + assert(num < capa); + assert(num == 0 || old_values < new_values); + + for (i=num-1; i>=0; i--) { + new_values[i] = old_values[i]; + } + + if (num > 0) { + for (i=0; icapa = capa; +#else + tbl->capa = capa; + tbl->keys = (id_key_t *)xrealloc(tbl->keys, sizeof(id_key_t) * capa); + tbl->values_ = (VALUE *)xrealloc(tbl->values_, sizeof(VALUE) * capa); +#endif + } +} + +#if ID_TABLE_DEBUG +static void +list_table_show(struct list_id_table *tbl) +{ + const id_key_t *keys = tbl->keys; + const int num = tbl->num; + int i; + + fprintf(stderr, "tbl: %p (num: %d)\n", tbl, num); + for (i=0; i [%d] %s %d\n", i, rb_id2name(key2id(keys[i])), (int)keys[i]); + } +} +#endif + +static void +tbl_assert(struct list_id_table *tbl) +{ +#if ID_TABLE_DEBUG +#if ID_TABLE_USE_LIST_SORTED + const id_key_t *keys = tbl->keys; + const int num = tbl->num; + int i; + + for (i=0; i= keys[i+1]) { + list_table_show(tbl); + rb_bug(": not sorted."); + } + } +#endif +#endif +} + +#if ID_TABLE_USE_LIST_SORTED +static int +list_ids_bsearch(const id_key_t *keys, id_key_t key, int num) +{ + int p, min = 0, max = num; + +#if ID_TABLE_USE_LIST_SORTED_LINEAR_SMALL_RANGE + if (num <= 64) { + if (num > 32) { + if (keys[num/2] <= key) { + min = num/2; + } else { + max = num/2; + } + } + for (p = min; p= max) { + break; + } + else { + id_key_t kp = keys[p]; + assert(p < max); + assert(p >= min); + + if (kp > key) max = p; + else if (kp < key) min = p+1; + else { + assert(kp == key); + assert(p >= 0); + assert(p < num); + return p; + } + } + } + + assert(min == max); + assert(min == p); + return -p-1; +} +#endif /* ID_TABLE_USE_LIST_SORTED */ + +static int +list_table_index(struct list_id_table *tbl, id_key_t key) +{ + const int num = tbl->num; + const id_key_t *keys = tbl->keys; + +#if ID_TABLE_USE_LIST_SORTED + return list_ids_bsearch(keys, key, num); +#else /* ID_TABLE_USE_LIST_SORTED */ + int i; + + for (i=0; i= 0) { + *valp = TABLE_VALUES(tbl)[index]; + +#if ID_TABLE_SWAP_RECENT_ACCESS + if (index > 0) { + VALUE *values = TABLE_VALUES(tbl); + id_key_t tk = tbl->keys[index-1]; + VALUE tv = values[index-1]; + tbl->keys[index-1] = tbl->keys[index]; + tbl->keys[index] = tk; + values[index-1] = values[index]; + values[index] = tv; + } +#endif /* ID_TABLE_SWAP_RECENT_ACCESS */ + return TRUE; + } + else { + return FALSE; + } +} + +static int +list_id_table_insert(struct list_id_table *tbl, ID id, VALUE val) +{ + const id_key_t key = id2key(id); + const int index = list_table_index(tbl, key); + + if (index >= 0) { + TABLE_VALUES(tbl)[index] = val; + } + else { + list_table_extend(tbl); + { + const int num = tbl->num++; +#if ID_TABLE_USE_LIST_SORTED + const int insert_index = -(index + 1); + id_key_t *keys = tbl->keys; + VALUE *values = TABLE_VALUES(tbl); + int i; + + if (0) fprintf(stderr, "insert: %d into %d on\n", (int)key, insert_index); + + for (i=num; i>insert_index; i--) { + keys[i] = keys[i-1]; + values[i] = values[i-1]; + } + keys[i] = key; + values[i] = val; + + tbl_assert(tbl); +#else + tbl->keys[num] = key; + TABLE_VALUES(tbl)[num] = val; +#endif + } + } + + return TRUE; +} + +static int +list_delete_index(struct list_id_table *tbl, id_key_t key, int index) +{ + if (index >= 0) { + VALUE *values = TABLE_VALUES(tbl); + +#if ID_TABLE_USE_LIST_SORTED + int i; + const int num = tbl->num; + id_key_t *keys = tbl->keys; + + for (i=index+1; ikeys[index] = tbl->keys[tbl->num-1]; + values[index] = values[tbl->num-1]; +#endif + tbl->num--; + tbl_assert(tbl); + + return TRUE; + } + else { + return FALSE; + } +} + +static int +list_id_table_delete(struct list_id_table *tbl, ID id) +{ + const id_key_t key = id2key(id); + int index = list_table_index(tbl, key); + return list_delete_index(tbl, key, index); +} + +#define FOREACH_LAST() do { \ + switch (ret) { \ + case ID_TABLE_CONTINUE: \ + case ID_TABLE_STOP: \ + break; \ + case ID_TABLE_DELETE: \ + list_delete_index(tbl, key, i); \ + values = TABLE_VALUES(tbl); \ + num = tbl->num; \ + i--; /* redo smae index */ \ + break; \ + } \ +} while (0) + +static void +list_id_table_foreach(struct list_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data) +{ + int num = tbl->num; + int i; + const id_key_t *keys = tbl->keys; + const VALUE *values = TABLE_VALUES(tbl); + + for (i=0; inum; + int i; + const id_key_t *keys = tbl->keys; + VALUE *values = TABLE_VALUES(tbl); + + for (i=0; inum_entries = 0; + table->entries = ZALLOC_N(sa_entry, num_bins); + table->num_bins = num_bins; + table->free_pos = num_bins; + } +} + +static sa_table* +hash_id_table_create(size_t size) +{ + sa_table* table = ZALLOC(sa_table); + sa_init_table(table, (sa_index_t)size); + return table; +} + +static void +hash_id_table_clear(sa_table *table) +{ + xfree(table->entries); + memset(table, 0, sizeof(sa_table)); +} + +static void +hash_id_table_free(sa_table *table) +{ + xfree(table->entries); + xfree(table); +} + +static size_t +hash_id_table_memsize(sa_table *table) +{ + return sizeof(sa_table) + table->num_bins * sizeof (sa_entry); +} + +static inline sa_index_t +calc_pos(register sa_table* table, id_key_t key) +{ + return key & (table->num_bins - 1); +} + +static void +fix_empty(register sa_table* table) +{ + while(--table->free_pos && + table->entries[table->free_pos-1].next != SA_EMPTY); +} + +#define FLOOR_TO_4 ((~((sa_index_t)0)) << 2) +static sa_index_t +find_empty(register sa_table* table, register sa_index_t pos) +{ + sa_index_t new_pos = table->free_pos-1; + sa_entry *entry; + static unsigned offsets[][3] = { + {1, 2, 3}, + {2, 3, 0}, + {3, 1, 0}, + {2, 1, 0} + }; + unsigned *check = offsets[pos&3]; + pos &= FLOOR_TO_4; + entry = table->entries+pos; + + if (entry[check[0]].next == SA_EMPTY) { new_pos = pos + check[0]; goto check; } + if (entry[check[1]].next == SA_EMPTY) { new_pos = pos + check[1]; goto check; } + if (entry[check[2]].next == SA_EMPTY) { new_pos = pos + check[2]; goto check; } + + check: + if (new_pos+1 == table->free_pos) fix_empty(table); + return new_pos; +} + +static void resize(register sa_table* table); +static int insert_into_chain(register sa_table*, register sa_index_t, st_data_t, sa_index_t pos); +static int insert_into_main(register sa_table*, sa_index_t, st_data_t, sa_index_t pos, sa_index_t prev_pos); + +static int +sa_insert(register sa_table* table, id_key_t key, VALUE value) +{ + register sa_entry *entry; + sa_index_t pos, main_pos; + + if (table->num_bins == 0) { + sa_init_table(table, SA_MIN_SIZE); + } + + pos = calc_pos(table, key); + entry = table->entries + pos; + + if (entry->next == SA_EMPTY) { + entry->next = SA_LAST; + entry->key = key; + entry->value = value; + table->num_entries++; + if (pos+1 == table->free_pos) fix_empty(table); + return 0; + } + + if (entry->key == key) { + entry->value = value; + return 1; + } + + if (table->num_entries + (table->num_entries >> 2) > table->num_bins) { + resize(table); + return sa_insert(table, key, value); + } + + main_pos = calc_pos(table, entry->key); + if (main_pos == pos) { + return insert_into_chain(table, key, value, pos); + } + else { + if (!table->free_pos) { + resize(table); + return sa_insert(table, key, value); + } + return insert_into_main(table, key, value, pos, main_pos); + } +} + +static int +hash_id_table_insert(register sa_table* table, ID id, VALUE value) +{ + return sa_insert(table, id2key(id), value); +} + +static int +insert_into_chain(register sa_table* table, id_key_t key, st_data_t value, sa_index_t pos) +{ + sa_entry *entry = table->entries + pos, *new_entry; + sa_index_t new_pos; + + while (entry->next != SA_LAST) { + pos = entry->next - SA_OFFSET; + entry = table->entries + pos; + if (entry->key == key) { + entry->value = value; + return 1; + } + } + + if (!table->free_pos) { + resize(table); + return sa_insert(table, key, value); + } + + new_pos = find_empty(table, pos); + new_entry = table->entries + new_pos; + entry->next = new_pos + SA_OFFSET; + + new_entry->next = SA_LAST; + new_entry->key = key; + new_entry->value = value; + table->num_entries++; + return 0; +} + +static int +insert_into_main(register sa_table* table, id_key_t key, st_data_t value, sa_index_t pos, sa_index_t prev_pos) +{ + sa_entry *entry = table->entries + pos; + sa_index_t new_pos = find_empty(table, pos); + sa_entry *new_entry = table->entries + new_pos; + sa_index_t npos; + + *new_entry = *entry; + + while((npos = table->entries[prev_pos].next - SA_OFFSET) != pos) { + prev_pos = npos; + } + table->entries[prev_pos].next = new_pos + SA_OFFSET; + + entry->next = SA_LAST; + entry->key = key; + entry->value = value; + table->num_entries++; + return 0; +} + +static sa_index_t +new_size(sa_index_t num_entries) +{ + sa_index_t size = num_entries >> 3; + size |= size >> 1; + size |= size >> 2; + size |= size >> 4; + size |= size >> 8; + size |= size >> 16; + return (size + 1) << 3; +} + +static void +resize(register sa_table *table) +{ + sa_table tmp_table; + sa_entry *entry; + sa_index_t i; + + if (table->num_entries == 0) { + xfree(table->entries); + memset(table, 0, sizeof(sa_table)); + return; + } + + sa_init_table(&tmp_table, new_size(table->num_entries + (table->num_entries >> 2))); + entry = table->entries; + + for(i = 0; i < table->num_bins; i++, entry++) { + if (entry->next != SA_EMPTY) { + sa_insert(&tmp_table, entry->key, entry->value); + } + } + xfree(table->entries); + *table = tmp_table; +} + +static int +hash_id_table_lookup(register sa_table *table, ID id, VALUE *valuep) +{ + register sa_entry *entry; + id_key_t key = id2key(id); + + if (table->num_entries == 0) return 0; + + entry = table->entries + calc_pos(table, key); + if (entry->next == SA_EMPTY) return 0; + + if (entry->key == key) goto found; + if (entry->next == SA_LAST) return 0; + + entry = table->entries + (entry->next - SA_OFFSET); + if (entry->key == key) goto found; + + while(entry->next != SA_LAST) { + entry = table->entries + (entry->next - SA_OFFSET); + if (entry->key == key) goto found; + } + return 0; +found: + if (valuep) *valuep = entry->value; + return 1; +} + +static size_t +hash_id_table_size(sa_table *table) +{ + return table->num_entries; +} + +static int +hash_id_table_delete(sa_table *table, ID id) +{ + sa_index_t pos, prev_pos = ~0; + sa_entry *entry; + id_key_t key = id2key(id); + + if (table->num_entries == 0) goto not_found; + + pos = calc_pos(table, key); + entry = table->entries + pos; + + if (entry->next == SA_EMPTY) goto not_found; + + do { + if (entry->key == key) { + if (entry->next != SA_LAST) { + sa_index_t npos = entry->next - SA_OFFSET; + *entry = table->entries[npos]; + memset(table->entries + npos, 0, sizeof(sa_entry)); + } + else { + memset(table->entries + pos, 0, sizeof(sa_entry)); + if (~prev_pos) { + table->entries[prev_pos].next = SA_LAST; + } + } + table->num_entries--; + if (table->num_entries < table->num_bins / 4) { + resize(table); + } + return 1; + } + if (entry->next == SA_LAST) break; + prev_pos = pos; + pos = entry->next - SA_OFFSET; + entry = table->entries + pos; + } while(1); + +not_found: + return 0; +} + +enum foreach_type { + foreach_key_values, + foreach_values +}; + +static void +hash_foreach(sa_table *table, enum rb_id_table_iterator_result (*func)(ANYARGS), void *arg, enum foreach_type type) +{ + sa_index_t i; + + if (table->num_bins > 0) { + for(i = 0; i < table->num_bins ; i++) { + if (table->entries[i].next != SA_EMPTY) { + id_key_t key = table->entries[i].key; + st_data_t val = table->entries[i].value; + enum rb_id_table_iterator_result ret; + + switch (type) { + case foreach_key_values: + ret = (*func)(key2id(key), val, arg); + break; + case foreach_values: + ret = (*func)(val, arg); + break; + } + + switch (ret) { + case ID_TABLE_DELETE: + rb_warn("unsupported yet"); + break; + default: + break; + } + if (ret == ID_TABLE_STOP) break; + } + } + } +} + +static void +hash_id_table_foreach(sa_table *table, enum rb_id_table_iterator_result (*func)(ID, VALUE, void *), void *arg) +{ + hash_foreach(table, func, arg, foreach_key_values); +} + +static void +hash_id_table_foreach_values(sa_table *table, enum rb_id_table_iterator_result (*func)(VALUE, void *), void *arg) +{ + hash_foreach(table, func, arg, foreach_values); +} +#endif /* ID_TABLE_USE_COALESCED_HASHING */ + +#ifdef ID_TABLE_USE_SMALL_HASH +/* simple open addressing with quadratic probing. + uses mark-bit on collisions - need extra 1 bit, + ID is strictly 3 bits larger than rb_id_serial_t */ + +typedef struct rb_id_item { + id_key_t key; +#if SIZEOF_VALUE == 8 + int collision; +#endif + VALUE val; +} item_t; + +struct hash_id_table { + int capa; + int num; + int used; + item_t *items; +}; + +#if SIZEOF_VALUE == 8 +#define ITEM_GET_KEY(tbl, i) ((tbl)->items[i].key) +#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items[i].key) +#define ITEM_COLLIDED(tbl, i) ((tbl)->items[i].collision) +#define ITEM_SET_COLLIDED(tbl, i) ((tbl)->items[i].collision = 1) +static inline void +ITEM_SET_KEY(struct hash_id_table *tbl, int i, id_key_t key) +{ + tbl->items[i].key = key; +} +#else +#define ITEM_GET_KEY(tbl, i) ((tbl)->items[i].key >> 1) +#define ITEM_KEY_ISSET(tbl, i) ((tbl)->items[i].key > 1) +#define ITEM_COLLIDED(tbl, i) ((tbl)->items[i].key & 1) +#define ITEM_SET_COLLIDED(tbl, i) ((tbl)->items[i].key |= 1) +static inline void +ITEM_SET_KEY(struct hash_id_table *tbl, int i, id_key_t key) +{ + tbl->items[i].key = (key << 1) | ITEM_COLLIDED(tbl, i); +} +#endif + +static inline int +round_capa(int capa) { + /* minsize is 4 */ + capa >>= 2; + capa |= capa >> 1; + capa |= capa >> 2; + capa |= capa >> 4; + capa |= capa >> 8; + capa |= capa >> 16; + return (capa + 1) << 2; +} + +static struct hash_id_table * +hash_id_table_init(struct hash_id_table *tbl, int capa) +{ + MEMZERO(tbl, struct hash_id_table, 1); + if (capa > 0) { + capa = round_capa(capa); + tbl->capa = (int)capa; + tbl->items = ZALLOC_N(item_t, capa); + } + return tbl; +} + +#ifndef ID_TABLE_USE_MIX +static struct hash_id_table * +hash_id_table_create(size_t capa) +{ + struct hash_id_table *tbl = ALLOC(struct hash_id_table); + return hash_id_table_init(tbl, (int)capa); +} +#endif + +static void +hash_id_table_free(struct hash_id_table *tbl) +{ + xfree(tbl->items); + xfree(tbl); +} + +static void +hash_id_table_clear(struct hash_id_table *tbl) +{ + tbl->num = 0; + tbl->used = 0; + MEMZERO(tbl->items, item_t, tbl->capa); +} + +static size_t +hash_id_table_size(struct hash_id_table *tbl) +{ + return (size_t)tbl->num; +} + +static size_t +hash_id_table_memsize(struct hash_id_table *tbl) +{ + return sizeof(item_t) * tbl->capa + sizeof(struct hash_id_table); +} + +static int +hash_table_index(struct hash_id_table* tbl, id_key_t key) +{ + if (tbl->capa > 0) { + int mask = tbl->capa - 1; + int ix = key & mask; + int d = 1; + while (key != ITEM_GET_KEY(tbl, ix)) { + if (!ITEM_COLLIDED(tbl, ix)) + return -1; + ix = (ix + d) & mask; + d++; + } + return ix; + } + return -1; +} + +static void +hash_table_raw_insert(struct hash_id_table *tbl, id_key_t key, VALUE val) +{ + int mask = tbl->capa - 1; + int ix = key & mask; + int d = 1; + assert(key != 0); + while (ITEM_KEY_ISSET(tbl, ix)) { + ITEM_SET_COLLIDED(tbl, ix); + ix = (ix + d) & mask; + d++; + } + tbl->num++; + if (!ITEM_COLLIDED(tbl, ix)) { + tbl->used++; + } + ITEM_SET_KEY(tbl, ix, key); + tbl->items[ix].val = val; +} + +static int +hash_delete_index(struct hash_id_table *tbl, int ix) +{ + if (ix >= 0) { + if (!ITEM_COLLIDED(tbl, ix)) { + tbl->used--; + } + tbl->num--; + ITEM_SET_KEY(tbl, ix, 0); + tbl->items[ix].val = 0; + return TRUE; + } else { + return FALSE; + } +} + +static void +hash_table_extend(struct hash_id_table* tbl) +{ + if (tbl->used + (tbl->used >> 1) >= tbl->capa) { + int new_cap = round_capa(tbl->num + (tbl->num >> 1)); + int i; + item_t* old; + struct hash_id_table tmp_tbl = {new_cap, 0, 0}; + tmp_tbl.items = ZALLOC_N(item_t, new_cap); + for (i = 0; i < tbl->capa; i++) { + id_key_t key = ITEM_GET_KEY(tbl, i); + if (key != 0) { + hash_table_raw_insert(&tmp_tbl, key, tbl->items[i].val); + } + } + old = tbl->items; + *tbl = tmp_tbl; + xfree(old); + } +} + +#if ID_TABLE_DEBUG && 0 +static void +hash_table_show(struct hash_id_table *tbl) +{ + const id_key_t *keys = tbl->keys; + const int capa = tbl->capa; + int i; + + fprintf(stderr, "tbl: %p (capa: %d, num: %d, used: %d)\n", tbl, tbl->capa, tbl->num, tbl->used); + for (i=0; i [%d] %s %d\n", i, rb_id2name(key2id(keys[i])), (int)keys[i]); + } + } +} +#endif + +static int +hash_id_table_lookup(struct hash_id_table *tbl, ID id, VALUE *valp) +{ + id_key_t key = id2key(id); + int index = hash_table_index(tbl, key); + + if (index >= 0) { + *valp = tbl->items[index].val; + return TRUE; + } + else { + return FALSE; + } +} + +static int +hash_id_table_insert_key(struct hash_id_table *tbl, const id_key_t key, const VALUE val) +{ + const int index = hash_table_index(tbl, key); + + if (index >= 0) { + tbl->items[index].val = val; + } + else { + hash_table_extend(tbl); + hash_table_raw_insert(tbl, key, val); + } + return TRUE; +} + +static int +hash_id_table_insert(struct hash_id_table *tbl, ID id, VALUE val) +{ + return hash_id_table_insert_key(tbl, id2key(id), val); +} + +static int +hash_id_table_delete(struct hash_id_table *tbl, ID id) +{ + const id_key_t key = id2key(id); + int index = hash_table_index(tbl, key); + return hash_delete_index(tbl, index); +} + +static void +hash_id_table_foreach(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data) +{ + int i, capa = tbl->capa; + + for (i=0; iitems[i].val, data); + assert(key != 0); + + if (ret == ID_TABLE_DELETE) + hash_delete_index(tbl, i); + else if (ret == ID_TABLE_STOP) + return; + } + } +} + +static void +hash_id_table_foreach_values(struct hash_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data) +{ + int i, capa = tbl->capa; + + for (i=0; iitems[i].val, data); + + if (ret == ID_TABLE_DELETE) + hash_delete_index(tbl, i); + else if (ret == ID_TABLE_STOP) + return; + } + } +} +#endif /* ID_TABLE_USE_SMALL_HASH */ + +#if ID_TABLE_USE_MIX + +struct mix_id_table { + union { + struct { + int capa; + int num; + } size; + struct list_id_table list; + struct hash_id_table hash; + } aux; +}; + +#define LIST_P(mix) ((mix)->aux.size.capa <= ID_TABLE_USE_MIX_LIST_MAX_CAPA) + +static struct mix_id_table * +mix_id_table_create(size_t size) +{ + struct mix_id_table *mix = ZALLOC(struct mix_id_table); + list_id_table_init((struct list_id_table *)mix, size); + return mix; +} + +static void +mix_id_table_free(struct mix_id_table *tbl) +{ + if (LIST_P(tbl)) list_id_table_free(&tbl->aux.list); + else hash_id_table_free(&tbl->aux.hash); +} + +static void +mix_id_table_clear(struct mix_id_table *tbl) +{ + if (LIST_P(tbl)) list_id_table_clear(&tbl->aux.list); + else hash_id_table_clear(&tbl->aux.hash); +} + +static size_t +mix_id_table_size(struct mix_id_table *tbl) +{ + if (LIST_P(tbl)) return list_id_table_size(&tbl->aux.list); + else return hash_id_table_size(&tbl->aux.hash); +} + +static size_t +mix_id_table_memsize(struct mix_id_table *tbl) +{ + if (LIST_P(tbl)) return list_id_table_memsize(&tbl->aux.list) - sizeof(struct list_id_table) + sizeof(struct mix_id_table); + else return hash_id_table_memsize(&tbl->aux.hash); +} + +static int +mix_id_table_insert(struct mix_id_table *tbl, ID id, VALUE val) +{ + if (LIST_P(tbl)) { + int r = list_id_table_insert(&tbl->aux.list, id, val); + + if (!LIST_P(tbl)) { + /* overflow. TODO: this promotion should be done in list_extend_table */ + struct list_id_table *list = &tbl->aux.list; + struct hash_id_table *hash = &tbl->aux.hash; + id_key_t *keys = list->keys; + VALUE *values = TABLE_VALUES(list); + const int num = list->num; + int i; + + hash_id_table_init(hash, 0); + + for (i=0; iaux.hash, id, val); + } +} + +static int +mix_id_table_lookup(struct mix_id_table *tbl, ID id, VALUE *valp) +{ + if (LIST_P(tbl)) return list_id_table_lookup(&tbl->aux.list, id, valp); + else return hash_id_table_lookup(&tbl->aux.hash, id, valp); +} + +static int +mix_id_table_delete(struct mix_id_table *tbl, ID id) +{ + if (LIST_P(tbl)) return list_id_table_delete(&tbl->aux.list, id); + else return hash_id_table_delete(&tbl->aux.hash, id); +} + +static void +mix_id_table_foreach(struct mix_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data) +{ + if (LIST_P(tbl)) list_id_table_foreach(&tbl->aux.list, func, data); + else hash_id_table_foreach(&tbl->aux.hash, func, data); +} + +static void +mix_id_table_foreach_values(struct mix_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data) +{ + if (LIST_P(tbl)) list_id_table_foreach_values(&tbl->aux.list, func, data); + else hash_id_table_foreach_values(&tbl->aux.hash, func, data); +} + +#endif /* ID_TABLE_USE_MIX */ + +/* IMPL(create) will be "hash_id_table_create" and so on */ +#define IMPL2(name, op) name##_id_table_##op +#define IMPL1(name, op) IMPL2(name, op) +#define IMPL(op) IMPL1(ID_TABLE_NAME, op) + +struct rb_id_table *rb_id_table_create(size_t size) {return (struct rb_id_table *)IMPL(create)(size);} +void rb_id_table_free(struct rb_id_table *tbl) { IMPL(free)((ID_TABLE_IMPL_TYPE *)tbl);} +void rb_id_table_clear(struct rb_id_table *tbl) { IMPL(clear)((ID_TABLE_IMPL_TYPE *)tbl);} +size_t rb_id_table_size(struct rb_id_table *tbl) {return IMPL(size)((ID_TABLE_IMPL_TYPE *)tbl);} +size_t rb_id_table_memsize(struct rb_id_table *tbl) {return IMPL(memsize)((ID_TABLE_IMPL_TYPE *)tbl);} + +int rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val) {return IMPL(insert)((ID_TABLE_IMPL_TYPE *)tbl, id, val);} +int rb_id_table_lookup(struct rb_id_table *tbl, ID id, VALUE *valp) {return IMPL(lookup)((ID_TABLE_IMPL_TYPE *)tbl, id, valp);} +int rb_id_table_delete(struct rb_id_table *tbl, ID id) {return IMPL(delete)((ID_TABLE_IMPL_TYPE *)tbl, id);} + +void rb_id_table_foreach(struct rb_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data) { + IMPL(foreach)((ID_TABLE_IMPL_TYPE *)tbl, func, data);} +void rb_id_table_foreach_values(struct rb_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data) { + IMPL(foreach_values)((ID_TABLE_IMPL_TYPE *)tbl, func, data);} + +#if ID_TABLE_STARTUP_SIG +__attribute__((constructor)) +static void +show_impl(void) +{ + fprintf(stderr, "impl: %d\n", ID_TABLE_IMPL); +} +#endif diff --git a/id_table.h b/id_table.h new file mode 100644 index 0000000000..503842f250 --- /dev/null +++ b/id_table.h @@ -0,0 +1,23 @@ +#include "ruby/ruby.h" + +struct rb_id_table; + +/* compatible with ST_* */ +enum rb_id_table_iterator_result { + ID_TABLE_CONTINUE = ST_CONTINUE, + ID_TABLE_STOP = ST_STOP, + ID_TABLE_DELETE = ST_DELETE, +}; + +struct rb_id_table *rb_id_table_create(size_t size); +void rb_id_table_free(struct rb_id_table *tbl); +void rb_id_table_clear(struct rb_id_table *tbl); + +size_t rb_id_table_size(struct rb_id_table *tbl); +size_t rb_id_table_memsize(struct rb_id_table *tbl); + +int rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val); +int rb_id_table_lookup(struct rb_id_table *tbl, ID id, VALUE *valp); +int rb_id_table_delete(struct rb_id_table *tbl, ID id); +void rb_id_table_foreach(struct rb_id_table *tbl, enum rb_id_table_iterator_result (*func)(ID id, VALUE val, void *data), void *data); +void rb_id_table_foreach_values(struct rb_id_table *tbl, enum rb_id_table_iterator_result (*func)(VALUE val, void *data), void *data); diff --git a/internal.h b/internal.h index 45742d059d..59526c8ec7 100644 --- a/internal.h +++ b/internal.h @@ -464,7 +464,7 @@ struct rb_classext_struct { struct st_table *iv_index_tbl; struct st_table *iv_tbl; struct st_table *const_tbl; - struct st_table *callable_m_tbl; + struct rb_id_table *callable_m_tbl; rb_subclass_entry_t *subclasses; rb_subclass_entry_t **parent_subclasses; /** @@ -486,7 +486,7 @@ struct RClass { struct RBasic basic; VALUE super; rb_classext_t *ptr; - struct st_table *m_tbl; + struct rb_id_table *m_tbl; }; void rb_class_subclass_add(VALUE super, VALUE klass); @@ -511,12 +511,6 @@ RCLASS_SET_ORIGIN(VALUE klass, VALUE origin) if (klass != origin) FL_SET(origin, RICLASS_IS_ORIGIN); } -static inline void -RCLASS_M_TBL_INIT(VALUE c) -{ - RCLASS_M_TBL(c) = st_init_numtable(); -} - #undef RCLASS_SUPER static inline VALUE RCLASS_SUPER(VALUE klass) diff --git a/marshal.c b/marshal.c index fc333476c3..24453c1d2d 100644 --- a/marshal.c +++ b/marshal.c @@ -17,6 +17,7 @@ #include "ruby/io.h" #include "ruby/st.h" #include "ruby/util.h" +#include "id_table.h" #include #ifdef HAVE_FLOAT_H @@ -474,7 +475,7 @@ hash_each(VALUE key, VALUE value, struct dump_call_arg *arg) } #define SINGLETON_DUMP_UNABLE_P(klass) \ - (RCLASS_M_TBL(klass)->num_entries || \ + (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \ (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1)) static void diff --git a/symbol.c b/symbol.c index 899ccda018..0167fd8dcb 100644 --- a/symbol.c +++ b/symbol.c @@ -107,7 +107,7 @@ enum id_entry_type { }; static struct symbols { - ID last_id; + rb_id_serial_t last_id; st_table *str_sym; VALUE ids; VALUE dsymbol_fstr_hash; @@ -143,8 +143,6 @@ WARN_UNUSED_RESULT(static ID attrsetname_to_attr(VALUE name)); WARN_UNUSED_RESULT(static ID attrsetname_to_attr_id(VALUE name)); WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable)); -#define id_to_serial(id) (is_notop_id(id) ? id >> ID_SCOPE_SHIFT : id) - ID rb_id_attrset(ID id) { @@ -373,7 +371,7 @@ rb_str_symname_type(VALUE name, unsigned int allowed_attrset) } static void -set_id_entry(ID num, VALUE str, VALUE sym) +set_id_entry(rb_id_serial_t num, VALUE str, VALUE sym) { size_t idx = num / ID_ENTRY_UNIT; VALUE ary, ids = global_symbols.ids; @@ -387,7 +385,7 @@ set_id_entry(ID num, VALUE str, VALUE sym) } static VALUE -get_id_entry(ID num, const enum id_entry_type t) +get_id_entry(rb_id_serial_t num, const enum id_entry_type t) { if (num && num <= global_symbols.last_id) { size_t idx = num / ID_ENTRY_UNIT; @@ -401,6 +399,18 @@ get_id_entry(ID num, const enum id_entry_type t) return 0; } +static inline ID +rb_id_serial_to_id(rb_id_serial_t num) +{ + if (is_notop_id((ID)num)) { + VALUE sym = get_id_entry(num, ID_ENTRY_SYM); + return SYM2ID(sym); + } + else { + return (ID)num; + } +} + #if SYMBOL_DEBUG static int register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing) @@ -444,7 +454,7 @@ register_static_symid(ID id, const char *name, long len, rb_encoding *enc) static ID register_static_symid_str(ID id, VALUE str) { - ID num = id_to_serial(id); + rb_id_serial_t num = rb_id_to_serial(id); VALUE sym = STATIC_ID2SYM(id); OBJ_FREEZE(str); @@ -584,7 +594,7 @@ lookup_str_sym(const VALUE str) static VALUE lookup_id_str(ID id) { - return get_id_entry(id_to_serial(id), ID_ENTRY_STR); + return get_id_entry(rb_id_to_serial(id), ID_ENTRY_STR); } ID @@ -604,7 +614,9 @@ rb_intern3(const char *name, long len, rb_encoding *enc) static ID next_id_base(void) { - if (global_symbols.last_id >= ~(ID)0 >> (ID_SCOPE_SHIFT+RUBY_SPECIAL_SHIFT)) { + rb_id_serial_t next_serial = global_symbols.last_id + 1; + + if (next_serial == 0) { return (ID)-1; } else { @@ -744,7 +756,7 @@ rb_sym2id(VALUE sym) RSYMBOL(sym)->id = id |= num; /* make it permanent object */ - set_id_entry(num >>= ID_SCOPE_SHIFT, fstr, sym); + set_id_entry(rb_id_to_serial(num), fstr, sym); rb_hash_delete_entry(global_symbols.dsymbol_fstr_hash, fstr); } } @@ -760,7 +772,7 @@ VALUE rb_id2sym(ID x) { if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x); - return get_id_entry(id_to_serial(x), ID_ENTRY_SYM); + return get_id_entry(rb_id_to_serial(x), ID_ENTRY_SYM); } @@ -1122,3 +1134,5 @@ rb_is_junk_name(VALUE name) { return rb_str_symname_type(name, IDSET_ATTRSET_FOR_SYNTAX) == -1; } + +#include "id_table.c" diff --git a/symbol.h b/symbol.h index 5c52b9742b..001d6de1f8 100644 --- a/symbol.h +++ b/symbol.h @@ -32,15 +32,6 @@ struct RSymbol { #define RSYMBOL(obj) (R_CAST(RSymbol)(obj)) -static inline int -id_type(ID id) -{ - if (id<=tLAST_OP_ID) { - return -1; - } - return (int)(id&ID_SCOPE_MASK); -} - #define is_notop_id(id) ((id)>tLAST_OP_ID) #define is_local_id(id) (id_type(id)==ID_LOCAL) #define is_global_id(id) (id_type(id)==ID_GLOBAL) @@ -50,6 +41,30 @@ id_type(ID id) #define is_class_id(id) (id_type(id)==ID_CLASS) #define is_junk_id(id) (id_type(id)==ID_JUNK) +static inline int +id_type(ID id) +{ + if (is_notop_id(id)) { + return (int)(id&ID_SCOPE_MASK); + } + else { + return -1; + } +} + +typedef uint32_t rb_id_serial_t; + +static inline rb_id_serial_t +rb_id_to_serial(ID id) +{ + if (is_notop_id(id)) { + return (rb_id_serial_t)(id >> ID_SCOPE_SHIFT); + } + else { + return (rb_id_serial_t)id; + } +} + static inline int sym_type(VALUE sym) { diff --git a/vm.c b/vm.c index 4245ea4fb0..4a83fbeebc 100644 --- a/vm.c +++ b/vm.c @@ -1242,10 +1242,9 @@ rb_vm_check_redefinition_opt_method(const rb_method_entry_t *me, VALUE klass) } } -static int -check_redefined_method(st_data_t key, st_data_t value, st_data_t data) +static enum rb_id_table_iterator_result +check_redefined_method(ID mid, VALUE value, void *data) { - ID mid = (ID)key; VALUE klass = (VALUE)data; const rb_method_entry_t *me = (rb_method_entry_t *)value; const rb_method_entry_t *newme = rb_method_entry(klass, mid); @@ -1259,8 +1258,7 @@ void rb_vm_check_redefinition_by_prepend(VALUE klass) { if (!vm_redefinition_check_flag(klass)) return; - st_foreach(RCLASS_M_TBL(RCLASS_ORIGIN(klass)), check_redefined_method, - (st_data_t)klass); + rb_id_table_foreach(RCLASS_M_TBL(RCLASS_ORIGIN(klass)), check_redefined_method, (void *)klass); } static void diff --git a/vm_method.c b/vm_method.c index 8dda2916e8..f03d312d97 100644 --- a/vm_method.c +++ b/vm_method.c @@ -2,6 +2,8 @@ * This file is included by vm.c */ +#include "id_table.h" + #define METHOD_DEBUG 0 #if OPT_GLOBAL_METHOD_CACHE @@ -98,8 +100,8 @@ rb_clear_method_cache_by_class(VALUE klass) rb_subclass_entry_t *entry = RCLASS_EXT(klass)->subclasses; for (; entry != NULL; entry = entry->next) { - struct st_table *table = RCLASS_CALLABLE_M_TBL(entry->klass); - if (table) st_clear(table); + struct rb_id_table *table = RCLASS_CALLABLE_M_TBL(entry->klass); + if (table)rb_id_table_clear(table); } } } @@ -165,8 +167,9 @@ static inline rb_method_entry_t * lookup_method_table(VALUE klass, ID id) { st_data_t body; - st_table *m_tbl = RCLASS_M_TBL(klass); - if (st_lookup(m_tbl, id, &body)) { + struct rb_id_table *m_tbl = RCLASS_M_TBL(klass); + + if (rb_id_table_lookup(m_tbl, id, &body)) { return (rb_method_entry_t *) body; } else { @@ -448,7 +451,7 @@ rb_method_entry_make(VALUE klass, ID mid, VALUE defined_class, rb_method_visibil { rb_method_entry_t *me; - st_table *mtbl; + struct rb_id_table *mtbl; st_data_t data; int make_refined = 0; @@ -484,7 +487,7 @@ rb_method_entry_make(VALUE klass, ID mid, VALUE defined_class, rb_method_visibil mtbl = RCLASS_M_TBL(klass); /* check re-definition */ - if (st_lookup(mtbl, mid, &data)) { + if (rb_id_table_lookup(mtbl, mid, &data)) { rb_method_entry_t *old_me = (rb_method_entry_t *)data; rb_method_definition_t *old_def = old_me->def; @@ -543,7 +546,7 @@ rb_method_entry_make(VALUE klass, ID mid, VALUE defined_class, rb_method_visibil make_method_entry_refined(klass, me); } - st_insert(mtbl, mid, (st_data_t) me); + rb_id_table_insert(mtbl, mid, (VALUE)me); RB_OBJ_WRITTEN(klass, Qundef, (VALUE)me); VM_ASSERT(me->def != NULL); @@ -743,7 +746,7 @@ rb_method_entry(VALUE klass, ID id) static const rb_callable_method_entry_t * prepare_callable_method_entry(VALUE defined_class, ID id, const rb_method_entry_t *me) { - struct st_table *mtbl; + struct rb_id_table *mtbl; const rb_callable_method_entry_t *cme; if (me && me->defined_class == 0) { @@ -751,16 +754,16 @@ prepare_callable_method_entry(VALUE defined_class, ID id, const rb_method_entry_ VM_ASSERT(me->defined_class == 0); if ((mtbl = RCLASS_EXT(defined_class)->callable_m_tbl) == NULL) { - mtbl = RCLASS_EXT(defined_class)->callable_m_tbl = st_init_numtable(); + mtbl = RCLASS_EXT(defined_class)->callable_m_tbl = rb_id_table_create(0); } - if (st_lookup(mtbl, id, (st_data_t *)&me)) { + if (rb_id_table_lookup(mtbl, id, (VALUE *)&me)) { cme = (rb_callable_method_entry_t *)me; VM_ASSERT(callable_method_entry_p(cme)); } else { cme = rb_method_entry_complement_defined_class(me, defined_class); - st_insert(mtbl, id, (st_data_t)cme); + rb_id_table_insert(mtbl, id, (VALUE)cme); VM_ASSERT(callable_method_entry_p(cme)); } } @@ -902,7 +905,7 @@ rb_resolve_refined_method_callable(VALUE refinements, const rb_callable_method_e static void remove_method(VALUE klass, ID mid) { - st_data_t key, data; + VALUE data; rb_method_entry_t *me = 0; VALUE self = klass; @@ -912,7 +915,7 @@ remove_method(VALUE klass, ID mid) rb_warn("removing `%s' may cause serious problems", rb_id2name(mid)); } - if (!st_lookup(RCLASS_M_TBL(klass), mid, &data) || + if (!rb_id_table_lookup(RCLASS_M_TBL(klass), mid, &data) || !(me = (rb_method_entry_t *)data) || (!me->def || me->def->type == VM_METHOD_TYPE_UNDEF) || UNDEFINED_REFINED_METHOD_P(me->def)) { @@ -920,8 +923,7 @@ remove_method(VALUE klass, ID mid) rb_id2str(mid), rb_class_path(klass)); } - key = (st_data_t)mid; - st_delete(RCLASS_M_TBL(klass), &key, &data); + rb_id_table_delete(RCLASS_M_TBL(klass), mid); rb_vm_check_redefinition_opt_method(me, klass); rb_clear_method_cache_by_class(klass); -- cgit v1.2.3