From 76a929a7fca3c84630574e4daa9dab7a96b04fc8 Mon Sep 17 00:00:00 2001 From: ko1 Date: Wed, 9 Jul 2014 06:14:41 +0000 Subject: * parse.y: change Symbol <-> ID relationship to avoid exposing IDs from collectable symbols. [Bug #10014] Now, rb_check_id() returns 0 if corresponding symbol is pinned dynamic symbol. There is remaining intern_cstr_without_pindown(), it can return IDs from collectable symbols. We must be careful to use it (only used in parse.y). I think it should be removed if it does not have impact for performance. * parse.y: add: * STATIC_SYM2ID() * STATIC_ID2SYM() rename: * rb_pin_dynamic_symbol() -> dsymbol_pindown() * internal.h: remove: * rb_check_id_without_pindown() * rb_sym2id_without_pindown() add: * rb_check_symbol() * rb_check_symbol_cstr() * load.c: use rb_check_id() or rb_check_id_cstr(). * object.c: ditto. * struct.c: ditto. * thread.c: ditto. * vm_method.c: ditto. * string.c (sym_find): use only rb_check_symbol(). * sprintf.c (rb_str_format): use rb_check_symbol_cstr(). git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@46764 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 43 +++++++++++ internal.h | 8 +- load.c | 2 +- object.c | 12 +-- parse.y | 251 +++++++++++++++++++++++++++++++++++++----------------------- sprintf.c | 14 ++-- string.c | 9 +-- struct.c | 2 +- thread.c | 8 +- variable.c | 8 +- vm_method.c | 10 +-- 11 files changed, 231 insertions(+), 136 deletions(-) diff --git a/ChangeLog b/ChangeLog index 630a61619b..bbad1ab743 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,46 @@ +Wed Jul 9 14:45:39 2014 Koichi Sasada + + * parse.y: change Symbol <-> ID relationship to avoid + exposing IDs from collectable symbols. + [Bug #10014] + + Now, rb_check_id() returns 0 if corresponding symbol is + pinned dynamic symbol. + + There is remaining intern_cstr_without_pindown(), it can return + IDs from collectable symbols. We must be careful to use it + (only used in parse.y). I think it should be removed if + it does not have impact for performance. + + * parse.y: + add: + * STATIC_SYM2ID() + * STATIC_ID2SYM() + rename: + * rb_pin_dynamic_symbol() -> dsymbol_pindown() + + * internal.h: + remove: + * rb_check_id_without_pindown() + * rb_sym2id_without_pindown() + add: + * rb_check_symbol() + * rb_check_symbol_cstr() + + * load.c: use rb_check_id() or rb_check_id_cstr(). + + * object.c: ditto. + + * struct.c: ditto. + + * thread.c: ditto. + + * vm_method.c: ditto. + + * string.c (sym_find): use only rb_check_symbol(). + + * sprintf.c (rb_str_format): use rb_check_symbol_cstr(). + Wed Jul 9 12:21:55 2014 Koichi Sasada * parse.y (symbols_i): delete garbage symbols for Symbol.all_symbols. diff --git a/internal.h b/internal.h index 45b9eff0db..e3ab0dec8b 100644 --- a/internal.h +++ b/internal.h @@ -809,12 +809,12 @@ void rb_gc_mark_symbols(int full_mark); ID rb_make_internal_id(void); void rb_gc_free_dsymbol(VALUE); VALUE rb_str_dynamic_intern(VALUE); -ID rb_check_id_without_pindown(VALUE *); -ID rb_sym2id_without_pindown(VALUE); +ID rb_id_attrget(ID id); + +VALUE rb_check_symbol(volatile VALUE *namep); #ifdef RUBY_ENCODING_H -ID rb_check_id_cstr_without_pindown(const char *, long, rb_encoding *); +VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc); #endif -ID rb_id_attrget(ID id); /* proc.c */ VALUE rb_proc_location(VALUE self); diff --git a/load.c b/load.c index bb8829da05..e88abfbb0c 100644 --- a/load.c +++ b/load.c @@ -1108,7 +1108,7 @@ rb_mod_autoload(VALUE mod, VALUE sym, VALUE file) static VALUE rb_mod_autoload_p(VALUE mod, VALUE sym) { - ID id = rb_check_id_without_pindown(&sym); + ID id = rb_check_id(&sym); if (!id) { return Qnil; } diff --git a/object.c b/object.c index a154d028cb..84360ac0e4 100644 --- a/object.c +++ b/object.c @@ -2135,7 +2135,7 @@ rb_mod_const_get(int argc, VALUE *argv, VALUE mod) if (pbeg == p) goto wrong_name; - id = rb_check_id_cstr_without_pindown(pbeg, len = p-pbeg, enc); + id = rb_check_id_cstr(pbeg, len = p-pbeg, enc); beglen = pbeg-path; if (p < pend && p[0] == ':') { @@ -2277,7 +2277,7 @@ rb_mod_const_defined(int argc, VALUE *argv, VALUE mod) if (pbeg == p) goto wrong_name; - id = rb_check_id_cstr_without_pindown(pbeg, len = p-pbeg, enc); + id = rb_check_id_cstr(pbeg, len = p-pbeg, enc); beglen = pbeg-path; if (p < pend && p[0] == ':') { @@ -2348,7 +2348,7 @@ rb_mod_const_defined(int argc, VALUE *argv, VALUE mod) static VALUE rb_obj_ivar_get(VALUE obj, VALUE iv) { - ID id = rb_check_id_without_pindown(&iv); + ID id = rb_check_id(&iv); if (!id) { if (rb_is_instance_name(iv)) { @@ -2419,7 +2419,7 @@ rb_obj_ivar_set(VALUE obj, VALUE iv, VALUE val) static VALUE rb_obj_ivar_defined(VALUE obj, VALUE iv) { - ID id = rb_check_id_without_pindown(&iv); + ID id = rb_check_id(&iv); if (!id) { if (rb_is_instance_name(iv)) { @@ -2456,7 +2456,7 @@ rb_obj_ivar_defined(VALUE obj, VALUE iv) static VALUE rb_mod_cvar_get(VALUE obj, VALUE iv) { - ID id = rb_check_id_without_pindown(&iv); + ID id = rb_check_id(&iv); if (!id) { if (rb_is_class_name(iv)) { @@ -2522,7 +2522,7 @@ rb_mod_cvar_set(VALUE obj, VALUE iv, VALUE val) static VALUE rb_mod_cvar_defined(VALUE obj, VALUE iv) { - ID id = rb_check_id_without_pindown(&iv); + ID id = rb_check_id(&iv); if (!id) { if (rb_is_class_name(iv)) { diff --git a/parse.y b/parse.y index 7b47522583..26c5c314da 100644 --- a/parse.y +++ b/parse.y @@ -51,7 +51,10 @@ static ID register_static_symid_str(ID, VALUE); #define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc) #include "id.c" #endif + #define ID_DYNAMIC_SYM_P(id) (!(id&ID_STATIC_SYM)&&id>tLAST_TOKEN) +#define STATIC_SYM2ID(sym) RSHIFT((unsigned long)(sym), RUBY_SPECIAL_SHIFT) +#define STATIC_ID2SYM(id) (((VALUE)(id)<tLAST_OP_ID) @@ -8831,7 +8834,6 @@ block_dup_check_gen(struct parser_params *parser, NODE *node1, NODE *node2) } } -static ID rb_pin_dynamic_symbol(VALUE); static ID attrsetname_to_attr(VALUE name); static int lookup_id_str(ID id, st_data_t *data); @@ -10481,7 +10483,7 @@ dsymbol_check(const VALUE sym) } static ID -rb_pin_dynamic_symbol(VALUE sym) +dsymbol_pindown(VALUE sym) { must_be_dynamic_symbol(sym); @@ -10496,19 +10498,55 @@ rb_pin_dynamic_symbol(VALUE sym) return (ID)sym; } -static int -lookup_str_id(st_data_t str, st_data_t *data) +static ID +lookup_str_id(st_data_t str, st_data_t *id_datap) { - ID id; + if (st_lookup(global_symbols.str_id, str, id_datap)) { + const ID id = (ID)*id_datap; - if (!st_lookup(global_symbols.str_id, str, data)) { + if (ID_DYNAMIC_SYM_P(id) && !SYMBOL_PINNED_P(id)) { + *id_datap = 0; + return FALSE; + } + else { + return TRUE; + } + } + else { return FALSE; } - id = (ID)*data; +} + +static VALUE +lookup_str_sym(const st_data_t str, st_data_t *sym_datap) +{ + if (st_lookup(global_symbols.str_id, str, sym_datap)) { + const ID id = *sym_datap; + + if (ID_DYNAMIC_SYM_P(id)) { + *sym_datap = dsymbol_check(id); + } + else { + *sym_datap = STATIC_ID2SYM(id); + } + return TRUE; + } + else { + return FALSE; + } +} + +static int +lookup_id_str(ID id, st_data_t *data) +{ if (ID_DYNAMIC_SYM_P(id)) { - *data = (st_data_t)rb_pin_dynamic_symbol((VALUE)id); + *data = RSYMBOL(id)->fstr; + return TRUE; } - return TRUE; + if (st_lookup(global_symbols.id_str, id, data)) { + return TRUE; + } + return FALSE; } static ID @@ -10535,7 +10573,7 @@ rb_intern3(const char *name, long len, rb_encoding *enc) id = intern_cstr_without_pindown(name, len, enc); if (ID_DYNAMIC_SYM_P(id)) { - id = rb_pin_dynamic_symbol((VALUE)id); + id = dsymbol_pindown((VALUE)id); } return id; @@ -10684,10 +10722,12 @@ rb_intern(const char *name) ID rb_intern_str(VALUE str) { - st_data_t id; + st_data_t sym; + + if (lookup_str_sym(str, &sym)) { + return SYM2ID(sym); + } - if (lookup_str_id(str, &id)) - return (ID)id; return intern_str(rb_str_dup(str)); } @@ -10733,15 +10773,9 @@ rb_str_dynamic_intern(VALUE str) { #if USE_SYMBOL_GC rb_encoding *enc, *ascii; - ID id; + VALUE sym; - if (st_lookup(global_symbols.str_id, str, &id)) { - VALUE sym = ID2SYM(id); - if (ID_DYNAMIC_SYM_P(id)) { - /* because of lazy sweep, dynamic symbol may be unmarked already and swept - * at next time */ - sym = dsymbol_check(sym); - } + if (lookup_str_sym(str, &sym)) { return sym; } @@ -10762,40 +10796,17 @@ rb_str_dynamic_intern(VALUE str) #endif } -static int -lookup_id_str(ID id, st_data_t *data) -{ - if (ID_DYNAMIC_SYM_P(id)) { - id = (ID)dsymbol_check((VALUE)id); - *data = RSYMBOL(id)->fstr; - return TRUE; - } - if (st_lookup(global_symbols.id_str, id, data)) { - return TRUE; - } - return FALSE; -} - ID -rb_sym2id(VALUE x) +rb_sym2id(VALUE sym) { - if (STATIC_SYM_P(x)) { - return RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT); + if (STATIC_SYM_P(sym)) { + return STATIC_SYM2ID(sym); } else { - return rb_pin_dynamic_symbol(x); - } -} - -ID -rb_sym2id_without_pindown(VALUE x) -{ - if (STATIC_SYM_P(x)) { - return RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT); - } - else { - must_be_dynamic_symbol(x); - return (ID)x; + if (!SYMBOL_PINNED_P(sym)) { + dsymbol_pindown(sym); + } + return (ID)sym; } } @@ -10803,7 +10814,7 @@ VALUE rb_id2sym(ID x) { if (!ID_DYNAMIC_SYM_P(x)) { - return ((VALUE)(x)<fstr; + } + else { + return rb_id2str(STATIC_SYM2ID(sym)); + } } - VALUE rb_id2str(ID id) { @@ -10862,7 +10877,7 @@ rb_id2str(ID id) str = rb_str_dup(str); rb_str_cat(str, "=", 1); register_static_symid_str(id, str); - if (st_lookup(global_symbols.id_str, id, &data)) { + if (lookup_id_str(id, &data)) { VALUE str = (VALUE)data; if (RBASIC(str)->klass == 0) RBASIC_SET_CLASS_RAW(str, rb_cString); @@ -10985,38 +11000,56 @@ rb_is_junk_id(ID id) ID rb_check_id(volatile VALUE *namep) { - ID id; + st_data_t id; + VALUE tmp; + VALUE name = *namep; - id = rb_check_id_without_pindown((VALUE *)namep); - if (ID_DYNAMIC_SYM_P(id)) { - id = rb_pin_dynamic_symbol((VALUE)id); + if (STATIC_SYM_P(name)) { + return STATIC_SYM2ID(name); + } + else if (DYNAMIC_SYM_P(name)) { + if (SYMBOL_PINNED_P(name)) { + return (ID)name; + } + else { + *namep = RSYMBOL(name)->fstr; + return 0; + } + } + else if (!RB_TYPE_P(name, T_STRING)) { + tmp = rb_check_string_type(name); + if (NIL_P(tmp)) { + tmp = rb_inspect(name); + rb_raise(rb_eTypeError, "%s is not a symbol nor a string", + RSTRING_PTR(tmp)); + } + name = tmp; + *namep = name; } - return id; -} + sym_check_asciionly(name); -ID -rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc) -{ - ID id; + if (lookup_str_id(name, &id)) { + return id; + } - id = rb_check_id_cstr_without_pindown(ptr, len, enc); - if (ID_DYNAMIC_SYM_P(id)) { - id = rb_pin_dynamic_symbol((VALUE)id); + { + ID gid = attrsetname_to_attr(name); + if (gid) return rb_id_attrset(gid); } - return id; + return (ID)0; } -ID -rb_check_id_without_pindown(VALUE *namep) +VALUE +rb_check_symbol(volatile VALUE *namep) { - st_data_t id; + st_data_t sym; VALUE tmp; VALUE name = *namep; if (SYMBOL_P(name)) { - return rb_sym2id_without_pindown(name); + return name; } else if (!RB_TYPE_P(name, T_STRING)) { tmp = rb_check_string_type(name); @@ -11031,55 +11064,81 @@ rb_check_id_without_pindown(VALUE *namep) sym_check_asciionly(name); - if (st_lookup(global_symbols.str_id, (st_data_t)name, &id)) - return (ID)id; + if (lookup_str_sym(name, &sym)) { + return sym; + } { ID gid = attrsetname_to_attr(name); - if (gid) return rb_id_attrset(gid); + if (gid) return ID2SYM(rb_id_attrset(gid)); } - return (ID)0; + return Qnil; } -static ID -attrsetname_to_attr(VALUE name) +ID +rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc) { - if (rb_is_attrset_name(name)) { - st_data_t id; - struct RString fake_str; - /* make local name by chopping '=' */ - const VALUE localname = setup_fake_str(&fake_str, RSTRING_PTR(name), RSTRING_LEN(name) - 1); - rb_enc_copy(localname, name); - OBJ_FREEZE(localname); + st_data_t id; + struct RString fake_str; + const VALUE name = setup_fake_str(&fake_str, ptr, len); + rb_enc_associate(name, enc); - if (st_lookup(global_symbols.str_id, (st_data_t)localname, &id)) { - return (ID)id; + sym_check_asciionly(name); + + if (lookup_str_id(name, &id)) { + return id; + } + + if (rb_is_attrset_name(name)) { + fake_str.as.heap.len = len - 1; + if (lookup_str_id((st_data_t)name, &id)) { + return rb_id_attrset((ID)id); } - RB_GC_GUARD(name); } return (ID)0; } -ID -rb_check_id_cstr_without_pindown(const char *ptr, long len, rb_encoding *enc) +VALUE +rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc) { - st_data_t id; + st_data_t sym; struct RString fake_str; const VALUE name = setup_fake_str(&fake_str, ptr, len); rb_enc_associate(name, enc); sym_check_asciionly(name); - if (st_lookup(global_symbols.str_id, (st_data_t)name, &id)) - return (ID)id; + if (lookup_str_sym(name, &sym)) { + return sym; + } if (rb_is_attrset_name(name)) { fake_str.as.heap.len = len - 1; - if (st_lookup(global_symbols.str_id, (st_data_t)name, &id)) { - return rb_id_attrset((ID)id); + if (lookup_str_sym((st_data_t)name, &sym)) { + return ID2SYM(rb_id_attrset(SYM2ID(sym))); + } + } + + return Qnil; +} + +static ID +attrsetname_to_attr(VALUE name) +{ + if (rb_is_attrset_name(name)) { + st_data_t id; + struct RString fake_str; + /* make local name by chopping '=' */ + const VALUE localname = setup_fake_str(&fake_str, RSTRING_PTR(name), RSTRING_LEN(name) - 1); + rb_enc_copy(localname, name); + OBJ_FREEZE(localname); + + if (lookup_str_id((st_data_t)localname, &id)) { + return (ID)id; } + RB_GC_GUARD(name); } return (ID)0; diff --git a/sprintf.c b/sprintf.c index 6a2ee402aa..3d2d6db4b6 100644 --- a/sprintf.c +++ b/sprintf.c @@ -506,7 +506,7 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) for (; p < end; p++) { const char *t; int n; - ID id = 0; + VALUE sym = Qnil; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); @@ -601,16 +601,16 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) } #endif len = (int)(p - start + 1); /* including parenthesis */ - if (id) { + if (sym != Qnil) { rb_enc_raise(enc, rb_eArgError, "named%.*s after <%s>", - len, start, rb_id2name(id)); + len, start, RSTRING_PTR(rb_sym2str(sym))); } CHECKNAMEARG(start, len, enc); get_hash(&hash, argc, argv); - id = rb_check_id_cstr_without_pindown(start + 1, - len - 2 /* without parenthesis */, - enc); - if (id) nextvalue = rb_hash_lookup2(hash, ID2SYM(id), Qundef); + sym = rb_check_symbol_cstr(start + 1, + len - 2 /* without parenthesis */, + enc); + if (sym != Qnil) nextvalue = rb_hash_lookup2(hash, sym, Qundef); if (nextvalue == Qundef) { rb_enc_raise(enc, rb_eKeyError, "key%.*s not found", len, start); } diff --git a/string.c b/string.c index 552af5c678..210be620c6 100644 --- a/string.c +++ b/string.c @@ -8323,14 +8323,7 @@ str_scrub_bang(int argc, VALUE *argv, VALUE str) static VALUE sym_find(VALUE dummy, VALUE sym) { - ID id = rb_check_id(&sym); - - if (id) { - return ID2SYM(id); - } - else { - return Qnil; - } + return rb_check_symbol(&sym); } /* diff --git a/struct.c b/struct.c index c71bef8ee6..b2d7590a44 100644 --- a/struct.c +++ b/struct.c @@ -759,7 +759,7 @@ rb_struct_aref(VALUE s, VALUE idx) return rb_struct_aref_sym(s, idx); } else if (RB_TYPE_P(idx, T_STRING)) { - ID id = rb_check_id_without_pindown(&idx); + ID id = rb_check_id(&idx); if (!id) { rb_name_error_str(idx, "no member '%"PRIsVALUE"' in struct", QUOTE(idx)); diff --git a/thread.c b/thread.c index ae24ca41e1..682e05f89e 100644 --- a/thread.c +++ b/thread.c @@ -2820,7 +2820,7 @@ rb_thread_local_aref(VALUE thread, ID id) static VALUE rb_thread_aref(VALUE thread, VALUE key) { - ID id = rb_check_id_without_pindown(&key); + ID id = rb_check_id(&key); if (!id) return Qnil; return rb_thread_local_aref(thread, id); } @@ -2906,7 +2906,7 @@ static VALUE rb_thread_variable_get(VALUE thread, VALUE key) { VALUE locals; - ID id = rb_check_id_without_pindown(&key); + ID id = rb_check_id(&key); if (!id) return Qnil; locals = rb_ivar_get(thread, id_locals); @@ -2952,7 +2952,7 @@ static VALUE rb_thread_key_p(VALUE self, VALUE key) { rb_thread_t *th; - ID id = rb_check_id_without_pindown(&key); + ID id = rb_check_id(&key); GetThreadPtr(self, th); @@ -3073,7 +3073,7 @@ static VALUE rb_thread_variable_p(VALUE thread, VALUE key) { VALUE locals; - ID id = rb_check_id_without_pindown(&key); + ID id = rb_check_id(&key); if (!id) return Qfalse; diff --git a/variable.c b/variable.c index 34528e87fc..975fcf3ab2 100644 --- a/variable.c +++ b/variable.c @@ -353,7 +353,7 @@ rb_path_to_class(VALUE pathname) } while (*p) { while (*p && *p != ':') p++; - id = rb_check_id_cstr_without_pindown(pbeg, p-pbeg, enc); + id = rb_check_id_cstr(pbeg, p-pbeg, enc); if (p[0] == ':') { if (p[1] != ':') goto undefined_class; p += 2; @@ -1404,7 +1404,7 @@ VALUE rb_obj_remove_instance_variable(VALUE obj, VALUE name) { VALUE val = Qnil; - const ID id = rb_check_id_without_pindown(&name); + const ID id = rb_check_id(&name); st_data_t n, v; struct st_table *iv_index_tbl; st_data_t index; @@ -1920,7 +1920,7 @@ rb_public_const_get_at(VALUE klass, ID id) VALUE rb_mod_remove_const(VALUE mod, VALUE name) { - const ID id = rb_check_id_without_pindown(&name); + const ID id = rb_check_id(&name); if (!id) { if (rb_is_const_name(name)) { @@ -2569,7 +2569,7 @@ rb_mod_class_variables(int argc, const VALUE *argv, VALUE mod) VALUE rb_mod_remove_cvar(VALUE mod, VALUE name) { - const ID id = rb_check_id_without_pindown(&name); + const ID id = rb_check_id(&name); st_data_t val, n = id; if (!id) { diff --git a/vm_method.c b/vm_method.c index 966fc419b6..cb450b1910 100644 --- a/vm_method.c +++ b/vm_method.c @@ -797,7 +797,7 @@ rb_mod_remove_method(int argc, VALUE *argv, VALUE mod) for (i = 0; i < argc; i++) { VALUE v = argv[i]; - ID id = rb_check_id_without_pindown(&v); + ID id = rb_check_id(&v); if (!id) { rb_name_error_str(v, "method `%s' not defined in %s", RSTRING_PTR(v), rb_class2name(mod)); @@ -1008,7 +1008,7 @@ rb_mod_undef_method(int argc, VALUE *argv, VALUE mod) int i; for (i = 0; i < argc; i++) { VALUE v = argv[i]; - ID id = rb_check_id_without_pindown(&v); + ID id = rb_check_id(&v); if (!id) { rb_method_name_error(mod, v); } @@ -1048,7 +1048,7 @@ rb_mod_undef_method(int argc, VALUE *argv, VALUE mod) static VALUE rb_mod_method_defined(VALUE mod, VALUE mid) { - ID id = rb_check_id_without_pindown(&mid); + ID id = rb_check_id(&mid); if (!id || !rb_method_boundp(mod, id, 1)) { return Qfalse; } @@ -1062,7 +1062,7 @@ static VALUE check_definition(VALUE mod, VALUE mid, rb_method_flag_t noex) { const rb_method_entry_t *me; - ID id = rb_check_id_without_pindown(&mid); + ID id = rb_check_id(&mid); if (!id) return Qfalse; me = rb_method_entry(mod, id, 0); if (me) { @@ -1691,7 +1691,7 @@ obj_respond_to(int argc, VALUE *argv, VALUE obj) ID id; rb_scan_args(argc, argv, "11", &mid, &priv); - if (!(id = rb_check_id_without_pindown(&mid))) { + if (!(id = rb_check_id(&mid))) { if (!rb_method_basic_definition_p(CLASS_OF(obj), idRespond_to_missing)) { VALUE args[2]; args[0] = ID2SYM(rb_to_id(mid)); -- cgit v1.2.3