From 5ec93b19bbbadfaffdd47f24d9540c78510abcd9 Mon Sep 17 00:00:00 2001 From: normal Date: Tue, 18 Jul 2017 02:29:59 +0000 Subject: hash: keep fstrings of tainted strings for string keys The same hash keys may be loaded from tainted data sources frequently (e.g. parsing headers from socket or loading YAML data from a file). If a non-tainted fstring already exists (because the application expects the hash key), cache and deduplicate the tainted version in the new tainted_frozen_strings table. For non-embedded strings, this also allows sharing with the underlying malloc-ed data. * vm_core.h (rb_vm_struct): add tainted_frozen_strings * vm.c (ruby_vm_destruct): free tainted_frozen_strings (Init_vm_objects): initialize tainted_frozen_strings (rb_vm_tfstring_table): accessor for tainted_frozen_strings * internal.h: declare rb_fstring_existing, rb_vm_tfstring_table * hash.c (fstring_existing_str): remove (moved to string.c) (hash_aset_str): use rb_fstring_existing * string.c (rb_fstring_existing): new, based on fstring_existing_str (tainted_fstr_update): new (rb_fstring_existing0): new, based on fstring_existing_str (rb_tainted_fstring_existing): new, special case for tainted strings (rb_str_free): delete from tainted_frozen_strings table * test/ruby/test_optimization.rb (test_hash_reuse_fstring): new test [ruby-core:82012] [Bug #13737] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@59354 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- vm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'vm.c') diff --git a/vm.c b/vm.c index 814f8b6780..75ca12f2a5 100644 --- a/vm.c +++ b/vm.c @@ -2206,6 +2206,10 @@ ruby_vm_destruct(rb_vm_t *vm) st_free_table(vm->frozen_strings); vm->frozen_strings = 0; } + if (vm->tainted_frozen_strings) { + st_free_table(vm->tainted_frozen_strings); + vm->tainted_frozen_strings = 0; + } rb_vm_gvl_destroy(vm); if (objspace) { rb_objspace_free(objspace); @@ -3142,6 +3146,8 @@ Init_vm_objects(void) vm->mark_object_ary = rb_ary_tmp_new(128); vm->loading_table = st_init_strtable(); vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 1000); + vm->tainted_frozen_strings = + st_init_table_with_size(&rb_fstring_hash_type, 1000); } /* top self */ @@ -3203,6 +3209,12 @@ rb_vm_fstring_table(void) return GET_VM()->frozen_strings; } +st_table * +rb_vm_tfstring_table(void) +{ + return GET_VM()->tainted_frozen_strings; +} + #if VM_COLLECT_USAGE_DETAILS #define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v)) -- cgit v1.2.3