From 2de1dbdffc99e21fc034241e581b85e4531097f4 Mon Sep 17 00:00:00 2001 From: nobu Date: Fri, 20 Jan 2017 02:39:27 +0000 Subject: array.c: improve Array#sample * array.c (rb_ary_sample): improve performance when many samples from a large array. based on the patch by tomoya ishida in [ruby-dev:49956]. [Bug #13136] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@57380 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- array.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/array.c b/array.c index a19e7bb710..f170a942ac 100644 --- a/array.c +++ b/array.c @@ -4797,6 +4797,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) VALUE opts, randgen = rb_cRandom; long n, len, i, j, k, idx[10]; long rnds[numberof(idx)]; + long memo_threshold; if (OPTHASH_GIVEN_P(opts)) { VALUE rnd; @@ -4856,6 +4857,11 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) } return rb_ary_new_from_args(3, RARRAY_AREF(ary, i), RARRAY_AREF(ary, j), RARRAY_AREF(ary, k)); } + memo_threshold = + len < 2560 ? len / 128 : + len < 5120 ? len / 64 : + len < 10240 ? len / 32 : + len / 16; if (n <= numberof(idx)) { long sorted[numberof(idx)]; sorted[0] = idx[0] = rnds[0]; @@ -4875,6 +4881,38 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary) } }); } + else if (n <= memo_threshold / 2) { + long max_idx = 0; +#undef RUBY_UNTYPED_DATA_WARNING +#define RUBY_UNTYPED_DATA_WARNING 0 + VALUE vmemo = Data_Wrap_Struct(0, 0, 0, st_free_table); + st_table *memo = st_init_numtable_with_size(n); + DATA_PTR(vmemo) = memo; + result = rb_ary_new_capa(n); + RARRAY_PTR_USE(result, ptr_result, { + for (i=0; i max_idx) max_idx = r; + } + len = RARRAY_LEN(ary); + if (len <= max_idx) n = 0; + else if (n > len) n = len; + RARRAY_PTR_USE(ary, ptr_ary, { + for (i=0; i