aboutsummaryrefslogtreecommitdiffstats
path: root/array.c
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-09-29 07:43:22 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2017-09-29 07:43:22 +0000
commita13b6ba18dc4a8b84236bc733f921e33545f6a7f (patch)
treef12916c712e112936e1ac5a25321193ab72b95bd /array.c
parentcd6ce13070bb44c99a1583da69d1d62f4693cb47 (diff)
downloadruby-a13b6ba18dc4a8b84236bc733f921e33545f6a7f.tar.gz
array.c: improve operations on small arrays
[Feature #13884] Reduce number of memory allocations for "and", "or" and "diff" operations on small arrays Very often, arrays are used to filter parameters and to select interesting items from 2 collections and very often these collections are small enough, for example: ```ruby SAFE_COLUMNS = [:id, :title, :created_at] def columns @all_columns & SAFE_COLUMNS end ``` In this patch, I got rid of unnecessary memory allocations for small arrays when "and", "or" and "diff" operations are performed. name | HEAD | PATCH -----------------+------:+------: array_small_and | 0.615 | 0.263 array_small_diff | 0.676 | 0.282 array_small_or | 0.953 | 0.463 name | PATCH -----------------+------: array_small_and | 2.343 array_small_diff | 2.392 array_small_or | 2.056 name | HEAD | PATCH -----------------+------:+------: array_small_and | 1.429 | 1.005 array_small_diff | 1.493 | 0.878 array_small_or | 1.672 | 1.152 name | PATCH -----------------+------: array_small_and | 1.422 array_small_diff | 1.700 array_small_or | 1.452 Author: Dmitry Bochkarev <dimabochkarev@gmail.com> git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60057 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'array.c')
-rw-r--r--array.c54
1 files changed, 52 insertions, 2 deletions
diff --git a/array.c b/array.c
index 27126cefbc..a59cb6c3e9 100644
--- a/array.c
+++ b/array.c
@@ -30,6 +30,7 @@ VALUE rb_cArray;
#define ARY_DEFAULT_SIZE 16
#define ARY_MAX_SIZE (LONG_MAX / (int)sizeof(VALUE))
+#define SMALL_ARRAY_LEN 16
# define ARY_SHARED_P(ary) \
(assert(!FL_TEST((ary), ELTS_SHARED) || !FL_TEST((ary), RARRAY_EMBED_FLAG)), \
@@ -3985,6 +3986,20 @@ rb_ary_includes(VALUE ary, VALUE item)
return Qfalse;
}
+static VALUE
+rb_ary_includes_by_eql(VALUE ary, VALUE item)
+{
+ long i;
+ VALUE e;
+
+ for (i=0; i<RARRAY_LEN(ary); i++) {
+ e = RARRAY_AREF(ary, i);
+ if (rb_eql(item, e)) {
+ return Qtrue;
+ }
+ }
+ return Qfalse;
+}
static VALUE
recursive_cmp(VALUE ary1, VALUE ary2, int recur)
@@ -4135,9 +4150,19 @@ rb_ary_diff(VALUE ary1, VALUE ary2)
VALUE hash;
long i;
- hash = ary_make_hash(to_ary(ary2));
+ ary2 = to_ary(ary2);
ary3 = rb_ary_new();
+ if (RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
+ VALUE elt = rb_ary_elt(ary1, i);
+ if (rb_ary_includes_by_eql(ary2, elt)) continue;
+ rb_ary_push(ary3, elt);
+ }
+ return ary3;
+ }
+
+ hash = ary_make_hash(ary2);
for (i=0; i<RARRAY_LEN(ary1); i++) {
if (st_lookup(rb_hash_tbl_raw(hash), RARRAY_AREF(ary1, i), 0)) continue;
rb_ary_push(ary3, rb_ary_elt(ary1, i));
@@ -4173,6 +4198,17 @@ rb_ary_and(VALUE ary1, VALUE ary2)
ary2 = to_ary(ary2);
ary3 = rb_ary_new();
if (RARRAY_LEN(ary2) == 0) return ary3;
+
+ if (RARRAY_LEN(ary1) <= SMALL_ARRAY_LEN && RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
+ v = RARRAY_AREF(ary1, i);
+ if (!rb_ary_includes_by_eql(ary2, v)) continue;
+ if (rb_ary_includes_by_eql(ary3, v)) continue;
+ rb_ary_push(ary3, v);
+ }
+ return ary3;
+ }
+
hash = ary_make_hash(ary2);
table = rb_hash_tbl_raw(hash);
@@ -4218,8 +4254,22 @@ rb_ary_or(VALUE ary1, VALUE ary2)
long i;
ary2 = to_ary(ary2);
- hash = ary_make_hash(ary1);
+ if (RARRAY_LEN(ary1) + RARRAY_LEN(ary2) <= SMALL_ARRAY_LEN) {
+ ary3 = rb_ary_new();
+ for (i=0; i<RARRAY_LEN(ary1); i++) {
+ VALUE elt = rb_ary_elt(ary1, i);
+ if (rb_ary_includes_by_eql(ary3, elt)) continue;
+ rb_ary_push(ary3, elt);
+ }
+ for (i=0; i<RARRAY_LEN(ary2); i++) {
+ VALUE elt = rb_ary_elt(ary2, i);
+ if (rb_ary_includes_by_eql(ary3, elt)) continue;
+ rb_ary_push(ary3, elt);
+ }
+ return ary3;
+ }
+ hash = ary_make_hash(ary1);
for (i=0; i<RARRAY_LEN(ary2); i++) {
VALUE elt = RARRAY_AREF(ary2, i);
if (!st_update(RHASH_TBL_RAW(hash), (st_data_t)elt, ary_hash_orset, (st_data_t)elt)) {