From 13b4440b121b5f9fcd420ddc8d679f54a1a808c3 Mon Sep 17 00:00:00 2001 From: nobu Date: Sat, 21 Oct 2017 23:21:05 +0000 Subject: Improve performance of string interpolation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds pre-allocation to string interpolation, which avoids unnecessary capacity resizing. For small strings, the optimized `rb_str_resurrect` operation is faster, so pre-allocation is done only when the total length of the concatenated strings is large. `MIN_PRE_ALLOC_SIZE` was chosen by experimenting on a local machine (x86_64-apple-darwin 16.5.0, Apple LLVM version 8.1.0 (clang-802.0.42)). String interpolation becomes around 72% faster when a large string is created. * Before ``` Calculating ------------------------------------- Large string interpolation 1.276M (± 5.9%) i/s - 6.358M in 5.002022s Small string interpolation 5.156M (± 5.5%) i/s - 25.728M in 5.005731s ``` * After ``` Calculating ------------------------------------- Large string interpolation 2.201M (± 5.8%) i/s - 11.063M in 5.043724s Small string interpolation 5.192M (± 5.7%) i/s - 25.971M in 5.020516s ``` * Test code ```ruby require 'benchmark/ips' Benchmark.ips do |x| x.report "Large string interpolation" do |t| a = "Hellooooooooooooooooooooooooooooooooooooooooooooooooooo" b = "Wooooooooooooooooooooooooooooooooooooooooooooooooooorld" t.times do "#{a}, #{b}!" end end x.report "Small string interpolation" do |t| a = "Hello" b = "World" t.times do "#{a}, #{b}!" 
end end end ``` [Fix GH-1626] From: Nao Minami git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60320 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index c1e986fb9a..411ec314d7 100644 --- a/string.c +++ b/string.c @@ -2903,15 +2903,30 @@ rb_str_append(VALUE str, VALUE str2) return rb_str_buf_append(str, str2); } +#define MIN_PRE_ALLOC_SIZE 48 + VALUE rb_str_concat_literals(size_t num, const VALUE *strary) { VALUE str; - size_t i; + size_t i, s; + long len = 1; + + if (UNLIKELY(!num)) return rb_str_new(0, 0); + if (UNLIKELY(num == 1)) return rb_str_resurrect(strary[0]); + + for (i = 0; i < num; ++i) { len += RSTRING_LEN(strary[i]); } + if (LIKELY(len < MIN_PRE_ALLOC_SIZE)) { + str = rb_str_resurrect(strary[0]); + s = 1; + } + else { + str = rb_str_buf_new(len); + rb_enc_copy(str, strary[0]); + s = 0; + } - if (!num) return rb_str_new(0, 0); - str = rb_str_resurrect(strary[0]); - for (i = 1; i < num; ++i) { + for (i = s; i < num; ++i) { const VALUE v = strary[i]; int encidx = ENCODING_GET(v); -- cgit v1.2.3