aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJean Boussier <jean.boussier@gmail.com>2022-07-21 13:16:16 +0200
committerJean Boussier <jean.boussier@gmail.com>2022-07-21 15:06:50 +0200
commitf954c5dae4c144207bd366cbc832d08930882b23 (patch)
tree76c47ab722e4a6be9bfc0097e42ff2f9cb10b82a
parent80672b2d0e0f8e3c43446f5833b1a3491460e215 (diff)
downloadruby-f954c5dae4c144207bd366cbc832d08930882b23.tar.gz
string.c: use str_enc_fastpath in TERM_LEN
Not having to fetch the rb_encoding save a significant amount of time. Additionally, even when we have to fetch it, we can do it faster using `ENCODING_GET` rather than `rb_enc_get`. ``` compare-ruby: ruby 3.2.0dev (2022-07-19T08:41:40Z master cb9fd920a3) [arm64-darwin21] built-ruby: ruby 3.2.0dev (2022-07-21T11:16:16Z faster-buffer-conc.. 4f001f0748) [arm64-darwin21] warming up... | |compare-ruby|built-ruby| |:---------------------|-----------:|---------:| |binary_concat_utf8 | 510.580k| 565.600k| | | -| 1.11x| |binary_concat_binary | 512.653k| 571.483k| | | -| 1.11x| |utf8_concat_utf8 | 511.396k| 566.879k| | | -| 1.11x| ```
-rw-r--r--benchmark/string_concat.yml7
-rw-r--r--string.c30
2 files changed, 19 insertions, 18 deletions
diff --git a/benchmark/string_concat.yml b/benchmark/string_concat.yml
index 656bcd1cd7..0ff1dc25b6 100644
--- a/benchmark/string_concat.yml
+++ b/benchmark/string_concat.yml
@@ -1,6 +1,7 @@
prelude: |
CHUNK = "a" * 64
BCHUNK = "a".b * 64
+ GC.disable # GC causes a lot of variance
benchmark:
binary_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
@@ -11,7 +12,7 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
- buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
+ buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
binary_concat_binary: |
buffer = String.new(capacity: 4096, encoding: Encoding::BINARY)
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
@@ -21,7 +22,7 @@ benchmark:
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
- buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
+ buffer << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK << BCHUNK
utf8_concat_utf8: |
buffer = String.new(capacity: 4096, encoding: Encoding::UTF_8)
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
@@ -31,4 +32,4 @@ benchmark:
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
- buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
+ buffer << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK << CHUNK
diff --git a/string.c b/string.c
index 8f519da7d1..c726adb2c3 100644
--- a/string.c
+++ b/string.c
@@ -150,7 +150,21 @@ VALUE rb_cSymbol;
}\
} while (0)
-#define TERM_LEN(str) rb_enc_mbminlen(rb_enc_get(str))
+static inline bool
+str_enc_fastpath(VALUE str)
+{
+ // The overwhelming majority of strings are in one of these 3 encodings.
+ switch (ENCODING_GET_INLINED(str)) {
+ case ENCINDEX_ASCII_8BIT:
+ case ENCINDEX_UTF_8:
+ case ENCINDEX_US_ASCII:
+ return true;
+ default:
+ return false;
+ }
+}
+
+#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
#define TERM_FILL(ptr, termlen) do {\
char *const term_fill_ptr = (ptr);\
const int term_fill_len = (termlen);\
@@ -3311,20 +3325,6 @@ rb_str_buf_cat_ascii(VALUE str, const char *ptr)
}
}
-static inline bool
-str_enc_fastpath(VALUE str)
-{
- // The overwhelming majority of strings are in one of these 3 encodings.
- switch (ENCODING_GET_INLINED(str)) {
- case ENCINDEX_ASCII_8BIT:
- case ENCINDEX_UTF_8:
- case ENCINDEX_US_ASCII:
- return true;
- default:
- return false;
- }
-}
-
VALUE
rb_str_buf_append(VALUE str, VALUE str2)
{