aboutsummaryrefslogtreecommitdiffstats
path: root/string.c
diff options
context:
space:
mode:
authorJean Boussier <byroot@ruby-lang.org>2023-11-02 11:02:43 +0100
committerJean Boussier <jean.boussier@gmail.com>2023-11-02 23:34:58 +0100
commitac8ec004e5272d589caca30616dbe12862150188 (patch)
treea3fb421e8ad6ddf400130b81fe33805c7c352fab /string.c
parentee7bf4643d2716d44e13a732caf588b1a6275f7a (diff)
downloadruby-ac8ec004e5272d589caca30616dbe12862150188.tar.gz
Make String.new size pools aware.
If the required capacity would fit in an embded string, returns one. This can reduce malloc churn for code that use string buffers.
Diffstat (limited to 'string.c')
-rw-r--r--string.c93
1 files changed, 93 insertions, 0 deletions
diff --git a/string.c b/string.c
index 9f4a345726..13b63f023a 100644
--- a/string.c
+++ b/string.c
@@ -1874,6 +1874,98 @@ rb_str_init(int argc, VALUE *argv, VALUE str)
return str;
}
+/* :nodoc: */
+static VALUE
+rb_str_s_new(int argc, VALUE *argv, VALUE klass)
+{
+ if (klass != rb_cString) {
+ return rb_class_new_instance_pass_kw(argc, argv, klass);
+ }
+
+ static ID keyword_ids[2];
+ VALUE orig, opt, encoding = Qnil, capacity = Qnil;
+ VALUE kwargs[2];
+ rb_encoding *enc = NULL;
+
+ int n = rb_scan_args(argc, argv, "01:", &orig, &opt);
+ if (NIL_P(opt)) {
+ return rb_class_new_instance_pass_kw(argc, argv, klass);
+ }
+
+ keyword_ids[0] = rb_id_encoding();
+ CONST_ID(keyword_ids[1], "capacity");
+ rb_get_kwargs(opt, keyword_ids, 0, 2, kwargs);
+ encoding = kwargs[0];
+ capacity = kwargs[1];
+
+ int termlen = 1;
+
+ if (n == 1) {
+ orig = StringValue(orig);
+ }
+ else {
+ orig = Qnil;
+ }
+
+ if (UNDEF_P(encoding)) {
+ if (!NIL_P(orig)) {
+ encoding = rb_obj_encoding(orig);
+ }
+ }
+
+ if (!UNDEF_P(encoding)) {
+ enc = rb_to_encoding(encoding);
+ termlen = rb_enc_mbminlen(enc);
+ }
+
+ // If capacity is nil, we're basically just duping `orig`.
+ if (UNDEF_P(capacity)) {
+ if (NIL_P(orig)) {
+ VALUE empty_str = str_new(klass, "", 0);
+ if (enc) {
+ rb_enc_associate(empty_str, enc);
+ }
+ return empty_str;
+ }
+ VALUE copy = str_duplicate(klass, orig);
+ rb_enc_associate(copy, enc);
+ ENC_CODERANGE_CLEAR(copy);
+ return copy;
+ }
+
+ long capa = 0;
+ capa = NUM2LONG(capacity);
+ if (capa < 0) {
+ capa = 0;
+ }
+
+ if (!NIL_P(orig)) {
+ long orig_capa = rb_str_capacity(orig);
+ if (orig_capa > capa) {
+ capa = orig_capa;
+ }
+ }
+
+ long fake_len = capa - termlen;
+ if (fake_len < 0) {
+ fake_len = 0;
+ }
+
+ VALUE str = str_new0(klass, NULL, fake_len, termlen);
+ STR_SET_LEN(str, 0);
+ TERM_FILL(RSTRING_PTR(str), termlen);
+
+ if (enc) {
+ rb_enc_associate(str, enc);
+ }
+
+ if (!NIL_P(orig)) {
+ rb_str_buf_append(str, orig);
+ }
+
+ return str;
+}
+
#ifdef NONASCII_MASK
#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
@@ -11931,6 +12023,7 @@ Init_String(void)
st_foreach(rb_vm_fstring_table(), fstring_set_class_i, rb_cString);
rb_include_module(rb_cString, rb_mComparable);
rb_define_alloc_func(rb_cString, empty_str_alloc);
+ rb_define_singleton_method(rb_cString, "new", rb_str_s_new, -1);
rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
rb_define_method(rb_cString, "initialize", rb_str_init, -1);
rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);