diff options
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 33 |
1 files changed, 30 insertions, 3 deletions
@@ -997,10 +997,37 @@ rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) { + rb_encoding *ienc; VALUE str; - - str = rb_tainted_str_new_with_enc(ptr, len, eenc); - return rb_external_str_with_enc(str, eenc); + const int eidx = rb_enc_to_index(eenc); + + /* ASCII-8BIT case, no conversion */ + if ((eidx == rb_ascii8bit_encindex()) || + (eidx == rb_usascii_encindex() && search_nonascii(ptr, ptr + len))) { + return rb_tainted_str_new(ptr, len); + } + /* no default_internal or same encoding, no conversion */ + ienc = rb_default_internal_encoding(); + if (!ienc || eenc == ienc) { + return rb_tainted_str_new_with_enc(ptr, len, eenc); + } + /* ASCII compatible, and ASCII only string, no conversion in + * default_internal */ + if ((eidx == rb_ascii8bit_encindex()) || + (eidx == rb_usascii_encindex()) || + (rb_enc_asciicompat(eenc) && !search_nonascii(ptr, ptr + len))) { + return rb_tainted_str_new_with_enc(ptr, len, ienc); + } + /* convert from the given encoding to default_internal */ + str = rb_tainted_str_new_with_enc(NULL, 0, ienc); + /* when the conversion failed for some reason, just ignore the + * default_internal and result in the given encoding as-is. */ + if (NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr, len, eenc, 0, Qnil))) { + STR_SET_LEN(str, 0); + rb_enc_associate(str, eenc); + rb_str_cat(str, ptr, len); + } + return str; } VALUE |