diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-05-20 07:59:33 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-05-20 07:59:33 +0000 |
commit | 8405033139bc3ee4cbdf1b1c415910f7df38ebfd (patch) | |
tree | 6cbb7e8e46c9acf8f7eeed8aaf5f6c342b969a91 /string.c | |
parent | 7992fabaf40f8635653d267592f591d9cc4c511e (diff) | |
download | ruby-8405033139bc3ee4cbdf1b1c415910f7df38ebfd.tar.gz |
* string.c (rb_enc_strlen_cr): need to set ENC_CODERANGE_7BIT if
search_nonascii() fails. [ruby-dev:34751]
* string.c (rb_str_reverse): preserve coderange info if the
receiver is 7bit string.
* string.c (rb_str_reverse_bang): ditto.
* string.c (rb_str_reverse_bang): should have called
single_byte_optimizable before rb_str_modify() that clears
coderange info.
* string.c (tr_trans): handle single bytes more eagerly.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@16492 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- | string.c | 50 |
1 files changed, 35 insertions, 15 deletions
@@ -726,6 +726,7 @@ rb_enc_strlen_cr(const char *p, const char *e, rb_encoding *enc, int *cr) if (ISASCII(*p)) { q = search_nonascii(p, e); if (!q) { + if (!*cr) *cr = ENC_CODERANGE_7BIT; return c + (e - p); } c += q - p; @@ -1237,7 +1238,7 @@ rb_str_substr(VALUE str, long beg, long len) rb_encoding *enc = STR_ENC_GET(str); VALUE str2; char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str); - int singlebyte = single_byte_optimizable(str); + int singlebyte; if (len < 0) return Qnil; if (!RSTRING_LEN(str)) { @@ -1263,6 +1264,7 @@ rb_str_substr(VALUE str, long beg, long len) else if (beg > 0 && beg > str_strlen(str, enc)) { return Qnil; } + singlebyte = single_byte_optimizable(str); if (len == 0) { p = 0; } @@ -3521,14 +3523,15 @@ static VALUE rb_str_reverse(VALUE str) { rb_encoding *enc; - VALUE obj; + VALUE rev; char *s, *e, *p; + int single = 1; if (RSTRING_LEN(str) <= 1) return rb_str_dup(str); enc = STR_ENC_GET(str); - obj = rb_str_new5(str, 0, RSTRING_LEN(str)); + rev = rb_str_new5(str, 0, RSTRING_LEN(str)); s = RSTRING_PTR(str); e = RSTRING_END(str); - p = RSTRING_END(obj); + p = RSTRING_END(rev); if (RSTRING_LEN(str) > 1) { if (single_byte_optimizable(str)) { @@ -3540,17 +3543,26 @@ rb_str_reverse(VALUE str) while (s < e) { int clen = rb_enc_mbclen(s, e, enc); + if (clen > 1 || (*s & 0x80)) single = 0; p -= clen; memcpy(p, s, clen); s += clen; } } } - STR_SET_LEN(obj, RSTRING_LEN(str)); - OBJ_INFECT(obj, str); - rb_enc_cr_str_copy_for_substr(obj, str); + STR_SET_LEN(rev, RSTRING_LEN(str)); + OBJ_INFECT(rev, str); + if (ENC_CODERANGE(str) == ENC_CODERANGE_UNKNOWN) { + if (single) { + ENC_CODERANGE_SET(str, ENC_CODERANGE_7BIT); + } + else { + ENC_CODERANGE_SET(str, ENC_CODERANGE_VALID); + } + } + rb_enc_cr_str_copy_for_substr(rev, str); - return obj; + return rev; } @@ -3564,19 +3576,25 @@ rb_str_reverse(VALUE str) static VALUE rb_str_reverse_bang(VALUE str) { - char *s, *e, c; - if (RSTRING_LEN(str) > 1) { - rb_str_modify(str); - s = RSTRING_PTR(str); - e = RSTRING_END(str) - 1; - if (single_byte_optimizable(str)) { + char *s, *e, c; + int cr = ENC_CODERANGE(str); + int single = 1; + + rb_str_modify(str); + s = RSTRING_PTR(str); + e = RSTRING_END(str) - 1; while (s < e) { c = *s; + if (*s & 0x80) single = 0; *s++ = *e; *e-- = c; } + if (cr == ENC_CODERANGE_UNKNOWN && single) { + cr = ENC_CODERANGE_7BIT; + } + ENC_CODERANGE_SET(str, cr); } else { rb_str_shared_replace(str, rb_str_reverse(str)); @@ -4226,6 +4244,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) int c, c0, last = 0, modify = 0, i, l; char *s, *send; VALUE hash = 0; + int singlebyte = single_byte_optimizable(str); StringValue(src); StringValue(repl); @@ -4288,6 +4307,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) if (r == -1) r = trrepl.now; if (c < 256) { trans[c] = r; + if (r > 255) singlebyte = 0; } else { if (!hash) hash = rb_hash_new(); @@ -4348,7 +4368,7 @@ tr_trans(VALUE str, VALUE src, VALUE repl, int sflag) STR_SET_NOEMBED(str); RSTRING(str)->as.heap.aux.capa = max; } - else if (rb_enc_mbmaxlen(enc) == 1) { + else if (rb_enc_mbmaxlen(enc) == 1 || (singlebyte && !hash)) { while (s < send) { c = (unsigned char)*s; if (trans[c] >= 0) { |