From 6016591e1cc54e48d58662180cc548c03334e1c5 Mon Sep 17 00:00:00 2001 From: naruse Date: Tue, 1 Mar 2011 07:02:08 +0000 Subject: * string.c (rb_str_byteslice): the resulted encoding should keep original encoding. this also fixes the encoding when the result shares internal string. [ruby-core:35376] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30994 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 6 ++++++ string.c | 9 ++++----- test/ruby/test_string.rb | 36 +++++++++++++++++++----------------- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/ChangeLog b/ChangeLog index f7ed85a445..f277500497 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Tue Mar 1 15:59:53 2011 NARUSE, Yui + + * string.c (rb_str_byteslice): the resulted encoding should keep + original encoding. this also fixes the encoding when the result + shares internal string. [ruby-core:35376] + Tue Mar 1 13:25:00 2011 Kenta Murata * ext/bigdecimal/bigdecimal.c (VpMemAlloc): CVE-2011-0188. diff --git a/string.c b/string.c index 1a34bf148d..87273de5f1 100644 --- a/string.c +++ b/string.c @@ -4014,6 +4014,7 @@ str_byte_substr(VALUE str, long beg, long len) } else { str2 = rb_str_new5(str, p, len); + rb_enc_cr_str_copy_for_substr(str2, str); OBJ_INFECT(str2, str); } @@ -4030,14 +4031,13 @@ str_byte_aref(VALUE str, VALUE indx) num_index: str = str_byte_substr(str, idx, 1); - if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil; + if (NIL_P(str) || RSTRING_LEN(str) == 0) return Qnil; return str; default: /* check if indx is Range */ { long beg, len = RSTRING_LEN(str); - VALUE tmp; switch (rb_range_beg_len(indx, &beg, &len, len, 0)) { case Qfalse: @@ -4045,8 +4045,7 @@ str_byte_aref(VALUE str, VALUE indx) case Qnil: return Qnil; default: - tmp = str_byte_substr(str, beg, len); - return tmp; + return str_byte_substr(str, beg, len); } } idx = NUM2LONG(indx); @@ -4069,7 +4068,7 @@ str_byte_aref(VALUE str, VALUE indx) * an offset is negative, it is counted from the end of str. 
Returns * nil if the initial offset falls outside the string, the length * is negative, or the beginning of the range is greater than the end. - * The encoding of the resulted string is always ASCII-8BIT. + * The encoding of the resulted string keeps original encoding. * * "hello".byteslice(1) #=> "e" * "hello".byteslice(-1) #=> "o" diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index f18c8148e4..0b632ab1a8 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -1945,32 +1945,34 @@ class TestString < Test::Unit::TestCase assert_equal(S("hello "), b) end - def b(str) - str.force_encoding(Encoding::ASCII_8BIT) + def u(str) + str.force_encoding(Encoding::UTF_8) end def test_byteslice - assert_equal(b("h"), "hello".byteslice(0)) + assert_equal("h", "hello".byteslice(0)) assert_equal(nil, "hello".byteslice(5)) - assert_equal(b("o"), "hello".byteslice(-1)) + assert_equal("o", "hello".byteslice(-1)) assert_equal(nil, "hello".byteslice(-6)) - assert_equal(b(""), "hello".byteslice(0, 0)) - assert_equal(b("hello"), "hello".byteslice(0, 6)) - assert_equal(b("hello"), "hello".byteslice(0, 6)) - assert_equal(b(""), "hello".byteslice(5, 1)) - assert_equal(b("o"), "hello".byteslice(-1, 6)) + assert_equal("", "hello".byteslice(0, 0)) + assert_equal("hello", "hello".byteslice(0, 6)) + assert_equal("hello", "hello".byteslice(0, 6)) + assert_equal("", "hello".byteslice(5, 1)) + assert_equal("o", "hello".byteslice(-1, 6)) assert_equal(nil, "hello".byteslice(-6, 1)) - assert_equal(b("h"), "hello".byteslice(0..0)) - assert_equal(b(""), "hello".byteslice(5..0)) - assert_equal(b("o"), "hello".byteslice(4..5)) + assert_equal("h", "hello".byteslice(0..0)) + assert_equal("", "hello".byteslice(5..0)) + assert_equal("o", "hello".byteslice(4..5)) assert_equal(nil, "hello".byteslice(6..0)) - assert_equal(b(""), "hello".byteslice(-1..0)) - assert_equal(b("llo"), "hello".byteslice(-3..5)) + assert_equal("", "hello".byteslice(-1..0)) + assert_equal("llo", 
"hello".byteslice(-3..5)) - assert_equal(b("\x81"), "\u3042".byteslice(1)) - assert_equal(b("\x81\x82"), "\u3042".byteslice(1, 2)) - assert_equal(b("\x81\x82"), "\u3042".byteslice(1..2)) + assert_equal(u("\x81"), "\u3042".byteslice(1)) + assert_equal(u("\x81\x82"), "\u3042".byteslice(1, 2)) + assert_equal(u("\x81\x82"), "\u3042".byteslice(1..2)) + + assert_equal(u("\x82")+("\u3042"*9), ("\u3042"*10).byteslice(2, 28)) end end -- cgit v1.2.3