diff options
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | string.c | 30 | ||||
-rw-r--r-- | test/ruby/test_m17n.rb | 57 |
3 files changed, 59 insertions, 33 deletions
@@ -1,3 +1,8 @@ +Tue Sep 29 06:50:32 2009 NARUSE, Yui <naruse@ruby-lang.org> + + * string.c (rb_str_inspect): dump as \uXXXX when the + string is in Unicode. [ruby-dev:39388] + Tue Sep 29 06:49:16 2009 NARUSE, Yui <naruse@ruby-lang.org> * encoding.c (rb_enc_unicode_p): defined. @@ -4061,6 +4061,7 @@ rb_str_inspect(VALUE str) char *p, *pend; VALUE result = rb_str_buf_new(0); rb_encoding *resenc = rb_default_internal_encoding(); + int unicode_p = rb_enc_unicode_p(enc); if (resenc == NULL) resenc = rb_default_external_encoding(); if (!rb_enc_asciicompat(resenc)) resenc = rb_usascii_encoding(); @@ -4069,7 +4070,7 @@ rb_str_inspect(VALUE str) p = RSTRING_PTR(str); pend = RSTRING_END(str); while (p < pend) { - unsigned int c, cc; + unsigned int c = -1, cc; int n; n = rb_enc_precise_mbclen(p, pend, enc); @@ -4114,18 +4115,29 @@ rb_str_inspect(VALUE str) else if (c == 033) { str_buf_cat2(result, "\\e"); } - else if ((enc == resenc && rb_enc_isprint(c, enc)) || rb_enc_isascii(c, enc)) { + else if ((enc == resenc && rb_enc_isprint(c, enc)) || + (rb_enc_isascii(c, enc) && ISPRINT(c))) { str_buf_cat(result, p-n, n); } else { - char *q; + char buf[11]; escape_codepoint: - for (q = p-n; q < p; q++) { -#define BACKESC_BUFSIZE 5 - char buf[BACKESC_BUFSIZE]; - sprintf(buf, "\\x%02X", *q & 0377); - str_buf_cat(result, buf, BACKESC_BUFSIZE - 1); -#undef BACKESC_BUFSIZE + + if (unicode_p && c != -1) { + if (c > 0xFFFF) { + sprintf(buf, "\\u{%X}", c); + } + else { + sprintf(buf, "\\u%04X", c); + } + str_buf_cat(result, buf, strlen(buf)); + } + else { + char *q; + for (q = p-n; q < p; q++) { + sprintf(buf, "\\x%02X", *q & 0377); + str_buf_cat(result, buf, strlen(buf)); + } } } } diff --git a/test/ruby/test_m17n.rb b/test/ruby/test_m17n.rb index 78d98ac441..9c7cd926f4 100644 --- a/test/ruby/test_m17n.rb +++ b/test/ruby/test_m17n.rb @@ -2,6 +2,15 @@ require 'test/unit' require 'stringio' class TestM17N < Test::Unit::TestCase + def inspect_encoding + Encoding.default_internal || Encoding.default_external + end + + def setup + Encoding.default_internal = nil + Encoding.default_external = Encoding::UTF_8 + end + def assert_encoding(encname, actual, message=nil) assert_equal(Encoding.find(encname), actual, message) end @@ -201,10 +210,10 @@ class TestM17N < Test::Unit::TestCase assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect) - assert_equal(e("\"\\xA1\x8f\xA1\xA1\""), e("\xa1\x8f\xa1\xa1").inspect) + assert_equal("\"\\xA1\\x8F\\xA1\\xA1\"", e("\xa1\x8f\xa1\xa1").inspect) assert_equal('"\x81."', s("\x81.").inspect) - assert_equal(s("\"\x81@\""), s("\x81@").inspect) + assert_equal(s('"\x81\x40"'), s("\x81@").inspect) assert_equal('"\xFC"', u("\xfc").inspect) end @@ -756,30 +765,30 @@ class TestM17N < Test::Unit::TestCase end def test_sprintf_p - assert_strenc('""', 'ASCII-8BIT', a("%p") % a("")) - assert_strenc('""', 'EUC-JP', e("%p") % e("")) - assert_strenc('""', 'Windows-31J', s("%p") % s("")) - assert_strenc('""', 'UTF-8', u("%p") % u("")) + assert_strenc('""', inspect_encoding, a("%p") % a("")) + assert_strenc('""', inspect_encoding, e("%p") % e("")) + assert_strenc('""', inspect_encoding, s("%p") % s("")) + assert_strenc('""', inspect_encoding, u("%p") % u("")) - assert_strenc('"a"', 'ASCII-8BIT', a("%p") % a("a")) - assert_strenc('"a"', 'EUC-JP', e("%p") % e("a")) - assert_strenc('"a"', 'Windows-31J', s("%p") % s("a")) - assert_strenc('"a"', 'UTF-8', u("%p") % u("a")) + assert_strenc('"a"', inspect_encoding, a("%p") % a("a")) + assert_strenc('"a"', inspect_encoding, e("%p") % e("a")) + assert_strenc('"a"', inspect_encoding, s("%p") % s("a")) + assert_strenc('"a"', inspect_encoding, u("%p") % u("a")) - assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', a("%p") % a("\xc2\xa1")) - assert_strenc("\"\xC2\xA1\"", 'EUC-JP', e("%p") % e("\xc2\xa1")) - #assert_strenc("\"\xC2\xA1\"", 'Windows-31J', s("%p") % s("\xc2\xa1")) - assert_strenc("\"\xC2\xA1\"", 'UTF-8', u("%p") % u("\xc2\xa1")) + assert_strenc('"\xC2\xA1"', inspect_encoding, a("%p") % a("\xc2\xa1")) + assert_strenc('"\xC2\xA1"', inspect_encoding, e("%p") % e("\xc2\xa1")) + #assert_strenc("\"\xC2\xA1\"", inspect_encoding, s("%p") % s("\xc2\xa1")) + assert_strenc("\"\xC2\xA1\"", inspect_encoding, u("%p") % u("\xc2\xa1")) - assert_strenc('"\xC2\xA1"', 'ASCII-8BIT', "%10p" % a("\xc2\xa1")) - assert_strenc(" \"\xC2\xA1\"", 'EUC-JP', "%10p" % e("\xc2\xa1")) - #assert_strenc(" \"\xC2\xA1\"", 'Windows-31J', "%10p" % s("\xc2\xa1")) - assert_strenc(" \"\xC2\xA1\"", 'UTF-8', "%10p" % u("\xc2\xa1")) + assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % a("\xc2\xa1")) + assert_strenc('"\xC2\xA1"', inspect_encoding, "%10p" % e("\xc2\xa1")) + #assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % s("\xc2\xa1")) + assert_strenc(" \"\xC2\xA1\"", inspect_encoding, "%10p" % u("\xc2\xa1")) - assert_strenc('"\x00"', 'ASCII-8BIT', a("%p") % a("\x00")) - assert_strenc('"\x00"', 'EUC-JP', e("%p") % e("\x00")) - assert_strenc('"\x00"', 'Windows-31J', s("%p") % s("\x00")) - assert_strenc('"\x00"', 'UTF-8', u("%p") % u("\x00")) + assert_strenc('"\x00"', inspect_encoding, a("%p") % a("\x00")) + assert_strenc('"\x00"', inspect_encoding, e("%p") % e("\x00")) + assert_strenc('"\x00"', inspect_encoding, s("%p") % s("\x00")) + assert_strenc('"\u0000"', inspect_encoding, u("%p") % u("\x00")) end def test_sprintf_s @@ -1176,8 +1185,8 @@ class TestM17N < Test::Unit::TestCase assert_equal(Encoding::US_ASCII, [].to_s.encoding) assert_equal(Encoding::US_ASCII, [nil].to_s.encoding) assert_equal(Encoding::US_ASCII, [1].to_s.encoding) - assert_equal(Encoding::US_ASCII, [""].to_s.encoding) - assert_equal(Encoding::US_ASCII, ["a"].to_s.encoding) + assert_equal(inspect_encoding, [""].to_s.encoding) + assert_equal(inspect_encoding, ["a"].to_s.encoding) assert_equal(Encoding::US_ASCII, [nil,1,"","a","\x20",[]].to_s.encoding) end |