From e3ab670a715769d0d56530675127438bd137089b Mon Sep 17 00:00:00 2001 From: naruse Date: Thu, 10 Dec 2015 18:57:08 +0000 Subject: * object.c (rb_inspect): dump inspected result with rb_str_escape() instead of raising Encoding::CompatibilityError. [Feature #11801] * string.c (rb_str_escape): added to dump given string like rb_str_inspect without quotes and always dump in US-ASCII like rb_str_dump. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53027 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'string.c') diff --git a/string.c b/string.c index e6df91d512..319c51647e 100644 --- a/string.c +++ b/string.c @@ -5265,6 +5265,70 @@ rb_str_buf_cat_escaped_char(VALUE result, unsigned int c, int unicode_p) return l; } +VALUE +rb_str_escape(VALUE str) +{ + int encidx = ENCODING_GET(str); + rb_encoding *enc = rb_enc_from_index(encidx); + const char *p = RSTRING_PTR(str); + const char *pend = RSTRING_END(str); + const char *prev = p; + char buf[CHAR_ESC_LEN + 1]; + VALUE result = rb_str_buf_new(0); + int unicode_p = rb_enc_unicode_p(enc); + int asciicompat = rb_enc_asciicompat(enc); + + while (p < pend) { + unsigned int c, cc; + int n = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND_P(n)) { + if (p > prev) str_buf_cat(result, prev, p - prev); + n = rb_enc_mbminlen(enc); + if (pend < p + n) + n = (int)(pend - p); + while (n--) { + snprintf(buf, CHAR_ESC_LEN, "\\x%02X", *p & 0377); + str_buf_cat(result, buf, strlen(buf)); + prev = ++p; + } + continue; + } + n = MBCLEN_CHARFOUND_LEN(n); + c = rb_enc_mbc_to_codepoint(p, pend, enc); + p += n; + switch (c) { + case '\n': cc = 'n'; break; + case '\r': cc = 'r'; break; + case '\t': cc = 't'; break; + case '\f': cc = 'f'; break; + case '\013': cc = 'v'; break; + case '\010': cc = 'b'; break; + case '\007': cc = 'a'; break; + case 033: cc = 'e'; break; + default: cc = 0; break; + } + if (cc) { + if (p - n > prev) str_buf_cat(result, prev, p - n - prev); + buf[0] = '\\'; + buf[1] = (char)cc; + str_buf_cat(result, buf, 2); + prev = p; + } + else if (asciicompat && rb_enc_isascii(c, enc) && ISPRINT(c)) { + } + else { + if (p - n > prev) str_buf_cat(result, prev, p - n - prev); + rb_str_buf_cat_escaped_char(result, c, unicode_p); + prev = p; + } + } + if (p > prev) str_buf_cat(result, prev, p - prev); + ENCODING_CODERANGE_SET(result, rb_usascii_encindex(), ENC_CODERANGE_7BIT); + + OBJ_INFECT_RAW(result, str); + return result; +} + /* * call-seq: * str.inspect -> string -- cgit v1.2.3