From 69406aad505414de34dc8b560ac1eadf147b0dbc Mon Sep 17 00:00:00 2001 From: akr Date: Thu, 6 Dec 2007 09:28:26 +0000 Subject: * encoding.c (rb_enc_precise_mbclen): new function for mbclen with validation. * include/ruby/encoding.h (rb_enc_precise_mbclen): declared. (MBCLEN_CHARFOUND): new macro. (MBCLEN_INVALID): new macro. (MBCLEN_NEEDMORE): new macro. * include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len by precise_mbc_enc_len. (ONIGENC_PRECISE_MBC_ENC_LEN): new macro. (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro. (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro. (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro. (ONIGENC_MBCLEN_CHARFOUND): new macro. (ONIGENC_MBCLEN_INVALID): new macro. (ONIGENC_MBCLEN_NEEDMORE): new macro. (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN. * enc/euc_jp.c: validation implemented. * enc/sjis.c: ditto. * enc/utf8.c: ditto. * string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid encoding. (rb_str_valid_encoding_p): new method String#valid_encoding?. * io.c (rb_io_getc): use rb_enc_precise_mbclen. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- string.c | 54 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) (limited to 'string.c') diff --git a/string.c b/string.c index 5ca4dc425e..20f2e38239 100644 --- a/string.c +++ b/string.c @@ -2919,10 +2919,20 @@ rb_str_inspect(VALUE str) str_cat_char(result, '"', enc); p = RSTRING_PTR(str); pend = RSTRING_END(str); while (p < pend) { - int c = rb_enc_codepoint(p, pend, enc); - int n = rb_enc_codelen(c, enc); + int c; + int n; int cc; + n = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND(n)) { + p++; + n = 1; + goto escape_codepoint; + } + + c = rb_enc_codepoint(p, pend, enc); + n = rb_enc_codelen(c, enc); + p += n; if (c == '"'|| c == '\\' || (c == '#' && (cc = rb_enc_codepoint(p,pend,enc), @@ -2954,19 +2964,21 @@ rb_str_inspect(VALUE str) prefix_escape(result, 'e', enc); } else if (rb_enc_isprint(c, enc)) { - char buf[5]; - - rb_enc_mbcput(c, buf, enc); - rb_str_buf_cat(result, buf, n); + rb_str_buf_cat(result, p-n, n); } else { char buf[5]; - char *s = buf; + char *s; + char *q; - sprintf(buf, "\\%03o", c & 0377); - while (*s) { - str_cat_char(result, *s++, enc); - } +escape_codepoint: + for (q = p-n; q < p; q++) { + s = buf; + sprintf(buf, "\\%03o", *q & 0377); + while (*s) { + str_cat_char(result, *s++, enc); + } + } } } str_cat_char(result, '"', enc); @@ -5232,6 +5244,25 @@ rb_str_force_encoding(VALUE str, VALUE enc) return str; } +static VALUE +rb_str_valid_encoding_p(VALUE str) +{ + char *p = RSTRING_PTR(str); + char *pend = RSTRING_END(str); + rb_encoding *enc = rb_enc_get(str); + + while (p < pend) { + int n; + + n = rb_enc_precise_mbclen(p, pend, enc); + if (!MBCLEN_CHARFOUND(n)) { + return Qfalse; + } + p += n; + } + return Qtrue; +} + /********************************************************************** * Document-class: Symbol * @@ -5644,6 +5675,7 @@ Init_String(void) rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */ rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1); + rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0); id_to_s = rb_intern("to_s"); -- cgit v1.2.3