aboutsummaryrefslogtreecommitdiffstats
path: root/string.c
diff options
context:
space:
mode:
author akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-06 09:28:26 +0000
committer akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-06 09:28:26 +0000
commit 69406aad505414de34dc8b560ac1eadf147b0dbc (patch)
tree 161d7248925c7bd2c99d3ed6a341e81ba76b40c4 /string.c
parent de4ec689910c07a48b81083adc3130b6b6023be3 (diff)
download ruby-69406aad505414de34dc8b560ac1eadf147b0dbc.tar.gz
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
  validation.

* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
  (MBCLEN_CHARFOUND): new macro.
  (MBCLEN_INVALID): new macro.
  (MBCLEN_NEEDMORE): new macro.

* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
  by precise_mbc_enc_len.
  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_MBCLEN_INVALID): new macro.
  (ONIGENC_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.

* enc/euc_jp.c: validation implemented.

* enc/sjis.c: ditto.

* enc/utf8.c: ditto.

* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
  encoding.
  (rb_str_valid_encoding_p): new method String#valid_encoding?.

* io.c (rb_io_getc): use rb_enc_precise_mbclen.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'string.c')
-rw-r--r-- string.c 54
1 files changed, 43 insertions, 11 deletions
diff --git a/string.c b/string.c
index 5ca4dc425e..20f2e38239 100644
--- a/string.c
+++ b/string.c
@@ -2919,10 +2919,20 @@ rb_str_inspect(VALUE str)
str_cat_char(result, '"', enc);
p = RSTRING_PTR(str); pend = RSTRING_END(str);
while (p < pend) {
- int c = rb_enc_codepoint(p, pend, enc);
- int n = rb_enc_codelen(c, enc);
+ int c;
+ int n;
int cc;
+ n = rb_enc_precise_mbclen(p, pend, enc);
+ if (!MBCLEN_CHARFOUND(n)) {
+ p++;
+ n = 1;
+ goto escape_codepoint;
+ }
+
+ c = rb_enc_codepoint(p, pend, enc);
+ n = rb_enc_codelen(c, enc);
+
p += n;
if (c == '"'|| c == '\\' ||
(c == '#' && (cc = rb_enc_codepoint(p,pend,enc),
@@ -2954,19 +2964,21 @@ rb_str_inspect(VALUE str)
prefix_escape(result, 'e', enc);
}
else if (rb_enc_isprint(c, enc)) {
- char buf[5];
-
- rb_enc_mbcput(c, buf, enc);
- rb_str_buf_cat(result, buf, n);
+ rb_str_buf_cat(result, p-n, n);
}
else {
char buf[5];
- char *s = buf;
+ char *s;
+ char *q;
- sprintf(buf, "\\%03o", c & 0377);
- while (*s) {
- str_cat_char(result, *s++, enc);
- }
+escape_codepoint:
+ for (q = p-n; q < p; q++) {
+ s = buf;
+ sprintf(buf, "\\%03o", *q & 0377);
+ while (*s) {
+ str_cat_char(result, *s++, enc);
+ }
+ }
}
}
str_cat_char(result, '"', enc);
@@ -5232,6 +5244,25 @@ rb_str_force_encoding(VALUE str, VALUE enc)
return str;
}
+static VALUE /* backs String#valid_encoding?; yields Qtrue or Qfalse */
+rb_str_valid_encoding_p(VALUE str)
+{
+ char *p = RSTRING_PTR(str); /* scan cursor into str's bytes */
+ char *pend = RSTRING_END(str); /* the scan stops once p reaches this */
+ rb_encoding *enc = rb_enc_get(str); /* encoding each character is measured against */
+
+ while (p < pend) {
+ int n;
+
+ n = rb_enc_precise_mbclen(p, pend, enc); /* validating mbclen for the character at p */
+ if (!MBCLEN_CHARFOUND(n)) { /* not a found character -- presumably invalid or truncated; see MBCLEN_INVALID / MBCLEN_NEEDMORE */
+ return Qfalse;
+ }
+ p += n; /* NOTE(review): n is used directly as a byte count -- confirm MBCLEN_CHARFOUND(n) guarantees that */
+ }
+ return Qtrue; /* no character failed the check (also the result when the loop never runs) */
+}
+
/**********************************************************************
* Document-class: Symbol
*
@@ -5644,6 +5675,7 @@ Init_String(void)
rb_define_method(rb_cString, "encoding", rb_obj_encoding, 0); /* in encoding.c */
rb_define_method(rb_cString, "force_encoding", rb_str_force_encoding, 1);
+ rb_define_method(rb_cString, "valid_encoding?", rb_str_valid_encoding_p, 0);
id_to_s = rb_intern("to_s");