about | summary | refs | log | tree | commit | diff | stats
path: root/io.c
diff options
context:
space:
mode:
author akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-06 09:28:26 +0000
committer akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> 2007-12-06 09:28:26 +0000
commit 69406aad505414de34dc8b560ac1eadf147b0dbc (patch)
tree 161d7248925c7bd2c99d3ed6a341e81ba76b40c4 /io.c
parent de4ec689910c07a48b81083adc3130b6b6023be3 (diff)
download ruby-69406aad505414de34dc8b560ac1eadf147b0dbc.tar.gz
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
  validation.

* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
  (MBCLEN_CHARFOUND): new macro.
  (MBCLEN_INVALID): new macro.
  (MBCLEN_NEEDMORE): new macro.

* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
  by precise_mbc_enc_len.
  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_MBCLEN_INVALID): new macro.
  (ONIGENC_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.

* enc/euc_jp.c: validation implemented.

* enc/sjis.c: ditto.

* enc/utf8.c: ditto.

* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
  encoding.
  (rb_str_valid_encoding_p): new method String#valid_encoding?.

* io.c (rb_io_getc): use rb_enc_precise_mbclen.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'io.c')
-rw-r--r-- io.c 32
1 files changed, 20 insertions, 12 deletions
diff --git a/io.c b/io.c
index 0dd586678e..12d4e01c59 100644
--- a/io.c
+++ b/io.c
@@ -2127,7 +2127,7 @@ rb_io_getc(VALUE io)
{
rb_encoding *enc;
rb_io_t *fptr;
- int n, left;
+ int r, n;
VALUE str;
GetOpenFile(io, fptr);
@@ -2138,22 +2138,30 @@ rb_io_getc(VALUE io)
if (io_fillbuf(fptr) < 0) {
return Qnil;
}
- n = rb_enc_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_len, enc);
- if (n < fptr->rbuf_len) {
+ r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+ if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
fptr->rbuf_off += n;
fptr->rbuf_len -= n;
}
+ else if (MBCLEN_NEEDMORE(r)) {
+ str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
+ fptr->rbuf_len = 0;
+getc_needmore:
+ if (io_fillbuf(fptr) != -1) {
+ rb_str_cat(str, fptr->rbuf+fptr->rbuf_off, 1);
+ fptr->rbuf_off++;
+ fptr->rbuf_len--;
+ r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
+ if (MBCLEN_NEEDMORE(r)) {
+ goto getc_needmore;
+ }
+ }
+ }
else {
- str = rb_str_new(0, n);
- left = fptr->rbuf_len;
- MEMCPY(RSTRING_PTR(str), fptr->rbuf+fptr->rbuf_off, char, left);
- if (io_fillbuf(fptr) < 0) {
- return Qnil;
- }
- MEMCPY(RSTRING_PTR(str)+left, fptr->rbuf, char, n-left);
- fptr->rbuf_off += left;
- fptr->rbuf_len -= left;
+ str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
+ fptr->rbuf_off++;
+ fptr->rbuf_len--;
}
rb_enc_associate(str, enc);