diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2007-12-06 09:28:26 +0000 |
commit | 69406aad505414de34dc8b560ac1eadf147b0dbc (patch) | |
tree | 161d7248925c7bd2c99d3ed6a341e81ba76b40c4 /io.c | |
parent | de4ec689910c07a48b81083adc3130b6b6023be3 (diff) | |
download | ruby-69406aad505414de34dc8b560ac1eadf147b0dbc.tar.gz |
* encoding.c (rb_enc_precise_mbclen): new function for mbclen with
  validation.

* include/ruby/encoding.h (rb_enc_precise_mbclen): declared.
  (MBCLEN_CHARFOUND): new macro.
  (MBCLEN_INVALID): new macro.
  (MBCLEN_NEEDMORE): new macro.

* include/ruby/oniguruma.h (OnigEncodingTypeST): replace mbc_enc_len
  by precise_mbc_enc_len.
  (ONIGENC_PRECISE_MBC_ENC_LEN): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_INVALID): new macro.
  (ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBCLEN_CHARFOUND): new macro.
  (ONIGENC_MBCLEN_INVALID): new macro.
  (ONIGENC_MBCLEN_NEEDMORE): new macro.
  (ONIGENC_MBC_ENC_LEN): use ONIGENC_PRECISE_MBC_ENC_LEN.

* enc/euc_jp.c: validation implemented.

* enc/sjis.c: ditto.

* enc/utf8.c: ditto.

* string.c (rb_str_inspect): use rb_enc_precise_mbclen for invalid
  encoding.
  (rb_str_valid_encoding_p): new method String#valid_encoding?.

* io.c (rb_io_getc): use rb_enc_precise_mbclen.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@14119 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'io.c')
-rw-r--r-- | io.c | 32 |
1 files changed, 20 insertions, 12 deletions
@@ -2127,7 +2127,7 @@ rb_io_getc(VALUE io)
 {
     rb_encoding *enc;
     rb_io_t *fptr;
-    int n, left;
+    int r, n;
     VALUE str;
 
     GetOpenFile(io, fptr);
@@ -2138,22 +2138,30 @@ rb_io_getc(VALUE io)
     if (io_fillbuf(fptr) < 0) {
         return Qnil;
     }
-    n = rb_enc_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_len, enc);
-    if (n < fptr->rbuf_len) {
+    r = rb_enc_precise_mbclen(fptr->rbuf+fptr->rbuf_off, fptr->rbuf+fptr->rbuf_off+fptr->rbuf_len, enc);
+    if ((n = MBCLEN_CHARFOUND(r)) != 0 && n <= fptr->rbuf_len) {
         str = rb_str_new(fptr->rbuf+fptr->rbuf_off, n);
         fptr->rbuf_off += n;
         fptr->rbuf_len -= n;
     }
+    else if (MBCLEN_NEEDMORE(r)) {
+        str = rb_str_new(fptr->rbuf+fptr->rbuf_off, fptr->rbuf_len);
+        fptr->rbuf_len = 0;
+getc_needmore:
+        if (io_fillbuf(fptr) != -1) {
+            rb_str_cat(str, fptr->rbuf+fptr->rbuf_off, 1);
+            fptr->rbuf_off++;
+            fptr->rbuf_len--;
+            r = rb_enc_precise_mbclen(RSTRING_PTR(str), RSTRING_PTR(str)+RSTRING_LEN(str), enc);
+            if (MBCLEN_NEEDMORE(r)) {
+                goto getc_needmore;
+            }
+        }
+    }
     else {
-        str = rb_str_new(0, n);
-        left = fptr->rbuf_len;
-        MEMCPY(RSTRING_PTR(str), fptr->rbuf+fptr->rbuf_off, char, left);
-        if (io_fillbuf(fptr) < 0) {
-            return Qnil;
-        }
-        MEMCPY(RSTRING_PTR(str)+left, fptr->rbuf, char, n-left);
-        fptr->rbuf_off += left;
-        fptr->rbuf_len -= left;
+        str = rb_str_new(fptr->rbuf+fptr->rbuf_off, 1);
+        fptr->rbuf_off++;
+        fptr->rbuf_len--;
     }
     rb_enc_associate(str, enc);
 