From 63daa7c07d6c10c1a47a4578e5fa015760967e84 Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 16 Aug 2008 17:06:35 +0000 Subject: * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off, crbuf_len, crbuf_capa. (MakeOpenFile): initialize them. * io.c (io_shift_crbuf): new function. (io_getc): use econv. (rb_io_fptr_finalize): finalize readconv and crbuf. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18666 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 10 ++++++ include/ruby/io.h | 18 +++++++++-- io.c | 77 +++++++++++++++++++++++++++++++++++++++++++++-- test/ruby/test_io_m17n.rb | 10 +++--- 4 files changed, 105 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index dada846feb..53b1c5154d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Sun Aug 17 01:29:46 2008 Tanaka Akira + + * include/ruby/io.h (rb_io_t): new fields: readconv, crbuf, crbuf_off, + crbuf_len, crbuf_capa. + (MakeOpenFile): initialize them. + + * io.c (io_shift_crbuf): new function. + (io_getc): use econv. + (rb_io_fptr_finalize): finalize readconv and crbuf. + Sun Aug 17 00:02:07 2008 Tanaka Akira * include/ruby/encoding.h (rb_econv_check_error): declared. diff --git a/include/ruby/io.h b/include/ruby/io.h index 95be788c08..da7589f60c 100644 --- a/include/ruby/io.h +++ b/include/ruby/io.h @@ -36,17 +36,26 @@ typedef struct rb_io_t { char *path; /* pathname for file */ void (*finalize)(struct rb_io_t*,int); /* finalize proc */ long refcnt; + char *wbuf; /* wbuf_off + wbuf_len <= wbuf_capa */ int wbuf_off; int wbuf_len; int wbuf_capa; + char *rbuf; /* rbuf_off + rbuf_len <= rbuf_capa */ int rbuf_off; int rbuf_len; int rbuf_capa; + VALUE tied_io_for_writing; - rb_encoding *enc; - rb_encoding *enc2; + rb_encoding *enc; /* int_enc if enc2. ext_enc otherwise. */ + rb_encoding *enc2; /* ext_enc if not NULL. */ + + rb_econv_t *readconv; + char *crbuf; /* crbuf_off + crbuf_len <= crbuf_capa */ + int crbuf_off; + int crbuf_len; + int crbuf_capa; } rb_io_t; #define HAVE_RB_IO_T 1 @@ -89,6 +98,11 @@ typedef struct rb_io_t { fp->rbuf_off = 0;\ fp->rbuf_len = 0;\ fp->rbuf_capa = 0;\ + fp->readconv = NULL;\ + fp->crbuf = NULL;\ + fp->crbuf_off = 0;\ + fp->crbuf_len = 0;\ + fp->crbuf_capa = 0;\ fp->tied_io_for_writing = 0;\ fp->enc = 0;\ fp->enc2 = 0;\ diff --git a/io.c b/io.c index 1b3f8c7626..169beeaa3f 100644 --- a/io.c +++ b/io.c @@ -2268,14 +2268,77 @@ rb_io_each_byte(VALUE io) return io; } +static VALUE +io_shift_crbuf(rb_io_t *fptr, int len) +{ + VALUE str; + str = rb_str_new(fptr->crbuf+fptr->crbuf_off, len); + fptr->crbuf_off += len; + fptr->crbuf_len -= len; + OBJ_TAINT(str); + rb_enc_associate(str, fptr->enc); + /* xxx: set coderange */ + if (fptr->crbuf_len == 0) + fptr->crbuf_off = 0; + if (fptr->crbuf_off < fptr->crbuf_capa/2) { + memmove(fptr->crbuf, fptr->crbuf+fptr->crbuf_off, fptr->crbuf_len); + fptr->crbuf_off = 0; + } + return str; +} + static VALUE io_getc(rb_io_t *fptr, rb_encoding *enc) { int r, n, cr = 0; VALUE str; - if (rb_enc_dummy_p(enc)) { - rb_raise(rb_eNotImpError, "getc against dummy encoding is not currently supported"); + if (fptr->enc2) { + if (!fptr->readconv) { + fptr->readconv = rb_econv_open(fptr->enc2->name, fptr->enc->name, 0); + if (!fptr->readconv) + rb_raise(rb_eIOError, "code converter open failed (%s to %s)", fptr->enc->name, fptr->enc2->name); + fptr->crbuf_off = 0; + fptr->crbuf_len = 0; + fptr->crbuf_capa = 1024; + fptr->crbuf = ALLOC_N(char, fptr->crbuf_capa); + } + + while (1) { + const unsigned char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + if (fptr->crbuf_len) { + r = rb_enc_precise_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc); + if (!MBCLEN_NEEDMORE_P(r)) + break; + if (fptr->crbuf_len == fptr->crbuf_capa) { + rb_raise(rb_eIOError, "too long character"); + } + } + if (fptr->rbuf_len == 0) { + if (io_fillbuf(fptr) == -1) { + if (fptr->crbuf_len == 0) + return Qnil; + /* return an incomplete character just before EOF */ + return io_shift_crbuf(fptr, fptr->crbuf_len); + } + } + ss = sp = (const unsigned char *)fptr->rbuf + fptr->rbuf_off; + se = sp + fptr->rbuf_len; + ds = dp = (unsigned char *)fptr->crbuf + fptr->crbuf_off + fptr->crbuf_len; + de = (unsigned char *)fptr->crbuf + fptr->crbuf_capa; + res = rb_econv_convert(fptr->readconv, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_OUTPUT_FOLLOWED_BY_INPUT); + fptr->rbuf_off += sp - ss; + fptr->rbuf_len -= sp - ss; + fptr->crbuf_len += dp - ds; + rb_econv_check_error(fptr->readconv); + } + if (MBCLEN_INVALID_P(r)) { + r = rb_enc_mbclen(fptr->crbuf+fptr->crbuf_off, fptr->crbuf+fptr->crbuf_off+fptr->crbuf_len, fptr->enc); + return io_shift_crbuf(fptr, r); + } + return io_shift_crbuf(fptr, MBCLEN_CHARFOUND_LEN(r)); } if (io_fillbuf(fptr) < 0) { @@ -2766,6 +2829,14 @@ rb_io_fptr_finalize(rb_io_t *fptr) free(fptr->wbuf); fptr->wbuf = 0; } + if (fptr->readconv) { + rb_econv_close(fptr->readconv); + fptr->readconv = NULL; + } + if (fptr->crbuf) { + free(fptr->crbuf); + fptr->crbuf = NULL; + } free(fptr); return 1; } @@ -3370,6 +3441,8 @@ mode_enc(rb_io_t *fptr, const char *estr) char *enc2name; int idx, idx2; + /* parse estr as "enc" or "enc2:enc" */ + p0 = strrchr(estr, ':'); if (!p0) p1 = estr; else p1 = p0 + 1; diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index bc9c6f34ce..6d7b4f7f9b 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -220,12 +220,10 @@ EOT with_tmpdir { src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp") generate_file('tmp', src) - assert_raise(NotImplementedError) do - open("tmp", "r:iso-2022-jp:euc-jp") {|f| - assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc) - assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc) - } - end + open("tmp", "r:iso-2022-jp:euc-jp") {|f| + assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc) + assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc) + } } end -- cgit v1.2.3