diff options
author | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-22 16:53:50 +0000 |
---|---|---|
committer | matz <matz@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-22 16:53:50 +0000 |
commit | 38c7a687e174e5fc7761cf05f71003ad6b728320 (patch) | |
tree | 43f41183c4bbe3684284dce2d442833f1d5aa237 | |
parent | b6807f8356c670bb3668378a796f45e3e32ba007 (diff) | |
download | ruby-38c7a687e174e5fc7761cf05f71003ad6b728320.tar.gz |
* string.c (rb_str_conv_enc_opts): new function to convert a string
while specifying ecflags and ecopts.
* ext/zlib/zlib.c (gzfile_newstr): specify ecflags and ecopts for
conversion using the above function.
* ext/zlib/zlib.c (gzfile_newstr): use its own rb_econv_t for dummy
encodings in order to handle stateful encodings (e.g. iso-2022-jp).
[ruby-dev:36857]
* ext/zlib/zlib.c (gzfile_getc): ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19892 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | ext/zlib/zlib.c | 105 | ||||
-rw-r--r-- | string.c | 10 |
3 files changed, 103 insertions, 26 deletions
@@ -1,3 +1,17 @@ +Thu Oct 23 01:26:25 2008 Yukihiro Matsumoto <matz@ruby-lang.org> + + * string.c (rb_str_conv_enc_opts): new function to convert with + specifying ecflags and ecopts. + + * ext/zlib/zlib.c (gzfile_newstr): specify ecflags and ecopts for + conversion using above function. + + * ext/zlib/zlib.c (gzfile_newstr): use own rb_econv_t for dummy + encoding to handling stateful encoding (e.g. iso-2022-jp). + [ruby-dev:36857] + + * ext/zlib/zlib.c (gzfile_getc): ditto. + Thu Oct 23 01:24:49 2008 Nobuyoshi Nakada <nobu@ruby-lang.org> * lib/mkmf.rb (create_tmpsrc): get rid of side effects. diff --git a/ext/zlib/zlib.c b/ext/zlib/zlib.c index 2cce207180..f73d58a20d 100644 --- a/ext/zlib/zlib.c +++ b/ext/zlib/zlib.c @@ -194,6 +194,7 @@ static VALUE rb_gzreader_readlines(int, VALUE*, VALUE); void Init_zlib(void); int rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2_p); +VALUE rb_str_conv_enc_opts(VALUE, rb_encoding*, rb_encoding*, int, VALUE); /*--------- Exceptions --------*/ @@ -540,7 +541,7 @@ zstream_shift_buffer(struct zstream *z, int len) return zstream_detach_buffer(z); } - dst = rb_str_substr(z->buf, 0, len); + dst = rb_str_subseq(z->buf, 0, len); RBASIC(dst)->klass = rb_cString; z->buf_filled -= len; memmove(RSTRING_PTR(z->buf), RSTRING_PTR(z->buf) + len, @@ -1670,7 +1671,12 @@ struct gzfile { void (*end)(struct gzfile *); rb_encoding *enc; rb_encoding *enc2; + rb_econv_t *ec; + int ecflags; + VALUE ecopts; + char *cbuf; }; +#define GZFILE_CBUF_CAPA 10 #define GZFILE_FLAG_SYNC ZSTREAM_FLAG_UNUSED #define GZFILE_FLAG_HEADER_FINISHED (ZSTREAM_FLAG_UNUSED << 1) @@ -1689,6 +1695,7 @@ gzfile_mark(struct gzfile *gz) rb_gc_mark(gz->orig_name); rb_gc_mark(gz->comment); zstream_mark(&gz->z); + rb_gc_mark(gz->ecopts); } static void @@ -1702,6 +1709,9 @@ gzfile_free(struct gzfile *gz) } zstream_finalize(z); } + if (gz->cbuf) { + xfree(gz->cbuf); + } xfree(gz); } @@ -1728,6 +1738,10 @@ gzfile_new(klass, funcs, endfunc) gz->end = 
endfunc; gz->enc = rb_default_external_encoding(); gz->enc2 = 0; + gz->ec = NULL; + gz->ecflags = 0; + gz->ecopts = Qnil; + gz->cbuf = 0; return obj; } @@ -1742,6 +1756,11 @@ gzfile_reset(struct gzfile *gz) gz->crc = crc32(0, Z_NULL, 0); gz->lineno = 0; gz->ungetc = 0; + if (gz->ec) { + rb_econv_close(gz->ec); + gz->ec = rb_econv_open_opts(gz->enc2->name, gz->enc->name, + gz->ecflags, gz->ecopts); + } } static void @@ -2076,12 +2095,19 @@ gzfile_calc_crc(struct gzfile *gz, VALUE str) static VALUE gzfile_newstr(struct gzfile *gz, VALUE str) { - OBJ_TAINT(str); /* for safe */ - if (gz->enc && !gz->enc2) { + if (!gz->enc2) { rb_enc_associate(str, gz->enc); + OBJ_TAINT(str); /* for safe */ return str; } - return rb_str_conv_enc(str, gz->enc2, gz->enc); + if (gz->ec && rb_enc_dummy_p(gz->enc2)) { + str = rb_econv_str_convert(gz->ec, str, ECONV_PARTIAL_INPUT); + rb_enc_associate(str, gz->enc); + OBJ_TAINT(str); + return str; + } + return rb_str_conv_enc_opts(str, gz->enc2, gz->enc, + gz->ecflags, gz->ecopts); } static VALUE @@ -2105,7 +2131,7 @@ gzfile_read(struct gzfile *gz, int len) dst = zstream_shift_buffer(&gz->z, len); gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + return dst; } static VALUE @@ -2142,15 +2168,13 @@ gzfile_readpartial(struct gzfile *gz, int len, VALUE outbuf) dst = zstream_shift_buffer(&gz->z, len); gzfile_calc_crc(gz, dst); - if (NIL_P(outbuf)) { - OBJ_TAINT(dst); /* for safe */ - } - else { + if (!NIL_P(outbuf)) { rb_str_resize(outbuf, RSTRING_LEN(dst)); memcpy(RSTRING_PTR(outbuf), RSTRING_PTR(dst), RSTRING_LEN(dst)); dst = outbuf; } - return gzfile_newstr(gz, dst); + OBJ_TAINT(dst); /* for safe */ + return dst; } static VALUE @@ -2170,13 +2194,14 @@ gzfile_read_all(struct gzfile *gz) dst = zstream_detach_buffer(&gz->z); gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + OBJ_TAINT(dst); + return dst; } static VALUE gzfile_getc(struct gzfile *gz) { - VALUE buf, dst; + VALUE buf, dst = 0; int len; len = 
rb_enc_mbmaxlen(gz->enc); @@ -2190,11 +2215,33 @@ gzfile_getc(struct gzfile *gz) return Qnil; } - buf = gz->z.buf; - len = rb_enc_mbclen(RSTRING_PTR(buf), RSTRING_PTR(buf)+len, gz->enc); - dst = zstream_shift_buffer(&gz->z, len); - gzfile_calc_crc(gz, dst); - return gzfile_newstr(gz, dst); + if (gz->ec && rb_enc_dummy_p(gz->enc2)) { + const unsigned char *ss, *sp, *se; + unsigned char *ds, *dp, *de; + rb_econv_result_t res; + + if (!gz->cbuf) { + gz->cbuf = ALLOC_N(char, GZFILE_CBUF_CAPA); + } + ss = sp = (const unsigned char*)RSTRING_PTR(gz->z.buf); + se = sp + gz->z.buf_filled; + ds = dp = (unsigned char *)gz->cbuf; + de = (unsigned char *)ds + GZFILE_CBUF_CAPA; + res = rb_econv_convert(gz->ec, &sp, se, &dp, de, ECONV_PARTIAL_INPUT|ECONV_AFTER_OUTPUT); + rb_econv_check_error(gz->ec); + dst = zstream_shift_buffer(&gz->z, sp - ss); + gzfile_calc_crc(gz, dst); + dst = rb_str_new(gz->cbuf, dp - ds); + rb_enc_associate(dst, gz->enc); + OBJ_TAINT(dst); + return dst; + } + else { + buf = gz->z.buf; + len = rb_enc_mbclen(RSTRING_PTR(buf), RSTRING_END(buf), gz->enc); + dst = gzfile_read(gz, len); + return gzfile_newstr(gz, dst); + } } static void @@ -2624,6 +2671,20 @@ rb_gzfile_total_out(VALUE obj) } +static void +rb_gzfile_ecopts(struct gzfile *gz, VALUE opts) +{ + if (!NIL_P(opts)) { + rb_io_extract_encoding_option(opts, &gz->enc, &gz->enc2); + } + if (gz->enc2) { + gz->ecflags = rb_econv_prepare_opts(opts, &opts); + gz->ec = rb_econv_open_opts(gz->enc2->name, gz->enc->name, + gz->ecflags, opts); + gz->ecopts = opts; + } +} + /* ------------------------------------------------------------------------- */ /* @@ -2704,9 +2765,7 @@ rb_gzwriter_initialize(int argc, VALUE *argv, VALUE obj) } gz->io = io; ZSTREAM_READY(&gz->z); - if (!NIL_P(opt)) { - rb_io_extract_encoding_option(opt, &gz->enc, &gz->enc2); - } + rb_gzfile_ecopts(gz, opt); return obj; } @@ -2901,9 +2960,7 @@ rb_gzreader_initialize(int argc, VALUE *argv, VALUE obj) gz->io = io; ZSTREAM_READY(&gz->z); 
gzfile_read_header(gz); - if (!NIL_P(opt)) { - rb_io_extract_encoding_option(opt, &gz->enc, &gz->enc2); - } + rb_gzfile_ecopts(gz, opt); return obj; } @@ -3214,7 +3271,7 @@ gzreader_gets(int argc, VALUE *argv, VALUE obj) gzreader_skip_linebreaks(gz); } - return dst; + return gzfile_newstr(gz, dst); } /* @@ -473,7 +473,7 @@ RUBY_ALIAS_FUNCTION(rb_tainted_str_new2(const char *ptr), rb_tainted_str_new_cst #define rb_tainted_str_new2 rb_tainted_str_new_cstr VALUE -rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts) { rb_econv_t *ec; rb_econv_result_t ret; @@ -497,7 +497,7 @@ rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) newstr = rb_str_new(0, len); retry: - ec = rb_econv_open_opts(from->name, to->name, 0, Qnil); + ec = rb_econv_open_opts(from->name, to->name, ecflags, ecopts); if (!ec) return str; sp = (unsigned char*)RSTRING_PTR(str); @@ -525,6 +525,12 @@ rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) } VALUE +rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to) +{ + return rb_str_conv_enc_opts(str, from, to, 0, Qnil); +} + +VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *eenc) { VALUE str; |