From d4fabf0e36946a2ac8423a0817e282efa1ee1459 Mon Sep 17 00:00:00 2001 From: akr Date: Sat, 23 Aug 2008 06:02:58 +0000 Subject: * include/ruby/encoding.h (ECONV_INVALID_MASK): defined. (ECONV_INVALID_IGNORE): defined. (ECONV_INVALID_REPLACE): defined. (ECONV_UNDEF_MASK): defined. (ECONV_UNDEF_IGNORE): defined. (ECONV_UNDEF_REPLACE): defined. * transcode.c (INVALID_IGNORE): removed. (INVALID_REPLACE): removed. (UNDEF_IGNORE): removed. (UNDEF_REPLACE): removed. (rb_econv_convert0): renamed from rb_econv_convert. (rb_econv_convert): defined to call rb_econv_convert0 with replace/ignore behavior moved from transcode_loop. (transcode_loop): replace/ignore behavior removed. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18787 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- transcode.c | 107 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 57 insertions(+), 50 deletions(-) (limited to 'transcode.c') diff --git a/transcode.c b/transcode.c index 55f3281559..61843cfe9e 100644 --- a/transcode.c +++ b/transcode.c @@ -21,10 +21,6 @@ VALUE rb_eInvalidByteSequence; VALUE rb_cEncodingConverter; static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace; -#define INVALID_IGNORE 0x1 -#define INVALID_REPLACE 0x2 -#define UNDEF_IGNORE 0x10 -#define UNDEF_REPLACE 0x20 /* * Dispatch data and logic @@ -972,8 +968,8 @@ found_needreport: return econv_source_buffer_empty; } -rb_econv_result_t -rb_econv_convert(rb_econv_t *ec, +static rb_econv_result_t +rb_econv_convert0(rb_econv_t *ec, const unsigned char **input_ptr, const unsigned char *input_stop, unsigned char **output_ptr, unsigned char *output_stop, int flags) @@ -1051,6 +1047,47 @@ gotresult: return res; } +static int output_replacement_character(rb_econv_t *ec); + +rb_econv_result_t +rb_econv_convert(rb_econv_t *ec, + const unsigned char **input_ptr, const unsigned char *input_stop, + unsigned char **output_ptr, unsigned char *output_stop, + int flags) +{ + rb_econv_result_t ret; + +resume: + ret = rb_econv_convert0(ec, input_ptr, input_stop, output_ptr, output_stop, flags); + + if (ret == econv_invalid_byte_sequence) { + /* deal with invalid byte sequence */ + /* todo: add more alternative behaviors */ + if (ec->flags&ECONV_INVALID_IGNORE) { + goto resume; + } + else if (ec->flags&ECONV_INVALID_REPLACE) { + if (output_replacement_character(ec) == 0) + goto resume; + } + } + + if (ret == econv_undefined_conversion) { + /* valid character in source encoding + * but no related character(s) in destination encoding */ + /* todo: add more alternative behaviors */ + if (ec->flags&ECONV_UNDEF_IGNORE) { + goto resume; + } + else if (ec->flags&ECONV_UNDEF_REPLACE) { + if (output_replacement_character(ec) == 0) + goto resume; + } + } + + return ret; +} + const char * rb_econv_encoding_to_insert_output(rb_econv_t *ec) { @@ -1455,7 +1492,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, int max_output; VALUE exc; - ec = rb_econv_open(from_encoding, to_encoding, 0); + ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK)); if (!ec) rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding); @@ -1464,35 +1501,18 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, resume: ret = rb_econv_convert(ec, in_pos, in_stop, out_pos, out_stop, opt); + if (ret == econv_invalid_byte_sequence) { - /* deal with invalid byte sequence */ - /* todo: add more alternative behaviors */ - if (opt&INVALID_IGNORE) { - goto resume; - } - else if (opt&INVALID_REPLACE) { - if (output_replacement_character(ec) == 0) - goto resume; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); } if (ret == econv_undefined_conversion) { - /* valid character in from encoding - * but no related character(s) in to encoding */ - /* todo: add more alternative behaviors */ - if (opt&UNDEF_IGNORE) { - goto resume; - } - else if (opt&UNDEF_REPLACE) { - if (output_replacement_character(ec) == 0) - goto resume; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); } + if (ret == econv_destination_buffer_full) { more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop); goto resume; @@ -1520,7 +1540,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, int max_output; VALUE exc; - ec = rb_econv_open(from_encoding, to_encoding, 0); + ec = rb_econv_open(from_encoding, to_encoding, opt & (ECONV_INVALID_MASK|ECONV_UNDEF_MASK)); if (!ec) rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding); @@ -1549,31 +1569,12 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos, ptr += p - &input_byte; switch (ret) { case econv_invalid_byte_sequence: - /* deal with invalid byte sequence */ - /* todo: add more alternative behaviors */ - if (opt&INVALID_IGNORE) { - break; - } - else if (opt&INVALID_REPLACE) { - if (output_replacement_character(ec) == 0) - break; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); break; case econv_undefined_conversion: - /* valid character in from encoding - * but no related character(s) in to encoding */ - /* todo: add more alternative behaviors */ - if (opt&UNDEF_IGNORE) { - break; - } - else if (opt&UNDEF_REPLACE) { - if (output_replacement_character(ec) == 0) - break; - } exc = make_econv_exception(ec); rb_econv_close(ec); rb_exc_raise(exc); @@ -1632,10 +1633,10 @@ str_transcode(int argc, VALUE *argv, VALUE *self) if (NIL_P(v)) { } else if (v==sym_ignore) { - options |= INVALID_IGNORE; + options |= ECONV_INVALID_IGNORE; } else if (v==sym_replace) { - options |= INVALID_REPLACE; + options |= ECONV_INVALID_REPLACE; v = rb_hash_aref(opt, sym_replace); } else { @@ -1645,10 +1646,10 @@ str_transcode(int argc, VALUE *argv, VALUE *self) if (NIL_P(v)) { } else if (v==sym_ignore) { - options |= UNDEF_IGNORE; + options |= ECONV_UNDEF_IGNORE; } else if (v==sym_replace) { - options |= UNDEF_REPLACE; + options |= ECONV_UNDEF_REPLACE; } else { rb_raise(rb_eArgError, "unknown value for undefined character option"); @@ -2331,6 +2332,12 @@ Init_transcode(void) rb_define_method(rb_cEncodingConverter, "primitive_errinfo", econv_primitive_errinfo, 0); rb_define_method(rb_cEncodingConverter, "primitive_insert_output", econv_primitive_insert_output, 1); rb_define_method(rb_cEncodingConverter, "primitive_putback", econv_primitive_putback, 1); + rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); + rb_define_const(rb_cEncodingConverter, "INVALID_IGNORE", INT2FIX(ECONV_INVALID_IGNORE)); + rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_MASK", INT2FIX(ECONV_UNDEF_MASK)); + rb_define_const(rb_cEncodingConverter, "UNDEF_IGNORE", INT2FIX(ECONV_UNDEF_IGNORE)); + rb_define_const(rb_cEncodingConverter, "UNDEF_REPLACE", INT2FIX(ECONV_UNDEF_REPLACE)); rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(ECONV_PARTIAL_INPUT)); rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(ECONV_OUTPUT_FOLLOWED_BY_INPUT)); rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECODER)); -- cgit v1.2.3