diff options
author | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-11 22:44:23 +0000 |
---|---|---|
committer | akr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-08-11 22:44:23 +0000 |
commit | 94ca2d94deca939e6e50c09705873eb89c6012aa (patch) | |
tree | 269a9340a1ef490e80d3453781077943448d5c61 /transcode.c | |
parent | 5f9b877ebe2d3d53ec44df57aef982c5ae592a7f (diff) | |
download | ruby-94ca2d94deca939e6e50c09705873eb89c6012aa.tar.gz |
* transcode_data.h (rb_transcoder): add resetstate_func field for
resetting the state of a stateful encoding.
* enc/trans/iso2022.trans (rb_EUC_JP_to_ISO_2022_JP): specify
finish_eucjp_to_iso2022jp for resetstate_func.
* tool/transcode-tblgen.rb: specify NULL for resetstate_func.
* transcode.c (output_replacement_character): call resetstate_func
before appending the replacement character.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18503 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'transcode.c')
-rw-r--r-- | transcode.c | 85 |
1 files changed, 60 insertions, 25 deletions
```diff
diff --git a/transcode.c b/transcode.c
index 9219a98cd6..1802552a41 100644
--- a/transcode.c
+++ b/transcode.c
@@ -292,19 +292,6 @@ get_replacement_character(rb_encoding *enc, int *len_ret)
     }
 }
 
-static void
-output_replacement_character(unsigned char **out_pp, rb_encoding *enc)
-{
-    const char *replacement;
-    int len;
-    replacement = get_replacement_character(enc, &len);
-
-    memcpy(*out_pp, replacement, len);
-
-    *out_pp += len;
-    return;
-}
-
 /*
  * Transcoding engine logic
  */
@@ -818,6 +805,62 @@ more_output_buffer(
     *out_stop_ptr = *out_start_ptr + new_len;
 }
 
+static void
+output_replacement_character(
+        VALUE destination,
+        unsigned char *(*resize_destination)(VALUE, int, int),
+        rb_trans_t *ts,
+        unsigned char **out_start_ptr,
+        unsigned char **out_pos,
+        unsigned char **out_stop_ptr)
+
+{
+    rb_transcoding *tc;
+    const rb_transcoder *tr;
+    int max_output;
+    rb_encoding *enc;
+    const char *replacement;
+    int len;
+
+    tc = ts->elems[ts->num_trans-1].tc;
+    tr = tc->transcoder;
+    max_output = tr->max_output;
+    enc = rb_enc_find(tr->to_encoding);
+
+    /*
+     * Assumption for stateful encoding:
+     *
+     * - The replacement character can be output on resetted state and doesn't
+     *   change the state.
+     * - it is acceptable that extra state changing sequence if the replacement
+     *   character contains a state changing sequence.
+     *
+     * Currently the replacement character for stateful encoding such as
+     * ISO-2022-JP is "?" and it has no state changing sequence.
+     * So the extra state changing sequence don't occur.
+     *
+     * Thease assumption may be removed in future.
+     * It needs to scan the replacement character to check
+     * state changing sequences in the replacement character.
+     */
+
+    if (tr->resetstate_func) {
+        if (*out_stop_ptr - *out_pos < max_output)
+            more_output_buffer(destination, resize_destination, ts, out_start_ptr, out_pos, out_stop_ptr);
+        *out_pos += tr->resetstate_func(tc, *out_pos);
+    }
+
+    if (*out_stop_ptr - *out_pos < max_output)
+        more_output_buffer(destination, resize_destination, ts, out_start_ptr, out_pos, out_stop_ptr);
+
+    replacement = get_replacement_character(enc, &len);
+
+    memcpy(*out_pos, replacement, len);
+
+    *out_pos += len;
+    return;
+}
+
 #if 1
 static void
 transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
@@ -848,9 +891,7 @@ resume:
             goto resume;
         }
         else if (opt&INVALID_REPLACE) {
-            if (out_stop - *out_pos < max_output)
-                more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
-            output_replacement_character(out_pos, rb_enc_find(to_encoding));
+            output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
             goto resume;
         }
         rb_trans_close(ts);
@@ -864,9 +905,7 @@ resume:
             goto resume;
         }
         else if (opt&UNDEF_REPLACE) {
-            if (out_stop - *out_pos < max_output)
-                more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
-            output_replacement_character(out_pos, rb_enc_find(to_encoding));
+            output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
             goto resume;
         }
         rb_trans_close(ts);
@@ -931,9 +970,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
             break;
         }
         else if (opt&INVALID_REPLACE) {
-            if (out_stop - *out_pos < max_output)
-                more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
-            output_replacement_character(out_pos, rb_enc_find(to_encoding));
+            output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
             break;
         }
         rb_trans_close(ts);
@@ -948,9 +985,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
             break;
         }
         else if (opt&UNDEF_REPLACE) {
-            if (out_stop - *out_pos < max_output)
-                more_output_buffer(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
-            output_replacement_character(out_pos, rb_enc_find(to_encoding));
+            output_replacement_character(destination, resize_destination, ts, &out_start, out_pos, &out_stop);
             break;
         }
         rb_trans_close(ts);
```