aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-10 02:17:56 +0000
committerakr <akr@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2008-08-10 02:17:56 +0000
commite748ed513d1fc079513a3ae9905705f579d351a4 (patch)
tree2171a5a98d88da50480ebeaab8de82c39af17ef1
parent3ba7984ef4dfc88c7a764a24dd50aad026a7b56d (diff)
downloadruby-e748ed513d1fc079513a3ae9905705f579d351a4.tar.gz
* transcode_data.h (rb_transcoding): add feedlen field.
* transcode.c (transcode_restartable0): renamed from transcode_restartable. save input buffer into feed buffer if the next character starts at a point before the input buffer. for example, "\x00\xd8\x01" then "\x02" in UTF-16LE: \x02 makes the sequence invalid and the next character starts from \x01. (transcode_restartable): new function to call transcode_restartable0. if the feed buffer is not empty, convert it first. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18467 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog14
-rw-r--r--transcode.c60
-rw-r--r--transcode_data.h5
3 files changed, 69 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 439280d822..7146e9dadf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+Sun Aug 10 11:15:55 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode_data.h (rb_transcoding): add feedlen field.
+
+ * transcode.c (transcode_restartable0): renamed from
+ transcode_restartable.
+ save input buffer into feed buffer if the next character starts at a
+ point before the input buffer. for example, "\x00\xd8\x01" then "\x02"
+ in UTF-16LE: \x02 makes the sequence invalid and the next character
+ starts from \x01.
+ (transcode_restartable): new function to call
+ transcode_restartable0. if the feed buffer is not empty, convert it
+ first.
+
Sun Aug 10 11:02:58 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* common.mk (extconf): use MAKEDIRS.
diff --git a/transcode.c b/transcode.c
index f4c14398ec..63de574b79 100644
--- a/transcode.c
+++ b/transcode.c
@@ -355,7 +355,7 @@ typedef enum {
} transcode_result_t;
static transcode_result_t
-transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
+transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *my_transcoding,
const int opt)
@@ -363,6 +363,7 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
{
const rb_transcoder *my_transcoder = my_transcoding->transcoder;
int unitlen = my_transcoder->input_unit_length;
+ int feedlen = 0;
const unsigned char *inchar_start;
const unsigned char *in_p;
@@ -396,11 +397,15 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
do { \
my_transcoding->resume_position = (num); \
if (0 < in_p - inchar_start) \
- MEMCPY(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \
+ MEMMOVE(TRANSCODING_READBUF(my_transcoding)+my_transcoding->readlen, \
inchar_start, unsigned char, in_p - inchar_start); \
*in_pos = in_p; \
*out_pos = out_p; \
my_transcoding->readlen += in_p - inchar_start; \
+ if (feedlen) { \
+ my_transcoding->readlen -= feedlen; \
+ my_transcoding->feedlen = feedlen; \
+ } \
my_transcoding->next_table = next_table; \
my_transcoding->next_info = next_info; \
my_transcoding->next_byte = next_byte; \
@@ -524,12 +529,23 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
}
}
else {
+ int found_len; /* including the last byte, which causes the invalid result */
+ int invalid_len;
int step;
- /* xxx: step may be negative.
- * possibly in_p is lesser than *in_pos.
- * caller may want to access readbuf. */
- step = (((my_transcoding->readlen + (in_p - inchar_start)) - 1) / unitlen) * unitlen - (my_transcoding->readlen + (in_p - inchar_start));
- in_p += step;
+ found_len = my_transcoding->readlen + (in_p - inchar_start);
+ invalid_len = ((found_len - 1) / unitlen) * unitlen;
+ step = invalid_len - found_len;
+ if (step < -1) {
+ if (-step <= in_p - *in_pos) {
+ in_p += step;
+ }
+ else {
+ feedlen = -step;
+ }
+ }
+ else {
+ in_p += step;
+ }
}
goto invalid;
}
@@ -559,6 +575,32 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
#undef SUSPEND
}
+static transcode_result_t /* Front end to transcode_restartable0: converts pending feed bytes (if any) first, then the caller's input. */
+transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
+ const unsigned char *in_stop, unsigned char *out_stop,
+ rb_transcoding *my_transcoding,
+ const int opt)
+{
+ if (my_transcoding->feedlen) { /* feed bytes were recorded by an earlier call and are not yet interpreted */
+ unsigned char *feed_buf = ALLOCA_N(unsigned char, my_transcoding->feedlen); /* scratch copy of the feed bytes; presumably stack-allocated (ALLOCA_N) -- confirm */
+ const unsigned char *feed_pos = feed_buf; /* read cursor, advanced by transcode_restartable0 */
+ const unsigned char *feed_stop = feed_buf + my_transcoding->feedlen; /* one past the last feed byte */
+ transcode_result_t res;
+
+ MEMCPY(feed_buf, TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen, /* feed bytes sit right after the readlen bytes in readbuf */
+ unsigned char, my_transcoding->feedlen); /* copied out because transcode_restartable0 writes into readbuf when it suspends */
+ my_transcoding->feedlen = 0; /* cleared before the call; SUSPEND in transcode_restartable0 sets it again if new feed bytes are recorded */
+ res = transcode_restartable0(&feed_pos, out_pos, feed_stop, out_stop, my_transcoding, opt); /* convert the feed bytes alone; output goes to the caller's buffer */
+ if (res != transcode_ibuf_empty) { /* stopped for some reason other than running out of feed input */
+ MEMCPY(TRANSCODING_READBUF(my_transcoding) + my_transcoding->readlen + my_transcoding->feedlen, /* append after any feed bytes just recorded */
+ feed_pos, unsigned char, feed_stop - feed_pos); /* put the unconsumed feed bytes back into readbuf */
+ my_transcoding->feedlen += feed_stop - feed_pos; /* they stay pending for the next call */
+ return res; /* *in_pos is not advanced on this path */
+ }
+ }
+ return transcode_restartable0(in_pos, out_pos, in_stop, out_stop, my_transcoding, opt); /* no feed pending, or feed fully consumed: convert the caller's input */
+}
+
static void
more_output_buffer(
VALUE destination,
@@ -590,6 +632,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
my_transcoding->resume_position = 0;
my_transcoding->readlen = 0;
+ my_transcoding->feedlen = 0;
if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);
@@ -648,7 +691,7 @@ static void
transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
VALUE destination,
- unsigned char *(*resize_destination)(VALUE, struct rb_transcoding*, int, int),
+ unsigned char *(*resize_destination)(VALUE, int, int),
rb_transcoding *my_transcoding,
const int opt)
{
@@ -659,6 +702,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
my_transcoding->resume_position = 0;
my_transcoding->readlen = 0;
+ my_transcoding->feedlen = 0;
if (sizeof(my_transcoding->readbuf.ary) < my_transcoder->max_input) {
my_transcoding->readbuf.ptr = xmalloc(my_transcoder->max_input);
diff --git a/transcode_data.h b/transcode_data.h
index ad20a0b9df..42c3b2dc89 100644
--- a/transcode_data.h
+++ b/transcode_data.h
@@ -65,11 +65,12 @@ typedef struct rb_transcoding {
const BYTE_LOOKUP *next_table;
VALUE next_info;
unsigned char next_byte;
- int readlen;
+ int readlen; /* already interpreted */
+ int feedlen; /* not yet interpreted */
union {
unsigned char ary[8]; /* max_input <= sizeof(ary) */
unsigned char *ptr; /* length is max_input */
- } readbuf;
+ } readbuf; /* readlen + feedlen used */
unsigned char stateful[256]; /* opaque data for stateful encoding */
} rb_transcoding;