aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--ChangeLog17
-rw-r--r--test/ruby/test_econv.rb84
-rw-r--r--transcode.c251
-rw-r--r--transcode_data.h16
4 files changed, 193 insertions, 175 deletions
diff --git a/ChangeLog b/ChangeLog
index d27394f54d..1f8cf50ecc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+Thu Aug 14 20:25:52 2008 Tanaka Akira <akr@fsij.org>
+
+ * transcode_data.h (transcode_invalid_byte_sequence): renamed from
+ transcode_invalid_input.
+ (transcode_destination_buffer_full): renamed from transcode_obuf_full.
+ (transcode_source_buffer_empty): renamed from transcode_ibuf_empty.
+ (rb_econv_result_t): renamed from rb_trans_result_t.
+ (rb_econv_elem_t): renamed from rb_trans_elem_t.
+ (rb_econv_t): renamed from rb_trans_t.
+
+ * transcode.c (UNIVERSAL_NEWLINE_DECODER): renamed from
+ UNIVERSAL_NEWLINE.
+ (CRLF_NEWLINE_ENCODER): renamed from CRLF_NEWLINE.
+ (CR_NEWLINE_ENCODER): renamed from CR_NEWLINE.
+ (rb_econv_open): renamed from rb_trans_open.
+ (rb_econv_close): renamed from rb_trans_close.
+
Thu Aug 14 19:41:42 2008 Tanaka Akira <akr@fsij.org>
* transcode.c (load_transcoder): unused function removed.
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb
index ba8d6d9b94..1da9a34b4f 100644
--- a/test/ruby/test_econv.rb
+++ b/test/ruby/test_econv.rb
@@ -42,7 +42,7 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_partial_input
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
ret = ec.primitive_convert(src="", dst="", nil, 10, Encoding::Converter::PARTIAL_INPUT)
- assert_equal(:ibuf_empty, ret)
+ assert_equal(:source_buffer_empty, ret)
ret = ec.primitive_convert(src="", dst="", nil, 10)
assert_equal(:finished, ret)
end
@@ -50,22 +50,22 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_accumulate_dst1
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 1]
- check_ec("a", "c\u{3042}def", :obuf_full, *a)
- check_ec("ab", "\u{3042}def", :obuf_full, *a)
- check_ec("abc", "def", :obuf_full, *a)
- check_ec("abc\xA4", "def", :obuf_full, *a)
- check_ec("abc\xA4\xA2", "ef", :obuf_full, *a)
- check_ec("abc\xA4\xA2d", "f", :obuf_full, *a)
- check_ec("abc\xA4\xA2de", "", :obuf_full, *a)
+ check_ec("a", "c\u{3042}def", :destination_buffer_full, *a)
+ check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
+ check_ec("abc", "def", :destination_buffer_full, *a)
+ check_ec("abc\xA4", "def", :destination_buffer_full, *a)
+ check_ec("abc\xA4\xA2", "ef", :destination_buffer_full, *a)
+ check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
+ check_ec("abc\xA4\xA2de", "", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2def", "", :finished, *a)
end
def test_accumulate_dst2
ec = Encoding::Converter.new("UTF-8", "EUC-JP")
a = ["", "abc\u{3042}def", ec, nil, 2]
- check_ec("ab", "\u{3042}def", :obuf_full, *a)
- check_ec("abc\xA4", "def", :obuf_full, *a)
- check_ec("abc\xA4\xA2d", "f", :obuf_full, *a)
+ check_ec("ab", "\u{3042}def", :destination_buffer_full, *a)
+ check_ec("abc\xA4", "def", :destination_buffer_full, *a)
+ check_ec("abc\xA4\xA2d", "f", :destination_buffer_full, *a)
check_ec("abc\xA4\xA2def", "", :finished, *a)
end
@@ -81,40 +81,40 @@ class TestEncodingConverter < Test::Unit::TestCase
def test_iso2022jp_outstream
ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
- src << "a"; check_ec("a", "", :ibuf_empty, *a)
- src << "\xA2"; check_ec("a", "", :ibuf_empty, *a)
- src << "\xA4"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
- src << "\xA1"; check_ec("a\e$B\"$", "", :ibuf_empty, *a)
- src << "\xA2"; check_ec("a\e$B\"$!\"", "", :ibuf_empty, *a)
- src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :ibuf_empty, *a)
- src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :ibuf_empty, *a)
+ src << "a"; check_ec("a", "", :source_buffer_empty, *a)
+ src << "\xA2"; check_ec("a", "", :source_buffer_empty, *a)
+ src << "\xA4"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
+ src << "\xA1"; check_ec("a\e$B\"$", "", :source_buffer_empty, *a)
+ src << "\xA2"; check_ec("a\e$B\"$!\"", "", :source_buffer_empty, *a)
+ src << "b"; check_ec("a\e$B\"$!\"\e(Bb", "", :source_buffer_empty, *a)
+ src << "\xA2\xA6"; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&", "", :source_buffer_empty, *a)
a[-1] = 0; check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, *a)
end
def test_invalid
- assert_econv("", :invalid_input, 100, ["UTF-8", "EUC-JP"], "\x80", "")
- assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
- assert_econv("a", :invalid_input, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80")
- assert_econv("abc", :invalid_input, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def")
- assert_econv("abc", :invalid_input, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def")
- assert_econv("abc", :invalid_input, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def")
+ assert_econv("", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "\x80", "")
+ assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "")
+ assert_econv("a", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "a\x80", "\x80")
+ assert_econv("abc", :invalid_byte_sequence, 100, ["UTF-8", "EUC-JP"], "abc\xFF", "def")
+ assert_econv("abc", :invalid_byte_sequence, 100, ["Shift_JIS", "EUC-JP"], "abc\xFF", "def")
+ assert_econv("abc", :invalid_byte_sequence, 100, ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def")
end
def test_invalid2
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 1]
- check_ec("a", "c\xFFdef", :obuf_full, *a)
- check_ec("ab", "\xFFdef", :obuf_full, *a)
- check_ec("abc", "def", :invalid_input, *a)
- check_ec("abcd", "f", :obuf_full, *a)
- check_ec("abcde", "", :obuf_full, *a)
+ check_ec("a", "c\xFFdef", :destination_buffer_full, *a)
+ check_ec("ab", "\xFFdef", :destination_buffer_full, *a)
+ check_ec("abc", "def", :invalid_byte_sequence, *a)
+ check_ec("abcd", "f", :destination_buffer_full, *a)
+ check_ec("abcde", "", :destination_buffer_full, *a)
check_ec("abcdef", "", :finished, *a)
end
def test_invalid3
ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
a = ["", "abc\xFFdef", ec, nil, 10]
- check_ec("abc", "def", :invalid_input, *a)
+ check_ec("abc", "def", :invalid_byte_sequence, *a)
check_ec("abcdef", "", :finished, *a)
end
@@ -124,7 +124,7 @@ class TestEncodingConverter < Test::Unit::TestCase
check_ec("a", "bc\xFFdef", :output_followed_by_input, *a)
check_ec("ab", "c\xFFdef", :output_followed_by_input, *a)
check_ec("abc", "\xFFdef", :output_followed_by_input, *a)
- check_ec("abc", "def", :invalid_input, *a)
+ check_ec("abc", "def", :invalid_byte_sequence, *a)
check_ec("abcd", "ef", :output_followed_by_input, *a)
check_ec("abcde", "f", :output_followed_by_input, *a)
check_ec("abcdef", "", :output_followed_by_input, *a)
@@ -135,7 +135,7 @@ class TestEncodingConverter < Test::Unit::TestCase
ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
- check_ec("A", "\x00B", :invalid_input, *a) # \xDC\x00 is invalid as UTF-16BE
+ check_ec("A", "\x00B", :invalid_byte_sequence, *a) # \xDC\x00 is invalid as UTF-16BE
check_ec("AB", "", :finished, *a)
end
@@ -144,29 +144,29 @@ class TestEncodingConverter < Test::Unit::TestCase
a = ["", "\xFF\xFE\x00A\xDC\x00\x00B", ec, nil, 10, Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT]
check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, *a)
check_ec("A", "\xDC\x00\x00B", :output_followed_by_input, *a)
- check_ec("A", "\x00B", :invalid_input, *a)
+ check_ec("A", "\x00B", :invalid_byte_sequence, *a)
check_ec("AB", "", :output_followed_by_input, *a)
check_ec("AB", "", :finished, *a)
end
def test_universal_newline
- ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE)
+ ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
a = ["", src="", ec, nil, 50, Encoding::Converter::PARTIAL_INPUT]
- src << "abc\r\ndef"; check_ec("abc\ndef", "", :ibuf_empty, *a)
- src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :ibuf_empty, *a)
- src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :ibuf_empty, *a)
- src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu\n", "", :ibuf_empty, *a)
- src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :ibuf_empty, *a)
- src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :ibuf_empty, *a)
+ src << "abc\r\ndef"; check_ec("abc\ndef", "", :source_buffer_empty, *a)
+ src << "ghi\njkl"; check_ec("abc\ndefghi\njkl", "", :source_buffer_empty, *a)
+ src << "mno\rpqr"; check_ec("abc\ndefghi\njklmno\npqr", "", :source_buffer_empty, *a)
+ src << "stu\r"; check_ec("abc\ndefghi\njklmno\npqrstu\n", "", :source_buffer_empty, *a)
+ src << "\nvwx"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx", "", :source_buffer_empty, *a)
+ src << "\nyz"; check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz", "", :source_buffer_empty, *a)
end
def test_crlf_newline
- ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE)
+ ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CRLF_NEWLINE_ENCODER)
assert_econv("abc\r\ndef", :finished, 50, ec, "abc\ndef", "")
end
def test_cr_newline
- ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE)
+ ec = Encoding::Converter.new("UTF-8", "EUC-JP", Encoding::Converter::CR_NEWLINE_ENCODER)
assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "")
end
diff --git a/transcode.c b/transcode.c
index 80eea1b573..8e2d6d453c 100644
--- a/transcode.c
+++ b/transcode.c
@@ -21,15 +21,15 @@ VALUE rb_eInvalidByteSequence;
VALUE rb_cEncodingConverter;
static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
-#define INVALID_IGNORE 0x1
-#define INVALID_REPLACE 0x2
-#define UNDEF_IGNORE 0x10
-#define UNDEF_REPLACE 0x20
-#define PARTIAL_INPUT 0x100
-#define UNIVERSAL_NEWLINE 0x200
-#define CRLF_NEWLINE 0x400
-#define CR_NEWLINE 0x800
-#define OUTPUT_FOLLOWED_BY_INPUT 0x1000
+#define INVALID_IGNORE 0x1
+#define INVALID_REPLACE 0x2
+#define UNDEF_IGNORE 0x10
+#define UNDEF_REPLACE 0x20
+#define PARTIAL_INPUT 0x100
+#define UNIVERSAL_NEWLINE_DECODER 0x200
+#define CRLF_NEWLINE_ENCODER 0x400
+#define CR_NEWLINE_ENCODER 0x800
+#define OUTPUT_FOLLOWED_BY_INPUT 0x1000
/*
* Dispatch data and logic
@@ -331,7 +331,7 @@ transcode_char_start(rb_transcoding *tc,
return ptr;
}
-static rb_trans_result_t
+static rb_econv_result_t
transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *tc,
@@ -382,7 +382,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
} while (0)
#define SUSPEND_OBUF(num) \
do { \
- while (out_stop - out_p < 1) { SUSPEND(transcode_obuf_full, num); } \
+ while (out_stop - out_p < 1) { SUSPEND(transcode_destination_buffer_full, num); } \
} while (0)
#define SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(num) \
@@ -431,7 +431,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
if (in_stop <= in_p) {
if (!(opt & PARTIAL_INPUT))
break;
- SUSPEND(transcode_ibuf_empty, 7);
+ SUSPEND(transcode_source_buffer_empty, 7);
continue;
}
@@ -457,7 +457,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
while (in_p >= in_stop) {
if (!(opt & PARTIAL_INPUT))
goto invalid;
- SUSPEND(transcode_ibuf_empty, 5);
+ SUSPEND(transcode_source_buffer_empty, 5);
}
next_byte = (unsigned char)*in_p++;
next_table = (const BYTE_LOOKUP *)next_info;
@@ -532,7 +532,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
SUSPEND_OUTPUT_FOLLOWED_BY_INPUT(26);
while ((opt & PARTIAL_INPUT) && tc->recognized_len + (in_stop - inchar_start) < unitlen) {
in_p = in_stop;
- SUSPEND(transcode_ibuf_empty, 8);
+ SUSPEND(transcode_source_buffer_empty, 8);
}
if (tc->recognized_len + (in_stop - inchar_start) <= unitlen) {
in_p = in_stop;
@@ -555,7 +555,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
continue;
invalid:
- SUSPEND(transcode_invalid_input, 1);
+ SUSPEND(transcode_invalid_byte_sequence, 1);
continue;
undef:
@@ -588,7 +588,7 @@ transcode_restartable0(const unsigned char **in_pos, unsigned char **out_pos,
#undef writebuf_off
}
-static rb_trans_result_t
+static rb_econv_result_t
transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
const unsigned char *in_stop, unsigned char *out_stop,
rb_transcoding *tc,
@@ -598,13 +598,13 @@ transcode_restartable(const unsigned char **in_pos, unsigned char **out_pos,
unsigned char *readagain_buf = ALLOCA_N(unsigned char, tc->readagain_len);
const unsigned char *readagain_pos = readagain_buf;
const unsigned char *readagain_stop = readagain_buf + tc->readagain_len;
- rb_trans_result_t res;
+ rb_econv_result_t res;
MEMCPY(readagain_buf, TRANSCODING_READBUF(tc) + tc->recognized_len,
unsigned char, tc->readagain_len);
tc->readagain_len = 0;
res = transcode_restartable0(&readagain_pos, out_pos, readagain_stop, out_stop, tc, opt|PARTIAL_INPUT);
- if (res != transcode_ibuf_empty) {
+ if (res != transcode_source_buffer_empty) {
MEMCPY(TRANSCODING_READBUF(tc) + tc->recognized_len + tc->readagain_len,
readagain_pos, unsigned char, readagain_stop - readagain_pos);
tc->readagain_len += readagain_stop - readagain_pos;
@@ -637,7 +637,7 @@ rb_transcoding_open_by_transcoder(const rb_transcoder *tr, int flags)
return tc;
}
-static rb_trans_result_t
+static rb_econv_result_t
rb_transcoding_convert(rb_transcoding *tc,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
@@ -660,10 +660,10 @@ rb_transcoding_close(rb_transcoding *tc)
xfree(tc);
}
-static rb_trans_t *
+static rb_econv_t *
rb_trans_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
{
- rb_trans_t *ts;
+ rb_econv_t *ts;
int i;
for (i = 0; i < n; i++) {
@@ -673,9 +673,9 @@ rb_trans_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
return NULL;
}
- ts = ALLOC(rb_trans_t);
+ ts = ALLOC(rb_econv_t);
ts->num_trans = n;
- ts->elems = ALLOC_N(rb_trans_elem_t, ts->num_trans);
+ ts->elems = ALLOC_N(rb_econv_elem_t, ts->num_trans);
ts->num_finished = 0;
ts->last_tc = NULL;
for (i = 0; i < ts->num_trans; i++) {
@@ -687,7 +687,7 @@ rb_trans_open_by_transcoder_entries(int n, transcoder_entry_t **entries)
ts->elems[i].out_data_start = NULL;
ts->elems[i].out_data_end = NULL;
ts->elems[i].out_buf_end = NULL;
- ts->elems[i].last_result = transcode_ibuf_empty;
+ ts->elems[i].last_result = transcode_source_buffer_empty;
}
ts->last_tc = ts->elems[ts->num_trans-1].tc;
@@ -720,20 +720,20 @@ trans_open_i(const char *from, const char *to, int depth, void *arg)
entries[depth] = get_transcoder_entry(from, to);
}
-static rb_trans_t *
-rb_trans_open(const char *from, const char *to, int flags)
+static rb_econv_t *
+rb_econv_open(const char *from, const char *to, int flags)
{
transcoder_entry_t **entries = NULL;
int num_trans;
- static rb_trans_t *ts;
+ static rb_econv_t *ts;
num_trans = transcode_search_path(from, to, trans_open_i, (void *)&entries);
if (num_trans < 0 || !entries)
return NULL;
- if (flags & (CRLF_NEWLINE|CR_NEWLINE)) {
- const char *name = (flags & CRLF_NEWLINE) ? "crlf_newline" : "cr_newline";
+ if (flags & (CRLF_NEWLINE_ENCODER|CR_NEWLINE_ENCODER)) {
+ const char *name = (flags & CRLF_NEWLINE_ENCODER) ? "crlf_newline" : "cr_newline";
transcoder_entry_t *e = get_transcoder_entry("", name);
if (!e)
return NULL;
@@ -742,7 +742,7 @@ rb_trans_open(const char *from, const char *to, int flags)
num_trans++;
}
- if (flags & UNIVERSAL_NEWLINE) {
+ if (flags & UNIVERSAL_NEWLINE_DECODER) {
transcoder_entry_t *e = get_transcoder_entry("universal_newline", "");
if (!e)
return NULL;
@@ -751,7 +751,7 @@ rb_trans_open(const char *from, const char *to, int flags)
ts = rb_trans_open_by_transcoder_entries(num_trans, entries);
- if (flags & UNIVERSAL_NEWLINE) {
+ if (flags & UNIVERSAL_NEWLINE_DECODER) {
ts->last_tc = ts->elems[ts->num_trans-2].tc;
}
@@ -759,7 +759,7 @@ rb_trans_open(const char *from, const char *to, int flags)
}
static int
-trans_sweep(rb_trans_t *ts,
+trans_sweep(rb_econv_t *ts,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
int flags,
@@ -770,20 +770,20 @@ trans_sweep(rb_trans_t *ts,
const unsigned char **ipp, *is, *iold;
unsigned char **opp, *os, *oold;
- rb_trans_result_t res;
+ rb_econv_result_t res;
try = 1;
while (try) {
try = 0;
for (i = start; i < ts->num_trans; i++) {
- rb_trans_elem_t *te = &ts->elems[i];
+ rb_econv_elem_t *te = &ts->elems[i];
if (i == 0) {
ipp = input_ptr;
is = input_stop;
}
else {
- rb_trans_elem_t *prev_te = &ts->elems[i-1];
+ rb_econv_elem_t *prev_te = &ts->elems[i-1];
ipp = (const unsigned char **)&prev_te->out_data_start;
is = prev_te->out_data_end;
}
@@ -820,13 +820,13 @@ trans_sweep(rb_trans_t *ts,
try = 1;
switch (res) {
- case transcode_invalid_input:
+ case transcode_invalid_byte_sequence:
case transcode_undefined_conversion:
case transcode_output_followed_by_input:
return i;
- case transcode_obuf_full:
- case transcode_ibuf_empty:
+ case transcode_destination_buffer_full:
+ case transcode_source_buffer_empty:
break;
case transcode_finished:
@@ -838,8 +838,8 @@ trans_sweep(rb_trans_t *ts,
return -1;
}
-static rb_trans_result_t
-rb_trans_conv(rb_trans_t *ts,
+static rb_econv_result_t
+rb_trans_conv(rb_econv_t *ts,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
int flags)
@@ -862,12 +862,12 @@ rb_trans_conv(rb_trans_t *ts,
}
if (ts->elems[0].last_result == transcode_output_followed_by_input)
- ts->elems[0].last_result = transcode_ibuf_empty;
+ ts->elems[0].last_result = transcode_source_buffer_empty;
needreport_index = -1;
for (i = ts->num_trans-1; 0 <= i; i--) {
switch (ts->elems[i].last_result) {
- case transcode_invalid_input:
+ case transcode_invalid_byte_sequence:
case transcode_undefined_conversion:
case transcode_output_followed_by_input:
case transcode_finished:
@@ -875,8 +875,8 @@ rb_trans_conv(rb_trans_t *ts,
needreport_index = i;
goto found_needreport;
- case transcode_obuf_full:
- case transcode_ibuf_empty:
+ case transcode_destination_buffer_full:
+ case transcode_source_buffer_empty:
break;
default:
@@ -886,14 +886,14 @@ rb_trans_conv(rb_trans_t *ts,
/* /^[io]+$/ is confirmed. but actually /^i*o*$/. */
- if (ts->elems[ts->num_trans-1].last_result == transcode_obuf_full &&
+ if (ts->elems[ts->num_trans-1].last_result == transcode_destination_buffer_full &&
(flags & OUTPUT_FOLLOWED_BY_INPUT)) {
- rb_trans_result_t res;
+ rb_econv_result_t res;
res = rb_trans_conv(ts, NULL, NULL, output_ptr, output_stop,
(flags & ~OUTPUT_FOLLOWED_BY_INPUT)|PARTIAL_INPUT);
- if (res == transcode_ibuf_empty)
+ if (res == transcode_source_buffer_empty)
return transcode_output_followed_by_input;
return res;
}
@@ -908,26 +908,26 @@ found_needreport:
} while (needreport_index != -1 && needreport_index != ts->num_trans-1);
for (i = ts->num_trans-1; 0 <= i; i--) {
- if (ts->elems[i].last_result != transcode_ibuf_empty) {
- rb_trans_result_t res = ts->elems[i].last_result;
- if (res == transcode_invalid_input ||
+ if (ts->elems[i].last_result != transcode_source_buffer_empty) {
+ rb_econv_result_t res = ts->elems[i].last_result;
+ if (res == transcode_invalid_byte_sequence ||
res == transcode_undefined_conversion ||
res == transcode_output_followed_by_input) {
- ts->elems[i].last_result = transcode_ibuf_empty;
+ ts->elems[i].last_result = transcode_source_buffer_empty;
}
return res;
}
}
- return transcode_ibuf_empty;
+ return transcode_source_buffer_empty;
}
-static rb_trans_result_t
-rb_econv_conv(rb_trans_t *ts,
+static rb_econv_result_t
+rb_econv_conv(rb_econv_t *ts,
const unsigned char **input_ptr, const unsigned char *input_stop,
unsigned char **output_ptr, unsigned char *output_stop,
int flags)
{
- rb_trans_result_t res;
+ rb_econv_result_t res;
if ((flags & OUTPUT_FOLLOWED_BY_INPUT) ||
ts->num_trans == 1)
@@ -941,7 +941,7 @@ rb_econv_conv(rb_trans_t *ts,
}
static void
-rb_trans_close(rb_trans_t *ts)
+rb_econv_close(rb_econv_t *ts)
{
int i;
@@ -1035,13 +1035,13 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
const char *to_encoding,
const int opt)
{
- rb_trans_t *ts;
+ rb_econv_t *ts;
rb_transcoding *last_tc;
- rb_trans_result_t ret;
+ rb_econv_result_t ret;
unsigned char *out_start = *out_pos;
int max_output;
- ts = rb_trans_open(from_encoding, to_encoding, 0);
+ ts = rb_econv_open(from_encoding, to_encoding, 0);
if (!ts)
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
@@ -1050,7 +1050,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
resume:
ret = rb_econv_conv(ts, in_pos, in_stop, out_pos, out_stop, opt);
- if (ret == transcode_invalid_input) {
+ if (ret == transcode_invalid_byte_sequence) {
/* deal with invalid byte sequence */
/* todo: add more alternative behaviors */
if (opt&INVALID_IGNORE) {
@@ -1060,7 +1060,7 @@ resume:
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
goto resume;
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
}
if (ret == transcode_undefined_conversion) {
@@ -1074,15 +1074,15 @@ resume:
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
goto resume;
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
}
- if (ret == transcode_obuf_full) {
+ if (ret == transcode_destination_buffer_full) {
more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
goto resume;
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
return;
}
#else
@@ -1096,27 +1096,27 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
const char *to_encoding,
const int opt)
{
- rb_trans_t *ts;
+ rb_econv_t *ts;
rb_transcoding *last_tc;
- rb_trans_result_t ret;
+ rb_econv_result_t ret;
unsigned char *out_start = *out_pos;
const unsigned char *ptr;
int max_output;
- ts = rb_trans_open(from_encoding, to_encoding, 0);
+ ts = rb_econv_open(from_encoding, to_encoding, 0);
if (!ts)
rb_raise(rb_eArgError, "transcoding not supported (from %s to %s)", from_encoding, to_encoding);
last_tc = ts->last_tc;
max_output = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
- ret = transcode_ibuf_empty;
+ ret = transcode_source_buffer_empty;
ptr = *in_pos;
while (ret != transcode_finished) {
unsigned char input_byte;
const unsigned char *p = &input_byte;
- if (ret == transcode_ibuf_empty) {
+ if (ret == transcode_source_buffer_empty) {
if (ptr < in_stop) {
input_byte = *ptr;
ret = rb_econv_conv(ts, &p, p+1, out_pos, out_stop, PARTIAL_INPUT);
@@ -1131,7 +1131,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
if (&input_byte != p)
ptr += p - &input_byte;
switch (ret) {
- case transcode_invalid_input:
+ case transcode_invalid_byte_sequence:
/* deal with invalid byte sequence */
/* todo: add more alternative behaviors */
if (opt&INVALID_IGNORE) {
@@ -1141,7 +1141,7 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
break;
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
rb_raise(rb_eInvalidByteSequence, "invalid byte sequence");
break;
@@ -1156,22 +1156,22 @@ transcode_loop(const unsigned char **in_pos, unsigned char **out_pos,
output_replacement_character(destination, resize_destination, last_tc, &out_start, out_pos, &out_stop);
break;
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
rb_raise(rb_eConversionUndefined, "conversion undefined for byte sequence (maybe invalid byte sequence)");
break;
- case transcode_obuf_full:
+ case transcode_destination_buffer_full:
more_output_buffer(destination, resize_destination, max_output, &out_start, out_pos, &out_stop);
break;
- case transcode_ibuf_empty:
+ case transcode_source_buffer_empty:
break;
case transcode_finished:
break;
}
}
- rb_trans_close(ts);
+ rb_econv_close(ts);
*in_pos = in_stop;
return;
}
@@ -1370,9 +1370,9 @@ rb_str_transcode(VALUE str, VALUE to)
}
static void
-econv_free(rb_trans_t *ts)
+econv_free(rb_econv_t *ts)
{
- rb_trans_close(ts);
+ rb_econv_close(ts);
}
static VALUE
@@ -1383,17 +1383,17 @@ econv_s_allocate(VALUE klass)
/*
* call-seq:
- * Encoding::Converter.new(input_encoding, output_encoding)
- * Encoding::Converter.new(input_encoding, output_encoding, flags)
+ * Encoding::Converter.new(source_encoding, destination_encoding)
+ * Encoding::Converter.new(source_encoding, destination_encoding, flags)
*
* possible flags:
- * Encoding::Converter::UNIVERSAL_NEWLINE # convert CRLF and CR to LF at last
- * Encoding::Converter::CRLF_NEWLINE # convert LF to CRLF at first
- * Encoding::Converter::CR_NEWLINE # convert LF to CR at first
+ * Encoding::Converter::UNIVERSAL_NEWLINE_DECODER # convert CRLF and CR to LF at last
+ * Encoding::Converter::CRLF_NEWLINE_ENCODER # convert LF to CRLF at first
+ * Encoding::Converter::CR_NEWLINE_ENCODER # convert LF to CR at first
*
* Encoding::Converter.new creates an instance of Encoding::Converter.
*
- * input_encoding and output_encoding should be a string.
+ * source_encoding and destination_encoding should be a string.
* flags should be an integer.
*
* example:
@@ -1402,11 +1402,11 @@ econv_s_allocate(VALUE klass)
*
* # (1) convert UTF-16BE to UTF-8
* # (2) convert CRLF and CR to LF
- * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE)
+ * ec = Encoding::Converter.new("UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECODER)
*
* # (1) convert LF to CRLF
* # (2) convert UTF-8 to UTF-16BE
- * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE)
+ * ec = Encoding::Converter.new("UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_ENCODER)
*
*/
static VALUE
@@ -1414,7 +1414,7 @@ econv_init(int argc, VALUE *argv, VALUE self)
{
VALUE from_encoding, to_encoding, flags_v;
const char *from_e, *to_e;
- rb_trans_t *ts;
+ rb_econv_t *ts;
int flags;
rb_scan_args(argc, argv, "21", &from_encoding, &to_encoding, &flags_v);
@@ -1433,7 +1433,7 @@ econv_init(int argc, VALUE *argv, VALUE self)
rb_raise(rb_eTypeError, "already initialized");
}
- ts = rb_trans_open(from_e, to_e, flags);
+ ts = rb_econv_open(from_e, to_e, flags);
if (!ts) {
rb_raise(rb_eArgError, "encoding convewrter not supported (from %s to %s)", from_e, to_e);
}
@@ -1447,7 +1447,7 @@ static VALUE
econv_inspect(VALUE self)
{
const char *cname = rb_obj_classname(self);
- rb_trans_t *ts = DATA_PTR(self);
+ rb_econv_t *ts = DATA_PTR(self);
if (!ts)
return rb_sprintf("#<%s: uninitialized>", cname);
@@ -1459,7 +1459,7 @@ econv_inspect(VALUE self)
#define IS_ECONV(obj) (RDATA(obj)->dfree == (RUBY_DATA_FUNC)econv_free)
-static rb_trans_t *
+static rb_econv_t *
check_econv(VALUE self)
{
Check_Type(self, T_DATA);
@@ -1475,49 +1475,50 @@ check_econv(VALUE self)
/*
* call-seq:
- * primitive_convert(input_buffer, output_buffer, output_byteoffset, output_bytesize) -> symbol
- * primitive_convert(input_buffer, output_buffer, output_byteoffset, output_bytesize, flags) -> symbol
+ * primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize) -> symbol
+ * primitive_convert(source_buffer, destination_buffer, destination_byteoffset, destination_bytesize, flags) -> symbol
*
* possible flags:
- * Encoding::Converter::PARTIAL_INPUT # input buffer may be part of larger input
+ * Encoding::Converter::PARTIAL_INPUT # source buffer may be part of larger source
* Encoding::Converter::OUTPUT_FOLLOWED_BY_INPUT # stop conversion after output before input
*
* possible results:
- * :invalid_input
+ * :invalid_byte_sequence
* :undefined_conversion
* :output_followed_by_input
- * :obuf_full
- * :ibuf_empty
+ * :destination_buffer_full
+ * :source_buffer_empty
* :finished
*
- * primitive_convert converts input_buffer into output_buffer.
+ * primitive_convert converts source_buffer into destination_buffer.
*
- * input_buffer and output_buffer should be a string.
- * output_byteoffset should be an integer or nil.
- * output_bytesize and flags should be an integer.
+ * source_buffer and destination_buffer should be a string.
+ * destination_byteoffset should be an integer or nil.
+ * destination_bytesize and flags should be an integer.
*
- * primitive_convert convert the content of input_buffer from beginning
- * and store the result into output_buffer.
+ * primitive_convert convert the content of source_buffer from beginning
+ * and store the result into destination_buffer.
*
- * output_byteoffset and output_bytesize specify the region which
+ * destination_byteoffset and destination_bytesize specify the region which
* the converted result is stored.
- * output_byteoffset specifies the start position in output_buffer in bytes.
- * If output_byteoffset is nil, output_buffer.bytesize is assumed.
- * output_bytesize specifies maximum number of bytes.
- * After conversion, output_buffer is resized to
- * output_byteoffset + actually converted number of bytes.
+ * destination_byteoffset specifies the start position in destination_buffer in bytes.
+ * If destination_byteoffset is nil,
+ * destination_buffer.bytesize is used for appending the result.
+ * destination_bytesize specifies maximum number of bytes.
+ * After conversion, destination_buffer is resized to
+ * destination_byteoffset + actually converted number of bytes.
*
- * primitive_convert drops the first part of input_buffer.
- * the dropped part is converted in output_buffer or
+ * primitive_convert drops the first part of source_buffer.
+ * the dropped part is converted in destination_buffer or
* buffered in Encoding::Converter object.
*
* primitive_convert stops conversion when one of following condition met.
- * - invalid byte sequence found in input buffer (:invalid_input)
+ * - invalid byte sequence found in source buffer (:invalid_byte_sequence)
* - character not representable in output encoding (:undefined_conversion)
- * - after some output is generated, before any input is consumed (:output_followed_by_input)
+ * - after some output is generated, before input is done (:output_followed_by_input)
* this occur only when OUTPUT_FOLLOWED_BY_INPUT is specified.
- * - output buffer is full (:obuf_full)
- * - input buffer is empty (:ibuf_empty)
+ * - destination buffer is full (:destination_buffer_full)
+ * - source buffer is empty (:source_buffer_empty)
* this occur only when PARTIAL_INPUT is specified.
* - conversion is finished (:finished)
*
@@ -1528,11 +1529,11 @@ check_econv(VALUE self)
*
* ec = Encoding::Converter.new("UTF-8", "UTF-16BE")
* ret = ec.primitive_convert(src="pi", dst="", 1)
- * p [ret, src, dst] #=> [:obuf_full, "i", "\x00"]
+ * p [ret, src, dst] #=> [:destination_buffer_full, "i", "\x00"]
* ret = ec.primitive_convert(src, dst="", 1)
- * p [ret, src, dst] #=> [:obuf_full, "", "p"]
+ * p [ret, src, dst] #=> [:destination_buffer_full, "", "p"]
* ret = ec.primitive_convert(src, dst="", 1)
- * p [ret, src, dst] #=> [:obuf_full, "", "\x00"]
+ * p [ret, src, dst] #=> [:destination_buffer_full, "", "\x00"]
* ret = ec.primitive_convert(src, dst="", 1)
* p [ret, src, dst] #=> [:finished, "", "i"]
*
@@ -1541,8 +1542,8 @@ static VALUE
econv_primitive_convert(int argc, VALUE *argv, VALUE self)
{
VALUE input, output, output_byteoffset_v, output_bytesize_v, flags_v;
- rb_trans_t *ts = check_econv(self);
- rb_trans_result_t res;
+ rb_econv_t *ts = check_econv(self);
+ rb_econv_result_t res;
const unsigned char *ip, *is;
unsigned char *op, *os;
long output_byteoffset, output_bytesize;
@@ -1600,10 +1601,10 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
rb_str_drop_bytes(input, ip - (unsigned char *)RSTRING_PTR(input));
switch (res) {
- case transcode_invalid_input: return ID2SYM(rb_intern("invalid_input"));
+ case transcode_invalid_byte_sequence: return ID2SYM(rb_intern("invalid_byte_sequence"));
case transcode_undefined_conversion: return ID2SYM(rb_intern("undefined_conversion"));
- case transcode_obuf_full: return ID2SYM(rb_intern("obuf_full"));
- case transcode_ibuf_empty: return ID2SYM(rb_intern("ibuf_empty"));
+ case transcode_destination_buffer_full: return ID2SYM(rb_intern("destination_buffer_full"));
+ case transcode_source_buffer_empty: return ID2SYM(rb_intern("source_buffer_empty"));
case transcode_finished: return ID2SYM(rb_intern("finished"));
case transcode_output_followed_by_input: return ID2SYM(rb_intern("output_followed_by_input"));
default: return INT2NUM(res); /* should not be reached */
@@ -1619,7 +1620,7 @@ econv_primitive_convert(int argc, VALUE *argv, VALUE self)
static VALUE
econv_max_output(VALUE self)
{
- rb_trans_t *ts = check_econv(self);
+ rb_econv_t *ts = check_econv(self);
int n;
n = ts->elems[ts->num_trans-1].tc->transcoder->max_output;
@@ -1650,7 +1651,7 @@ Init_transcode(void)
rb_define_method(rb_cEncodingConverter, "max_output", econv_max_output, 0);
rb_define_const(rb_cEncodingConverter, "PARTIAL_INPUT", INT2FIX(PARTIAL_INPUT));
rb_define_const(rb_cEncodingConverter, "OUTPUT_FOLLOWED_BY_INPUT", INT2FIX(OUTPUT_FOLLOWED_BY_INPUT));
- rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE", INT2FIX(UNIVERSAL_NEWLINE));
- rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE", INT2FIX(CRLF_NEWLINE));
- rb_define_const(rb_cEncodingConverter, "CR_NEWLINE", INT2FIX(CR_NEWLINE));
+ rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECODER", INT2FIX(UNIVERSAL_NEWLINE_DECODER));
+ rb_define_const(rb_cEncodingConverter, "CRLF_NEWLINE_ENCODER", INT2FIX(CRLF_NEWLINE_ENCODER));
+ rb_define_const(rb_cEncodingConverter, "CR_NEWLINE_ENCODER", INT2FIX(CR_NEWLINE_ENCODER));
}
diff --git a/transcode_data.h b/transcode_data.h
index f843e6b205..e949471928 100644
--- a/transcode_data.h
+++ b/transcode_data.h
@@ -112,13 +112,13 @@ struct rb_transcoder {
};
typedef enum {
- transcode_invalid_input,
+ transcode_invalid_byte_sequence,
transcode_undefined_conversion,
- transcode_obuf_full,
- transcode_ibuf_empty,
+ transcode_destination_buffer_full,
+ transcode_source_buffer_empty,
transcode_finished,
transcode_output_followed_by_input,
-} rb_trans_result_t;
+} rb_econv_result_t;
typedef struct {
const char *from;
@@ -128,15 +128,15 @@ typedef struct {
unsigned char *out_data_start;
unsigned char *out_data_end;
unsigned char *out_buf_end;
- rb_trans_result_t last_result;
-} rb_trans_elem_t;
+ rb_econv_result_t last_result;
+} rb_econv_elem_t;
typedef struct {
- rb_trans_elem_t *elems;
+ rb_econv_elem_t *elems;
int num_trans;
int num_finished;
rb_transcoding *last_tc;
-} rb_trans_t;
+} rb_econv_t;
void rb_declare_transcoder(const char *enc1, const char *enc2, const char *lib);
void rb_register_transcoder(const rb_transcoder *);