about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 18
-rw-r--r-- include/ruby/io.h 36
-rw-r--r-- io.c 35
-rw-r--r-- test/ruby/test_io_m17n.rb 39
4 files changed, 113 insertions, 15 deletions
diff --git a/ChangeLog b/ChangeLog
index 1f4117e00a..0c4fefc42d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+Sat Aug 23 16:59:42 2008 Tanaka Akira <akr@fsij.org>
+
+ * include/ruby/io.h (FMODE_INVALID_MASK): defined.
+ (FMODE_INVALID_IGNORE): defined.
+ (FMODE_INVALID_REPLACE): defined.
+ (FMODE_UNDEF_MASK): defined.
+ (FMODE_UNDEF_IGNORE): defined.
+ (FMODE_UNDEF_REPLACE): defined.
+
+ * io.c (sym_invalid): defined.
+ (sym_undef): defined.
+ (sym_ignore): defined.
+ (sym_replace): defined.
+ (make_readconv): specify ECONV_INVALID_* and ECONV_UNDEF_* if
+ FMODE_INVALID_* and FMODE_UNDEF_* are set.
+ (rb_io_extract_modeenc): check {:invalid, :undef} => {:replace,
+ :ignore} for FMODE_INVALID_* and FMODE_UNDEF_*.
+
Sat Aug 23 17:06:57 2008 Nobuyoshi Nakada <nobu@ruby-lang.org>
* strftime.c (rb_strftime): support more flags.
diff --git a/include/ruby/io.h b/include/ruby/io.h
index be34231bd0..dbbba48ce5 100644
--- a/include/ruby/io.h
+++ b/include/ruby/io.h
@@ -72,21 +72,27 @@ typedef struct rb_io_t {
#define HAVE_RB_IO_T 1
-#define FMODE_READABLE 0x0001
-#define FMODE_WRITABLE 0x0002
-#define FMODE_READWRITE (FMODE_READABLE|FMODE_WRITABLE)
-#define FMODE_BINMODE 0x0004
-#define FMODE_SYNC 0x0008
-#define FMODE_TTY 0x0010
-#define FMODE_DUPLEX 0x0020
-#define FMODE_APPEND 0x0040
-#define FMODE_CREATE 0x0080
-/* #define FMODE_NOREVLOOKUP 0x0100 */
-#define FMODE_WSPLIT 0x0200
-#define FMODE_WSPLIT_INITIALIZED 0x0400
-#define FMODE_TRUNC 0x0800
-#define FMODE_TEXTMODE 0x1000
-/* #define FMODE_PREP 0x10000 */
+#define FMODE_READABLE 0x00000001
+#define FMODE_WRITABLE 0x00000002
+#define FMODE_READWRITE (FMODE_READABLE|FMODE_WRITABLE)
+#define FMODE_BINMODE 0x00000004
+#define FMODE_SYNC 0x00000008
+#define FMODE_TTY 0x00000010
+#define FMODE_DUPLEX 0x00000020
+#define FMODE_APPEND 0x00000040
+#define FMODE_CREATE 0x00000080
+/* #define FMODE_NOREVLOOKUP 0x00000100 */
+#define FMODE_WSPLIT 0x00000200
+#define FMODE_WSPLIT_INITIALIZED 0x00000400
+#define FMODE_TRUNC 0x00000800
+#define FMODE_TEXTMODE 0x00001000
+/* #define FMODE_PREP 0x00010000 */
+#define FMODE_INVALID_MASK 0x00f00000
+#define FMODE_INVALID_IGNORE 0x00100000
+#define FMODE_INVALID_REPLACE 0x00200000
+#define FMODE_UNDEF_MASK 0x0f000000
+#define FMODE_UNDEF_IGNORE 0x01000000
+#define FMODE_UNDEF_REPLACE 0x02000000
#define GetOpenFile(obj,fp) rb_io_check_closed((fp) = RFILE(rb_io_taint_check(obj))->fptr)
diff --git a/io.c b/io.c
index a87b8f3bef..c05a0b8ac2 100644
--- a/io.c
+++ b/io.c
@@ -126,6 +126,7 @@ static VALUE argf;
static ID id_write, id_read, id_getc, id_flush, id_readpartial;
static VALUE sym_mode, sym_perm, sym_extenc, sym_intenc, sym_encoding, sym_open_args;
static VALUE sym_textmode, sym_binmode;
+static VALUE sym_invalid, sym_undef, sym_ignore, sym_replace;
struct timeval rb_time_interval(VALUE);
@@ -1433,6 +1434,10 @@ make_readconv(rb_io_t *fptr)
const char *sname, *dname;
if (NEED_NEWLINE_DECODER(fptr))
ecflags |= ECONV_UNIVERSAL_NEWLINE_DECODER;
+ if (fptr->mode & FMODE_INVALID_MASK)
+ ecflags |= (fptr->mode / (FMODE_INVALID_MASK/ECONV_INVALID_MASK)) & ECONV_INVALID_MASK;
+ if (fptr->mode & FMODE_UNDEF_MASK)
+ ecflags |= (fptr->mode / (FMODE_UNDEF_MASK/ECONV_UNDEF_MASK)) & ECONV_UNDEF_MASK;
if (fptr->enc2) {
sname = fptr->enc2->name;
dname = fptr->enc->name;
@@ -3876,6 +3881,32 @@ rb_io_extract_modeenc(VALUE *mode_p, VALUE opthash,
modenum |= O_BINARY;
#endif
}
+ v = rb_hash_aref(opthash, sym_invalid);
+ if (!NIL_P(v)) {
+ if (v == sym_replace) {
+ flags |= FMODE_INVALID_REPLACE;
+ }
+ else if (v == sym_ignore) {
+ flags |= FMODE_INVALID_IGNORE;
+ }
+ else {
+ v = rb_inspect(v);
+ rb_raise(rb_eArgError, "unexpected action for invalid byte sequence: %s", StringValueCStr(v));
+ }
+ }
+ v = rb_hash_aref(opthash, sym_undef);
+ if (!NIL_P(v)) {
+ if (v == sym_replace) {
+ flags |= FMODE_UNDEF_REPLACE;
+ }
+ else if (v == sym_ignore) {
+ flags |= FMODE_UNDEF_IGNORE;
+ }
+ else {
+ v = rb_inspect(v);
+ rb_raise(rb_eArgError, "unexpected action for undefined conversion: %s", StringValueCStr(v));
+ }
+ }
if (io_extract_encoding_option(opthash, &enc, &enc2)) {
if (has_enc) {
@@ -8353,4 +8384,8 @@ Init_IO(void)
sym_open_args = ID2SYM(rb_intern("open_args"));
sym_textmode = ID2SYM(rb_intern("textmode"));
sym_binmode = ID2SYM(rb_intern("binmode"));
+ sym_invalid = ID2SYM(rb_intern("invalid"));
+ sym_undef = ID2SYM(rb_intern("undef"));
+ sym_ignore = ID2SYM(rb_intern("ignore"));
+ sym_replace = ID2SYM(rb_intern("replace"));
}
diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb
index 59a691cbd6..94be8b311f 100644
--- a/test/ruby/test_io_m17n.rb
+++ b/test/ruby/test_io_m17n.rb
@@ -1163,5 +1163,44 @@ EOT
}
end
+ def test_invalid_r
+ with_tmpdir {
+ generate_file("t.txt", "a\x80b")
+ open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f|
+ assert_equal("a?b", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f|
+ assert_equal("ab", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f|
+ assert_raise(Encoding::InvalidByteSequence) { f.read }
+ assert_equal("b", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f|
+ assert_raise(Encoding::InvalidByteSequence) { f.read }
+ assert_equal("b", f.read)
+ }
+ }
+ end
+
+ def test_undef_r
+ with_tmpdir {
+ generate_file("t.txt", "a\uFFFDb")
+ open("t.txt", "r:utf-8:euc-jp", :undef => :replace) {|f|
+ assert_equal("a?b", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :undef => :ignore) {|f|
+ assert_equal("ab", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :invalid => :replace) {|f|
+ assert_raise(Encoding::ConversionUndefined) { f.read }
+ assert_equal("b", f.read)
+ }
+ open("t.txt", "r:utf-8:euc-jp", :invalid => :ignore) {|f|
+ assert_raise(Encoding::ConversionUndefined) { f.read }
+ assert_equal("b", f.read)
+ }
+ }
+ end
end