From 876146772787599c894369801034f1fed1d16b54 Mon Sep 17 00:00:00 2001 From: nobu Date: Tue, 26 Apr 2011 15:55:21 +0000 Subject: * io.c (validate_enc_binmode, rb_io_extract_modeenc): set newline decorator according to open mode. * transcode.c (rb_econv_prepare_options): new function, to prepare econv options with newline flags. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@31355 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 8 +++++++- include/ruby/encoding.h | 7 +++++++ io.c | 42 +++++++++++++++++++++++++++++------------- test/ruby/test_econv.rb | 10 ++++++++++ test/ruby/test_io_m17n.rb | 3 +++ transcode.c | 44 ++++++++++++++++++++++++++++++-------------- 6 files changed, 86 insertions(+), 28 deletions(-) diff --git a/ChangeLog b/ChangeLog index 00707f673d..66c577c850 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,10 @@ -Wed Apr 27 00:54:12 2011 Nobuyoshi Nakada +Wed Apr 27 00:55:18 2011 Nobuyoshi Nakada + + * io.c (validate_enc_binmode, rb_io_extract_modeenc): set newline + decorator according to open mode. + + * transcode.c (rb_econv_prepare_options): new function, to prepare + econv options with newline flags. * include/ruby/encoding.h (ECONV_NEWLINE_DECORATOR_MASK): add. diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 5ccbbbdfd8..123f76aff5 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -244,6 +244,7 @@ typedef struct rb_econv_t rb_econv_t; VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts); int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding); +int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags); int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts); rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags); @@ -314,6 +315,12 @@ void rb_econv_binmode(rb_econv_t *ec); #define ECONV_STATEFUL_DECORATOR_MASK 0x00f00000 #define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) +#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_UNIVERSAL_NEWLINE_DECORATOR +#else +#define ECONV_DEFAULT_NEWLINE_DECORATOR 0 +#endif + /* end of flags for rb_econv_open */ /* flags for rb_econv_convert */ diff --git a/io.c b/io.c index 6a4ca44bf4..73a30cd305 100644 --- a/io.c +++ b/io.c @@ -217,15 +217,15 @@ static int max_file_descriptor = NOFILE; # endif #endif +#define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE) +#define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) ((fptr)->mode & FMODE_TEXTMODE) #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) /* Windows */ -# define NEED_NEWLINE_DECORATOR_ON_READ(fptr) (!((fptr)->mode & FMODE_BINMODE)) -# define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) (!((fptr)->mode & FMODE_BINMODE)) +# define DEFAULT_TEXTMODE FMODE_TEXTMODE # define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR #else /* Unix */ -# define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE) -# define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) 0 +# define DEFAULT_TEXTMODE 0 #endif #define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr)) #define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK))) @@ -1672,8 +1672,6 @@ make_readconv(rb_io_t *fptr, int size) const char *sname, *dname; ecflags = fptr->encs.ecflags; ecopts = fptr->encs.ecopts; - if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) - ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; if (fptr->encs.enc2) { sname = rb_enc_name(fptr->encs.enc2); dname = rb_enc_name(fptr->encs.enc); @@ -3994,7 +3992,7 @@ rb_io_binmode(VALUE io) rb_econv_binmode(fptr->writeconv); fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; - fptr->writeconv_pre_ecflags &= ~(ECONV_UNIVERSAL_NEWLINE_DECORATOR|ECONV_CRLF_NEWLINE_DECORATOR|ECONV_CR_NEWLINE_DECORATOR); + fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; return io; } @@ -4442,13 +4440,25 @@ rb_io_extract_encoding_option(VALUE opt, rb_encoding **enc_p, rb_encoding **enc2 typedef struct rb_io_enc_t convconfig_t; static void -validate_enc_binmode(int fmode, rb_encoding *enc, rb_encoding *enc2) +validate_enc_binmode(int *fmode_p, int ecflags, rb_encoding *enc, rb_encoding *enc2) { + int fmode = *fmode_p; + if ((fmode & FMODE_READABLE) && !enc2 && !(fmode & FMODE_BINMODE) && !rb_enc_asciicompat(enc ? enc : rb_default_external_encoding())) rb_raise(rb_eArgError, "ASCII incompatible encoding needs binmode"); + + if (!(fmode & FMODE_BINMODE) && + (ecflags & ECONV_NEWLINE_DECORATOR_MASK)) { + fmode |= DEFAULT_TEXTMODE; + *fmode_p = fmode; + } + else if (!(ecflags & ECONV_NEWLINE_DECORATOR_MASK)) { + fmode &= ~FMODE_TEXTMODE; + *fmode_p = fmode; + } } static void @@ -4516,7 +4526,9 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, } if (NIL_P(opthash)) { - ecflags = 0; + ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; ecopts = Qnil; } else { @@ -4549,7 +4561,10 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, /* perm no use, just ignore */ } } - ecflags = rb_econv_prepare_opts(opthash, &ecopts); + ecflags = (fmode & FMODE_READABLE) ? + MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, + 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; + ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) { if (has_enc) { @@ -4558,7 +4573,7 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, } } - validate_enc_binmode(fmode, enc, enc2); + validate_enc_binmode(&fmode, ecflags, enc, enc2); *vmode_p = vmode; @@ -4756,7 +4771,8 @@ rb_file_open_generic(VALUE io, VALUE filename, int oflags, int fmode, convconfig cc.ecopts = Qnil; convconfig = &cc; } - validate_enc_binmode(fmode, convconfig->enc, convconfig->enc2); + validate_enc_binmode(&fmode, convconfig->ecflags, + convconfig->enc, convconfig->enc2); MakeOpenFile(io, fptr); fptr->mode = fmode; @@ -8024,7 +8040,7 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt) } } } - validate_enc_binmode(fptr->mode, enc, enc2); + validate_enc_binmode(&fptr->mode, ecflags, enc, enc2); fptr->encs.enc = enc; fptr->encs.enc2 = enc2; fptr->encs.ecflags = ecflags; diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 00682f69cd..765616da3d 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -449,6 +449,16 @@ class TestEncodingConverter < Test::Unit::TestCase assert_econv("abc\rdef", :finished, 50, ec, "abc\ndef", "") end + def test_no_universal_newline1 + ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: false) + assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "") + end + + def test_no_universal_newline2 + ec = Encoding::Converter.new("", "", universal_newline: false) + assert_econv("abc\r\ndef", :finished, 50, ec, "abc\r\ndef", "") + end + def test_after_output ec = Encoding::Converter.new("UTF-8", "EUC-JP") a = ["", "abc\u{3042}def", ec, nil, 100, :after_output=>true] diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index db1fe0b539..86d45a50ae 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -1349,6 +1349,9 @@ EOT open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) } open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) } open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) } + open("t.crlf", "r", textmode: true, universal_newline: false) {|f| + assert_equal("a\r\nb\r\nc\r\n", f.read) + } generate_file("t.cr", "a\rb\rc\r") assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8")) diff --git a/transcode.c b/transcode.c index 9433e6a3df..92ebf99b4a 100644 --- a/transcode.c +++ b/transcode.c @@ -2423,10 +2423,9 @@ str_transcoding_resize(VALUE destination, size_t len, size_t new_len) } static int -econv_opts(VALUE opt) +econv_opts(VALUE opt, int ecflags) { VALUE v; - int ecflags = 0; v = rb_hash_aref(opt, sym_invalid); if (NIL_P(v)) { @@ -2469,25 +2468,36 @@ econv_opts(VALUE opt) } } - v = rb_hash_aref(opt, sym_universal_newline); - if (RTEST(v)) - ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + { + int setflags = 0, newlineflag = 0; + + v = rb_hash_aref(opt, sym_universal_newline); + if (RTEST(v)) + setflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); + + v = rb_hash_aref(opt, sym_crlf_newline); + if (RTEST(v)) + setflags |= ECONV_CRLF_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); - v = rb_hash_aref(opt, sym_crlf_newline); - if (RTEST(v)) - ecflags |= ECONV_CRLF_NEWLINE_DECORATOR; + v = rb_hash_aref(opt, sym_cr_newline); + if (RTEST(v)) + setflags |= ECONV_CR_NEWLINE_DECORATOR; + newlineflag |= !NIL_P(v); - v = rb_hash_aref(opt, sym_cr_newline); - if (RTEST(v)) - ecflags |= ECONV_CR_NEWLINE_DECORATOR; + if (newlineflag) { + ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; + ecflags |= setflags; + } + } return ecflags; } int -rb_econv_prepare_opts(VALUE opthash, VALUE *opts) +rb_econv_prepare_options(VALUE opthash, VALUE *opts, int ecflags) { - int ecflags; VALUE newhash = Qnil; VALUE v; @@ -2495,7 +2505,7 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts) *opts = Qnil; return 0; } - ecflags = econv_opts(opthash); + ecflags = econv_opts(opthash, ecflags); v = rb_hash_aref(opthash, sym_replace); if (!NIL_P(v)) { @@ -2530,6 +2540,12 @@ rb_econv_prepare_opts(VALUE opthash, VALUE *opts) return ecflags; } +int +rb_econv_prepare_opts(VALUE opthash, VALUE *opts) +{ + return rb_econv_prepare_options(opthash, opts, 0); +} + rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE opthash) { -- cgit v1.2.3