aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--enc/trans/newline.trans20
-rw-r--r--include/ruby/internal/encoding/transcode.h18
-rw-r--r--test/ruby/test_file.rb42
-rw-r--r--test/ruby/test_transcode.rb2
-rw-r--r--transcode.c30
5 files changed, 104 insertions, 8 deletions
diff --git a/enc/trans/newline.trans b/enc/trans/newline.trans
index 9e763407f9..95e082f5bd 100644
--- a/enc/trans/newline.trans
+++ b/enc/trans/newline.trans
@@ -17,10 +17,16 @@
map_cr["0a"] = "0d"
transcode_generate_node(ActionMap.parse(map_cr), "cr_newline")
+
+ map_normalize = {}
+ map_normalize["{00-ff}"] = :func_so
+
+ transcode_generate_node(ActionMap.parse(map_normalize), "lf_newline")
%>
<%= transcode_generated_code %>
+#define lf_newline universal_newline
#define STATE (sp[0])
#define NORMAL 0
#define JUST_AFTER_CR 1
@@ -126,10 +132,24 @@ rb_cr_newline = {
0, 0, 0, 0
};
+static const rb_transcoder
+rb_lf_newline = {
+ "", "lf_newline", lf_newline,
+ TRANSCODE_TABLE_INFO,
+ 1, /* input_unit_length */
+ 1, /* max_input */
+ 2, /* max_output */
+ asciicompat_converter, /* asciicompat_type */
+ 2, universal_newline_init, universal_newline_init, /* state_size, state_init, state_fini */
+ 0, 0, 0, fun_so_universal_newline,
+ universal_newline_finish
+};
+
void
Init_newline(void)
{
rb_register_transcoder(&rb_universal_newline);
rb_register_transcoder(&rb_crlf_newline);
rb_register_transcoder(&rb_cr_newline);
+ rb_register_transcoder(&rb_lf_newline);
}
diff --git a/include/ruby/internal/encoding/transcode.h b/include/ruby/internal/encoding/transcode.h
index 60c96a41c9..7f26d2eae9 100644
--- a/include/ruby/internal/encoding/transcode.h
+++ b/include/ruby/internal/encoding/transcode.h
@@ -476,16 +476,16 @@ enum ruby_econv_flag_type {
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
/** Decorators are there. */
- RUBY_ECONV_DECORATOR_MASK = 0x0000ff00,
+ RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
/** Newline converters are there. */
- RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00003f00,
+ RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
/** (Unclear; seems unused). */
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
/** (Unclear; seems unused). */
- RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00003000,
+ RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
/** Universal newline mode. */
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
@@ -496,11 +496,14 @@ enum ruby_econv_flag_type {
/** CRLF to CR conversion shall happen. */
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
+ /** CRLF to LF conversion shall happen. */
+ RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
+
/** Texts shall be XML-escaped. */
- RUBY_ECONV_XML_TEXT_DECORATOR = 0x00004000,
+ RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
/** Texts shall be AttrValue escaped */
- RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00008000,
+ RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
/** (Unclear; seems unused). */
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
@@ -529,6 +532,7 @@ enum ruby_econv_flag_type {
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
+#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
@@ -543,10 +547,10 @@ enum ruby_econv_flag_type {
*/
/** Indicates the input is a part of much larger one. */
- RUBY_ECONV_PARTIAL_INPUT = 0x00010000,
+ RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
/** Instructs the converter to stop after output. */
- RUBY_ECONV_AFTER_OUTPUT = 0x00020000,
+ RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
diff --git a/test/ruby/test_file.rb b/test/ruby/test_file.rb
index 905416911a..669b004b83 100644
--- a/test/ruby/test_file.rb
+++ b/test/ruby/test_file.rb
@@ -460,6 +460,48 @@ class TestFile < Test::Unit::TestCase
end
end
+ def test_file_open_newline_option
+ Dir.mktmpdir(__method__.to_s) do |tmpdir|
+ path = File.join(tmpdir, "foo")
+ test = lambda do |newline|
+ File.open(path, "wt", newline: newline) do |f|
+ f.write "a\n"
+ f.puts "b"
+ end
+ File.binread(path)
+ end
+ assert_equal("a\nb\n", test.(:lf))
+ assert_equal("a\nb\n", test.(:universal))
+ assert_equal("a\r\nb\r\n", test.(:crlf))
+ assert_equal("a\rb\r", test.(:cr))
+
+ test = lambda do |newline|
+ File.open(path, "rt", newline: newline) do |f|
+ f.read
+ end
+ end
+
+ File.binwrite(path, "a\nb\n")
+ assert_equal("a\nb\n", test.(:lf))
+ assert_equal("a\nb\n", test.(:universal))
+ assert_equal("a\nb\n", test.(:crlf))
+ assert_equal("a\nb\n", test.(:cr))
+
+ File.binwrite(path, "a\r\nb\r\n")
+ assert_equal("a\r\nb\r\n", test.(:lf))
+ assert_equal("a\nb\n", test.(:universal))
+ # Work on both Windows and non-Windows
+ assert_include(["a\r\nb\r\n", "a\nb\n"], test.(:crlf))
+ assert_equal("a\r\nb\r\n", test.(:cr))
+
+ File.binwrite(path, "a\rb\r")
+ assert_equal("a\rb\r", test.(:lf))
+ assert_equal("a\nb\n", test.(:universal))
+ assert_equal("a\rb\r", test.(:crlf))
+ assert_equal("a\rb\r", test.(:cr))
+ end
+ end
+
def test_open_nul
Dir.mktmpdir(__method__.to_s) do |tmpdir|
path = File.join(tmpdir, "foo")
diff --git a/test/ruby/test_transcode.rb b/test/ruby/test_transcode.rb
index c8b0034e06..73737be0ad 100644
--- a/test/ruby/test_transcode.rb
+++ b/test/ruby/test_transcode.rb
@@ -2305,5 +2305,7 @@ class TestTranscode < Test::Unit::TestCase
assert_equal("A\rB\r\rC", s.encode(usascii, newline: :cr))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, crlf_newline: true))
assert_equal("A\r\nB\r\r\nC", s.encode(usascii, newline: :crlf))
+ assert_equal("A\nB\nC", s.encode(usascii, lf_newline: true))
+ assert_equal("A\nB\nC", s.encode(usascii, newline: :lf))
end
end
diff --git a/transcode.c b/transcode.c
index 5fafad398f..535e436b03 100644
--- a/transcode.c
+++ b/transcode.c
@@ -47,6 +47,7 @@ static VALUE sym_xml, sym_text, sym_attr;
static VALUE sym_universal_newline;
static VALUE sym_crlf_newline;
static VALUE sym_cr_newline;
+static VALUE sym_lf_newline;
#ifdef ENABLE_ECONV_NEWLINE_OPTION
static VALUE sym_newline, sym_universal, sym_crlf, sym_cr, sym_lf;
#endif
@@ -1039,6 +1040,7 @@ decorator_names(int ecflags, const char **decorators_ret)
case ECONV_UNIVERSAL_NEWLINE_DECORATOR:
case ECONV_CRLF_NEWLINE_DECORATOR:
case ECONV_CR_NEWLINE_DECORATOR:
+ case ECONV_LF_NEWLINE_DECORATOR:
case 0:
break;
default:
@@ -1062,6 +1064,8 @@ decorator_names(int ecflags, const char **decorators_ret)
decorators_ret[num_decorators++] = "crlf_newline";
if (ecflags & ECONV_CR_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "cr_newline";
+ if (ecflags & ECONV_LF_NEWLINE_DECORATOR)
+ decorators_ret[num_decorators++] = "lf_newline";
if (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR)
decorators_ret[num_decorators++] = "universal_newline";
@@ -1982,6 +1986,9 @@ rb_econv_binmode(rb_econv_t *ec)
case ECONV_CR_NEWLINE_DECORATOR:
dname = "cr_newline";
break;
+ case ECONV_LF_NEWLINE_DECORATOR:
+ dname = "lf_newline";
+ break;
}
if (dname) {
@@ -2040,6 +2047,10 @@ econv_description(const char *sname, const char *dname, int ecflags, VALUE mesg)
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "cr_newline");
}
+ if (ecflags & ECONV_LF_NEWLINE_DECORATOR) {
+ rb_str_cat2(mesg, pre); pre = ",";
+ rb_str_cat2(mesg, "lf_newline");
+ }
if (ecflags & ECONV_XML_TEXT_DECORATOR) {
rb_str_cat2(mesg, pre); pre = ",";
rb_str_cat2(mesg, "xml_text");
@@ -2515,7 +2526,7 @@ econv_opts(VALUE opt, int ecflags)
ecflags |= ECONV_CR_NEWLINE_DECORATOR;
}
else if (v == sym_lf) {
- /* ecflags |= ECONV_LF_NEWLINE_DECORATOR; */
+ ecflags |= ECONV_LF_NEWLINE_DECORATOR;
}
else if (SYMBOL_P(v)) {
rb_raise(rb_eArgError, "unexpected value for newline option: %"PRIsVALUE,
@@ -2544,6 +2555,11 @@ econv_opts(VALUE opt, int ecflags)
setflags |= ECONV_CR_NEWLINE_DECORATOR;
newlineflag |= !NIL_P(v);
+ v = rb_hash_aref(opt, sym_lf_newline);
+ if (RTEST(v))
+ setflags |= ECONV_LF_NEWLINE_DECORATOR;
+ newlineflag |= !NIL_P(v);
+
switch (newlineflag) {
case 1:
ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK;
@@ -3281,11 +3297,13 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* :undef => :replace # replace undefined conversion
* :replace => string # replacement string ("?" or "\uFFFD" if not specified)
* :newline => :universal # decorator for converting CRLF and CR to LF
+ * :newline => :lf # decorator for converting CRLF and CR to LF when writing
* :newline => :crlf # decorator for converting LF to CRLF
* :newline => :cr # decorator for converting LF to CR
* :universal_newline => true # decorator for converting CRLF and CR to LF
* :crlf_newline => true # decorator for converting LF to CRLF
* :cr_newline => true # decorator for converting LF to CR
+ * :lf_newline => true # decorator for converting CRLF and CR to LF when writing
* :xml => :text # escape as XML CharData.
* :xml => :attr # escape as XML AttValue
* integer form:
@@ -3293,6 +3311,7 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Encoding::Converter::UNDEF_REPLACE
* Encoding::Converter::UNDEF_HEX_CHARREF
* Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR
+ * Encoding::Converter::LF_NEWLINE_DECORATOR
* Encoding::Converter::CRLF_NEWLINE_DECORATOR
* Encoding::Converter::CR_NEWLINE_DECORATOR
* Encoding::Converter::XML_TEXT_DECORATOR
@@ -3335,6 +3354,8 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath,
* Convert LF to CRLF.
* [:cr_newline => true]
* Convert LF to CR.
+ * [:lf_newline => true]
+ * Convert CRLF and CR to LF (when writing).
* [:xml => :text]
* Escape as XML CharData.
* This form can be used as an HTML 4.0 #PCDATA.
@@ -4437,6 +4458,7 @@ Init_transcode(void)
sym_universal_newline = ID2SYM(rb_intern_const("universal_newline"));
sym_crlf_newline = ID2SYM(rb_intern_const("crlf_newline"));
sym_cr_newline = ID2SYM(rb_intern_const("cr_newline"));
+ sym_lf_newline = ID2SYM(rb_intern("lf_newline"));
sym_partial_input = ID2SYM(rb_intern_const("partial_input"));
#ifdef ENABLE_ECONV_NEWLINE_OPTION
@@ -4533,6 +4555,12 @@ InitVM_transcode(void)
*/
rb_define_const(rb_cEncodingConverter, "UNIVERSAL_NEWLINE_DECORATOR", INT2FIX(ECONV_UNIVERSAL_NEWLINE_DECORATOR));
+ /* Document-const: LF_NEWLINE_DECORATOR
+ *
+ * Decorator for converting CRLF and CR to LF when writing
+ */
+ rb_define_const(rb_cEncodingConverter, "LF_NEWLINE_DECORATOR", INT2FIX(ECONV_LF_NEWLINE_DECORATOR));
+
/* Document-const: CRLF_NEWLINE_DECORATOR
*
* Decorator for converting LF to CRLF