From ffc136a024c26d1047421f900002bce32b8c9a2d Mon Sep 17 00:00:00 2001 From: nahi Date: Mon, 15 Sep 2003 10:07:42 +0000 Subject: * lib/csv.rb: add an extra parameter to specify the row (record) separator character. To parse Mac's CR-separated CSV, do this: CSV.open("mac.csv", "r", ?,, ?\r) { |row| p row.to_a } The 3rd parameter in this example, ?, is the column separator and the 4th, ?\r, is the row separator. The row separator is nil by default. A nil separator means "\r\n" or "\n". * test/csv/test_csv.rb: add tests for the above feature. * test/csv/mac.csv: added a sample CR-separated CSV file. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@4553 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 13 ++ lib/csv.rb | 124 +++++++++-------- test/csv/mac.csv | 2 + test/csv/test_csv.rb | 365 ++++++++++++++++++++++++++++++++++----------------- 4 files changed, 327 insertions(+), 177 deletions(-) create mode 100644 test/csv/mac.csv diff --git a/ChangeLog b/ChangeLog index 9b915bc728..e49bb3ce85 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +Mon Sep 15 19:02:52 2003 NAKAMURA, Hiroshi + + * lib/csv.rb: add extra pamameter to specify row(record) separater + character. To parse Mac's CR separated CSV, do like this. + CSV.open("mac.csv", "r", ?,, ?\r) { |row| p row.to_a } + The 3rd parameter in this example ?, is for column separater and the + 4th ?\r is for row separater. Row separater is nil by default. Nil + separater means "\r\n" or "\n". + + * test/csv/test_csv.rb: add tests for above feature. + + * test/csv/mac.csv: added. Sample CR separated CSV file. 
+ Fri Sep 12 22:41:48 2003 Michal Rokos * ext/openssl/ossl.c: move ASN.1 stuff to ossl_asn1.[ch] diff --git a/lib/csv.rb b/lib/csv.rb index 947eacbcfa..ee686db0cf 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -189,32 +189,32 @@ public # writer << [nil, nil] # end # - def CSV.open(filename, mode, col_sep = ?,, &block) + def CSV.open(filename, mode, col_sep = ?,, row_sep = nil, &block) if mode == 'r' or mode == 'rb' - open_reader(filename, col_sep, &block) + open_reader(filename, col_sep, row_sep, &block) elsif mode == 'w' or mode == 'wb' - open_writer(filename, col_sep, &block) + open_writer(filename, col_sep, row_sep, &block) else raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") end end - def CSV.parse(filename, col_sep = ?,, &block) - open_reader(filename, col_sep, &block) + def CSV.parse(filename, col_sep = ?,, row_sep = nil, &block) + open_reader(filename, col_sep, row_sep, &block) end - def CSV.generate(filename, col_sep = ?,, &block) - open_writer(filename, col_sep, &block) + def CSV.generate(filename, col_sep = ?,, row_sep = nil, &block) + open_writer(filename, col_sep, row_sep, &block) end # Private class methods. 
class << self private - def open_reader(filename, col_sep, &block) + def open_reader(filename, col_sep, row_sep, &block) file = File.open(filename, 'rb') if block begin - CSV::Reader.parse(file, col_sep) do |row| + CSV::Reader.parse(file, col_sep, row_sep) do |row| yield(row) end ensure @@ -222,17 +222,17 @@ public end nil else - reader = CSV::Reader.create(file, col_sep) + reader = CSV::Reader.create(file, col_sep, row_sep) reader.close_on_terminate reader end end - def open_writer(filename, col_sep, &block) + def open_writer(filename, col_sep, row_sep, &block) file = File.open(filename, 'wb') if block begin - CSV::Writer.generate(file, col_sep) do |writer| + CSV::Writer.generate(file, col_sep, row_sep) do |writer| yield(writer) end ensure @@ -240,7 +240,7 @@ public end nil else - writer = CSV::Writer.create(file, col_sep) + writer = CSV::Writer.create(file, col_sep, row_sep) writer.close_on_terminate writer end @@ -275,14 +275,14 @@ public # DESCRIPTION # Create instance. To get parse result, see CSV::Reader#each. # - def Reader.create(str_or_readable, col_sep = ?,) + def Reader.create(str_or_readable, col_sep = ?,, row_sep = nil) case str_or_readable when IO - IOReader.new(str_or_readable, col_sep) + IOReader.new(str_or_readable, col_sep, row_sep) when String - StringReader.new(str_or_readable, col_sep) + StringReader.new(str_or_readable, col_sep, row_sep) else - IOReader.new(str_or_readable, col_sep) + IOReader.new(str_or_readable, col_sep, row_sep) end end @@ -305,8 +305,8 @@ public # Block value is always nil. Rows are not cached for performance # reason. # - def Reader.parse(str_or_readable, col_sep = ?,) - reader = create(str_or_readable, col_sep) + def Reader.parse(str_or_readable, col_sep = ?,, row_sep = nil) + reader = create(str_or_readable, col_sep, row_sep) reader.each do |row| yield(row) end @@ -413,8 +413,9 @@ public # DESCRIPTION # Create instance. To get parse result, see CSV::Reader#each. 
# - def initialize(string, col_sep = ?,) + def initialize(string, col_sep = ?,, row_sep = nil) @col_sep = col_sep + @row_sep = row_sep @dev = string @idx = 0 if @dev[0, 3] == "\xef\xbb\xbf" @@ -424,7 +425,7 @@ public private def get_row(row) - parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size raise IllegalFormatError.new end @@ -460,9 +461,10 @@ public # DESCRIPTION # Create instance. To get parse result, see CSV::Reader#each. # - def initialize(io, col_sep = ?,) + def initialize(io, col_sep = ?,, row_sep = nil) @io = io @col_sep = col_sep + @row_sep = row_sep @dev = CSV::IOBuf.new(@io) @idx = 0 if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf @@ -487,7 +489,7 @@ public private def get_row(row) - parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep, @row_sep) if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? raise IllegalFormatError.new end @@ -549,8 +551,8 @@ public # Create instance. To add CSV data to generate CSV string, see # CSV::Writer#<< or CSV::Writer#add_row. # - def Writer.create(str_or_readable, col_sep = ?,) - BasicWriter.new(str_or_readable, col_sep) + def Writer.create(str_or_readable, col_sep = ?,, row_sep = nil) + BasicWriter.new(str_or_readable, col_sep, row_sep) end # SYNOPSIS @@ -572,8 +574,8 @@ public # To add CSV data to generate CSV string, see CSV::Writer#<< or # CSV::Writer#add_row. 
# - def Writer.generate(str_or_writable, col_sep = ?,) - writer = Writer.create(str_or_writable, col_sep) + def Writer.generate(str_or_writable, col_sep = ?,, row_sep = nil) + writer = Writer.create(str_or_writable, col_sep, row_sep) yield(writer) writer.close nil @@ -602,7 +604,7 @@ public Cell.new(item.to_s, false) end } - CSV.generate_row(row, row.size, @dev, @col_sep) + CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) self end @@ -621,7 +623,7 @@ public # (Formar is 'c1' and latter is Null.) # def add_row(row) - CSV.generate_row(row, row.size, @dev, @col_sep) + CSV.generate_row(row, row.size, @dev, @col_sep, @row_sep) self end @@ -669,8 +671,9 @@ public # Create instance. To add CSV data to generate CSV string, see # CSV::Writer#<< or CSV::Writer#add_row. # - def initialize(str_or_writable, col_sep = ?,) + def initialize(str_or_writable, col_sep = ?,, row_sep = nil) @col_sep = col_sep + @row_sep = row_sep @dev = str_or_writable @close_on_terminate = false end @@ -698,12 +701,14 @@ public end # SYNOPSIS - # cells = CSV.parse_line(src, col_sep = ?,) + # cells = CSV.parse_line(src, col_sep = ?,, row_sep = nil) # # ARGS # src: a CSV String. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. # # RETURNS # cells: an Array of parsed cells in first line. Each cell is a String. @@ -716,14 +721,14 @@ public # If you don't know whether a target string to parse is exactly 1 line or # not, use CSV.parse_row instead of this method. # - def CSV.parse_line(src, col_sep = ?,) + def CSV.parse_line(src, col_sep = ?,, row_sep = nil) idx = 0 res_type = :DT_COLSEP cells = Row.new begin while (res_type.equal?(:DT_COLSEP)) cell = Cell.new - res_type, idx = parse_body(src, idx, cell, col_sep) + res_type, idx = parse_body(src, idx, cell, col_sep, row_sep) cells.push(cell.is_null ? 
nil : cell.data) end rescue IllegalFormatError @@ -734,13 +739,15 @@ public # SYNOPSIS - # str = CSV.generate_line(cells, col_sep = ?,) + # str = CSV.generate_line(cells, col_sep = ?,, row_sep = nil) # # ARGS # cells: an Array of cell to be converted to CSV string. Each cell must # respond to 'to_s'. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. # # RETURNS # str: a String of generated CSV string. @@ -748,7 +755,7 @@ public # DESCRIPTION # Create a line from cells. Each cell is stringified by to_s. # - def CSV.generate_line(cells, col_sep = ?,) + def CSV.generate_line(cells, col_sep = ?,, row_sep = nil) if (cells.size == 0) return '' end @@ -761,18 +768,18 @@ public else Cell.new(cells[idx].to_s, false) end - generate_body(cell, result_str, col_sep) + generate_body(cell, result_str, col_sep, row_sep) idx += 1 if (idx == cells.size) break end - generate_separator(:DT_COLSEP, result_str, col_sep) + generate_separator(:DT_COLSEP, result_str, col_sep, row_sep) end result_str end # SYNOPSIS - # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,) + # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil) # # ARGS # src: a CSV data to be parsed. Must respond '[](idx)'. @@ -783,6 +790,8 @@ public # out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. # # RETURNS # parsed_cells: num of parsed cells. 
@@ -802,14 +811,14 @@ public # p parsed # end while parsed_cells > 0 # - def CSV.parse_row(src, idx, out_dev, col_sep = ?,) + def CSV.parse_row(src, idx, out_dev, col_sep = ?,, row_sep = nil) idx_backup = idx parsed_cells = 0 res_type = :DT_COLSEP begin while (!res_type.equal?(:DT_ROWSEP)) cell = Cell.new - res_type, idx = parse_body(src, idx, cell, col_sep) + res_type, idx = parse_body(src, idx, cell, col_sep, row_sep) if res_type.equal?(:DT_EOS) if idx == idx_backup #((parsed_cells == 0) && (cell.is_null)) return 0, 0 @@ -826,7 +835,7 @@ public end # SYNOPSIS - # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,) + # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil) # # ARGS # src: an Array of CSV::Cell to be converted to CSV string. Must respond to @@ -835,6 +844,8 @@ public # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. # col_sep: Column separator. ?, by default. If you want to separate # fields with semicolon, give ?; here. + # row_sep: Row separator. nil by default. nil means "\r\n or \n". If you + # want to separate records with \r, give ?\r here. # # RETURNS # parsed_cells: num of converted cells. 
@@ -859,27 +870,27 @@ public # end # p buf # - def CSV.generate_row(src, cells, out_dev, col_sep = ?,) + def CSV.generate_row(src, cells, out_dev, col_sep = ?,, row_sep = nil) src_size = src.size if (src_size == 0) if cells == 0 - generate_separator(:DT_ROWSEP, out_dev, col_sep) + generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) end return 0 end res_type = :DT_COLSEP parsed_cells = 0 - generate_body(src[parsed_cells], out_dev, col_sep) + generate_body(src[parsed_cells], out_dev, col_sep, row_sep) parsed_cells += 1 while ((parsed_cells < cells) && (parsed_cells != src_size)) - generate_separator(:DT_COLSEP, out_dev, col_sep) - generate_body(src[parsed_cells], out_dev, col_sep) + generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep) + generate_body(src[parsed_cells], out_dev, col_sep, row_sep) parsed_cells += 1 end if (parsed_cells == cells) - generate_separator(:DT_ROWSEP, out_dev, col_sep) + generate_separator(:DT_ROWSEP, out_dev, col_sep, row_sep) else - generate_separator(:DT_COLSEP, out_dev, col_sep) + generate_separator(:DT_COLSEP, out_dev, col_sep, row_sep) end parsed_cells end @@ -891,7 +902,8 @@ private class << self private - def parse_body(src, idx, cell, col_sep) + def parse_body(src, idx, cell, col_sep, row_sep) + row_sep_end = row_sep || ?\n cell.is_null = false state = :ST_START quoted = false @@ -941,7 +953,7 @@ private quoted = true state = :ST_DATA end - elsif (c == ?\r) + elsif row_sep.nil? 
and c == ?\r if cr raise IllegalFormatError.new end @@ -951,7 +963,7 @@ private else cr = true end - elsif (c == ?\n) + elsif c == row_sep_end if state.equal?(:ST_DATA) if cr state = :ST_END @@ -1004,12 +1016,14 @@ private return :DT_EOS, idx end - def generate_body(cells, out_dev, col_sep) + def generate_body(cells, out_dev, col_sep, row_sep) row_data = cells.data.dup if (!cells.is_null) if (row_data.gsub!('"', '""') || row_data.include?(col_sep) || - (/[\r\n]/ =~ row_data) || (cells.data.empty?)) + (row_sep && row_data.index(row_sep)) || + (/[\r\n]/ =~ row_data) || + (cells.data.empty?)) out_dev << '"' << row_data << '"' else out_dev << row_data @@ -1017,12 +1031,12 @@ private end end - def generate_separator(type, out_dev, col_sep) + def generate_separator(type, out_dev, col_sep, row_sep) case type when :DT_COLSEP out_dev << col_sep.chr when :DT_ROWSEP - out_dev << "\r\n" + out_dev << (row_sep || "\r\n") end end end diff --git a/test/csv/mac.csv b/test/csv/mac.csv new file mode 100644 index 0000000000..95b1a1725b --- /dev/null +++ b/test/csv/mac.csv @@ -0,0 +1,2 @@ +"Avenches","aus Umgebung" +"Bad Hersfeld","Ausgrabung" \ No newline at end of file diff --git a/test/csv/test_csv.rb b/test/csv/test_csv.rb index f1ba35a66f..41e25fe5ec 100644 --- a/test/csv/test_csv.rb +++ b/test/csv/test_csv.rb @@ -1,4 +1,5 @@ -require 'test/unit' +require 'test/unit/testsuite' +require 'test/unit/testcase' require 'tempfile' require 'fileutils' @@ -12,130 +13,16 @@ class CSV end end -class TestCSV < Test::Unit::TestCase - - class << self - def d(data, is_null = false) - CSV::Cell.new(data.to_s, is_null) - end - end - - @@colData = ['', nil, true, false, 'foo', '!' 
* 1000] - @@simpleCSVData = { - [nil] => '', - [''] => '""', - [nil, nil] => ',', - [nil, nil, nil] => ',,', - ['foo'] => 'foo', - [','] => '","', - [',', ','] => '",",","', - [';'] => ';', - [';', ';'] => ';,;', - ["\"\r", "\"\r"] => "\"\"\"\r\",\"\"\"\r\"", - ["\"\n", "\"\n"] => "\"\"\"\n\",\"\"\"\n\"", - ["\t"] => "\t", - ["\t", "\t"] => "\t,\t", - ['foo', 'bar'] => 'foo,bar', - ['foo', '"bar"', 'baz'] => 'foo,"""bar""",baz', - ['foo', 'foo,bar', 'baz'] => 'foo,"foo,bar",baz', - ['foo', '""', 'baz'] => 'foo,"""""",baz', - ['foo', '', 'baz'] => 'foo,"",baz', - ['foo', nil, 'baz'] => 'foo,,baz', - [nil, 'foo', 'bar'] => ',foo,bar', - ['foo', 'bar', nil] => 'foo,bar,', - ['foo', "\r", 'baz'] => "foo,\"\r\",baz", - ['foo', "\n", 'baz'] => "foo,\"\n\",baz", - ['foo', "\r\n\r", 'baz'] => "foo,\"\r\n\r\",baz", - ['foo', "\r\n", 'baz'] => "foo,\"\r\n\",baz", - ['foo', "\r.\n", 'baz'] => "foo,\"\r.\n\",baz", - ['foo', "\r\n\n", 'baz'] => "foo,\"\r\n\n\",baz", - ['foo', '"', 'baz'] => 'foo,"""",baz', - } - - @@fullCSVData = { - [d('', true)] => '', - [d('')] => '""', - [d('', true), d('', true)] => ',', - [d('', true), d('', true), d('', true)] => ',,', - [d('foo')] => 'foo', - [d('foo'), d('bar')] => 'foo,bar', - [d('foo'), d('"bar"'), d('baz')] => 'foo,"""bar""",baz', - [d('foo'), d('foo,bar'), d('baz')] => 'foo,"foo,bar",baz', - [d('foo'), d('""'), d('baz')] => 'foo,"""""",baz', - [d('foo'), d(''), d('baz')] => 'foo,"",baz', - [d('foo'), d('', true), d('baz')] => 'foo,,baz', - [d('foo'), d("\r"), d('baz')] => "foo,\"\r\",baz", - [d('foo'), d("\n"), d('baz')] => "foo,\"\n\",baz", - [d('foo'), d("\r\n"), d('baz')] => "foo,\"\r\n\",baz", - [d('foo'), d("\r.\n"), d('baz')] => "foo,\"\r.\n\",baz", - [d('foo'), d("\r\n\n"), d('baz')] => "foo,\"\r\n\n\",baz", - [d('foo'), d('"'), d('baz')] => 'foo,"""",baz', - } - - @@fullCSVDataArray = @@fullCSVData.collect { |key, value| key } - - def ssv2csv(ssvStr) - sepConv(ssvStr, ?;, ?,) - end - - def csv2ssv(csvStr) - sepConv(csvStr, 
?,, ?;) - end - def tsv2csv(tsvStr) - sepConv(tsvStr, ?\t, ?,) - end - - def csv2tsv(csvStr) - sepConv(csvStr, ?,, ?\t) - end - - def sepConv(srcStr, srcSep, destSep) - rows = CSV::Row.new - cols, idx = CSV.parse_row(srcStr, 0, rows, srcSep) - destStr = '' - cols = CSV.generate_row(rows, rows.size, destStr, destSep) - destStr - end - -public - - def setup - @tmpdir = File.join(Dir.tmpdir, "ruby_test_csv_tmp_#{$$}") - Dir.mkdir(@tmpdir) - @infile = File.join(@tmpdir, 'in.csv') - @infiletsv = File.join(@tmpdir, 'in.tsv') - @emptyfile = File.join(@tmpdir, 'empty.csv') - @outfile = File.join(@tmpdir, 'out.csv') - @bomfile = File.join(File.dirname(__FILE__), "bom.csv") - - CSV.open(@infile, "w") do |writer| - @@fullCSVDataArray.each do |row| - writer.add_row(row) - end - end - - CSV.open(@infiletsv, "w", ?\t) do |writer| - @@fullCSVDataArray.each do |row| - writer.add_row(row) - end - end - - CSV.generate(@emptyfile) do |writer| - # Create empty file. - end - end - - def teardown - FileUtils.rm_rf(@tmpdir) - end - - def d(*arg) - TestCSV.d(*arg) +module CSVTestSupport + def d(data, is_null = false) + CSV::Cell.new(data.to_s, is_null) end +end - #### CSV::Cell unit test +class TestCSVCell < Test::Unit::TestCase + @@colData = ['', nil, true, false, 'foo', '!' 
* 1000] def test_Cell_EQUAL # '==' d1 = CSV::Cell.new('d', false) @@ -206,9 +93,11 @@ public d3 = CSV::Cell.new(nil, false) assert_equal(d3.is_null, false, "Data: false.") end +end - #### CSV::Row unit test +class TestCSVRow < Test::Unit::TestCase + include CSVTestSupport def test_Row_s_match c1 = CSV::Row[d(1), d(2), d(3)] @@ -267,7 +156,125 @@ public r = CSV::Row[] assert_equal([], r.to_a, 'Empty') end +end + + +class TestCSV < Test::Unit::TestCase + include CSVTestSupport + + class << self + include CSVTestSupport + end + + @@simpleCSVData = { + [nil] => '', + [''] => '""', + [nil, nil] => ',', + [nil, nil, nil] => ',,', + ['foo'] => 'foo', + [','] => '","', + [',', ','] => '",",","', + [';'] => ';', + [';', ';'] => ';,;', + ["\"\r", "\"\r"] => "\"\"\"\r\",\"\"\"\r\"", + ["\"\n", "\"\n"] => "\"\"\"\n\",\"\"\"\n\"", + ["\t"] => "\t", + ["\t", "\t"] => "\t,\t", + ['foo', 'bar'] => 'foo,bar', + ['foo', '"bar"', 'baz'] => 'foo,"""bar""",baz', + ['foo', 'foo,bar', 'baz'] => 'foo,"foo,bar",baz', + ['foo', '""', 'baz'] => 'foo,"""""",baz', + ['foo', '', 'baz'] => 'foo,"",baz', + ['foo', nil, 'baz'] => 'foo,,baz', + [nil, 'foo', 'bar'] => ',foo,bar', + ['foo', 'bar', nil] => 'foo,bar,', + ['foo', "\r", 'baz'] => "foo,\"\r\",baz", + ['foo', "\n", 'baz'] => "foo,\"\n\",baz", + ['foo', "\r\n\r", 'baz'] => "foo,\"\r\n\r\",baz", + ['foo', "\r\n", 'baz'] => "foo,\"\r\n\",baz", + ['foo', "\r.\n", 'baz'] => "foo,\"\r.\n\",baz", + ['foo', "\r\n\n", 'baz'] => "foo,\"\r\n\n\",baz", + ['foo', '"', 'baz'] => 'foo,"""",baz', + } + + @@fullCSVData = { + [d('', true)] => '', + [d('')] => '""', + [d('', true), d('', true)] => ',', + [d('', true), d('', true), d('', true)] => ',,', + [d('foo')] => 'foo', + [d('foo'), d('bar')] => 'foo,bar', + [d('foo'), d('"bar"'), d('baz')] => 'foo,"""bar""",baz', + [d('foo'), d('foo,bar'), d('baz')] => 'foo,"foo,bar",baz', + [d('foo'), d('""'), d('baz')] => 'foo,"""""",baz', + [d('foo'), d(''), d('baz')] => 'foo,"",baz', + [d('foo'), d('', true), 
d('baz')] => 'foo,,baz', + [d('foo'), d("\r"), d('baz')] => "foo,\"\r\",baz", + [d('foo'), d("\n"), d('baz')] => "foo,\"\n\",baz", + [d('foo'), d("\r\n"), d('baz')] => "foo,\"\r\n\",baz", + [d('foo'), d("\r.\n"), d('baz')] => "foo,\"\r.\n\",baz", + [d('foo'), d("\r\n\n"), d('baz')] => "foo,\"\r\n\n\",baz", + [d('foo'), d('"'), d('baz')] => 'foo,"""",baz', + } + + @@fullCSVDataArray = @@fullCSVData.collect { |key, value| key } + + def ssv2csv(ssvStr, row_sep = nil) + sepConv(ssvStr, ?;, ?,, row_sep) + end + + def csv2ssv(csvStr, row_sep = nil) + sepConv(csvStr, ?,, ?;, row_sep) + end + + def tsv2csv(tsvStr, row_sep = nil) + sepConv(tsvStr, ?\t, ?,, row_sep) + end + + def csv2tsv(csvStr, row_sep = nil) + sepConv(csvStr, ?,, ?\t, row_sep) + end + + def sepConv(srcStr, srcSep, destSep, row_sep = nil) + rows = CSV::Row.new + cols, idx = CSV.parse_row(srcStr, 0, rows, srcSep, row_sep) + destStr = '' + cols = CSV.generate_row(rows, rows.size, destStr, destSep, row_sep) + destStr + end + +public + + def setup + @tmpdir = File.join(Dir.tmpdir, "ruby_test_csv_tmp_#{$$}") + Dir.mkdir(@tmpdir) + @infile = File.join(@tmpdir, 'in.csv') + @infiletsv = File.join(@tmpdir, 'in.tsv') + @emptyfile = File.join(@tmpdir, 'empty.csv') + @outfile = File.join(@tmpdir, 'out.csv') + @bomfile = File.join(File.dirname(__FILE__), "bom.csv") + @macfile = File.join(File.dirname(__FILE__), "mac.csv") + + CSV.open(@infile, "w") do |writer| + @@fullCSVDataArray.each do |row| + writer.add_row(row) + end + end + + CSV.open(@infiletsv, "w", ?\t) do |writer| + @@fullCSVDataArray.each do |row| + writer.add_row(row) + end + end + + CSV.generate(@emptyfile) do |writer| + # Create empty file. 
+ end + end + def teardown + FileUtils.rm_rf(@tmpdir) + end #### CSV::Reader unit test @@ -725,6 +732,11 @@ public assert_equal(0, cols) assert_equal("\r\n", buf, "Extra boundary check.") + buf = '' + cols = CSV.generate_row([], 0, buf, ?\t, ?|) + assert_equal(0, cols) + assert_equal("|", buf, "Extra boundary check.") + buf = '' cols = CSV.generate_row([d(1)], 2, buf) assert_equal('1,', buf) @@ -737,6 +749,10 @@ public cols = CSV.generate_row([d(1)], 2, buf, ?\t) assert_equal("1\t", buf) + buf = '' + cols = CSV.generate_row([d(1)], 2, buf, ?\t, ?|) + assert_equal("1\t", buf) + buf = '' cols = CSV.generate_row([d(1), d(2)], 1, buf) assert_equal("1\r\n", buf) @@ -749,6 +765,18 @@ public cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t) assert_equal("1\r\n", buf) + buf = '' + cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?\n) + assert_equal("1\n", buf) + + buf = '' + cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?\r) + assert_equal("1\r", buf) + + buf = '' + cols = CSV.generate_row([d(1), d(2)], 1, buf, ?\t, ?|) + assert_equal("1|", buf) + @@fullCSVData.each do |col, str| buf = '' cols = CSV.generate_row(col, col.size, buf) @@ -770,6 +798,22 @@ public assert_equal(str + "\r\n", tsv2csv(buf)) end + # row separator + @@fullCSVData.each do |col, str| + buf = '' + cols = CSV.generate_row(col, col.size, buf, ?,, ?|) + assert_equal(col.size, cols) + assert_equal(str + "|", buf) + end + + # col and row separator + @@fullCSVData.each do |col, str| + buf = '' + cols = CSV.generate_row(col, col.size, buf, ?\t, ?|) + assert_equal(col.size, cols) + assert_equal(str + "|", tsv2csv(buf, ?|)) + end + buf = '' toBe = '' cols = 0 @@ -809,6 +853,20 @@ public end assert_equal(colsToBe, cols) assert_equal(toBe, buf) + + buf = '' + toBe = '' + cols = 0 + colsToBe = 0 + @@fullCSVData.each do |col, str| + lineBuf = '' + cols += CSV.generate_row(col, col.size, lineBuf, ?|) + buf << tsv2csv(lineBuf, ?|) + toBe << tsv2csv(lineBuf, ?|) + colsToBe += col.size + end + 
assert_equal(colsToBe, cols) + assert_equal(toBe, buf) end def test_s_parse_line @@ -901,6 +959,16 @@ public assert_equal(cols, buf.size, "Reported size.") assert_equal(col.size, buf.size, "Size.") assert(buf.match(col)) + + # separator: | + buf = CSV::Row.new + cols, idx = CSV.parse_row(str + "|", 0, buf, ?,) + assert(!buf.match(col)) + buf = CSV::Row.new + cols, idx = CSV.parse_row(str + "|", 0, buf, ?,, ?|) + assert_equal(cols, buf.size, "Reported size.") + assert_equal(col.size, buf.size, "Size.") + assert(buf.match(col)) end @@fullCSVData.each do |col, str| @@ -921,6 +989,15 @@ public assert(buf.match(col)) end + @@fullCSVData.each do |col, str| + str = csv2tsv(str, ?|) + buf = CSV::Row.new + cols, idx = CSV.parse_row(str + "|", 0, buf, ?\t, ?|) + assert_equal(cols, buf.size, "Reported size.") + assert_equal(col.size, buf.size, "Size.") + assert(buf.match(col), str) + end + buf = CSV::Row.new cols, idx = CSV.parse_row("a,b,\"c\r\"", 0, buf) assert_equal(["a", "b", "c\r"], buf.to_a) @@ -1086,6 +1163,24 @@ public assert_equal(toBe.size, parsedCols) assert_equal(toBe.size, parsed.size) assert(parsed.match(toBe)) + + buf = '' + toBe = [] + @@fullCSVData.each do |col, str| + buf << str << "|" + toBe.concat(col) + end + idx = 0 + cols = 0 + parsed = CSV::Row.new + parsedCols = 0 + begin + cols, idx = CSV.parse_row(buf, idx, parsed, ?,, ?|) + parsedCols += cols + end while cols > 0 + assert_equal(toBe.size, parsedCols) + assert_equal(toBe.size, parsed.size) + assert(parsed.match(toBe)) end def test_utf8 @@ -1104,6 +1199,22 @@ public file.close end + def test_macCR + rows = [] + CSV.open(@macfile, "r", ?,, ?\r) do |row| + rows << row.to_a + end + assert_equal([["Avenches", "aus Umgebung"], ["Bad Hersfeld", "Ausgrabung"]], rows) + + rows = [] + file = File.open(@macfile) + CSV::Reader.parse(file.read, ?,, ?\r) do |row| + rows << row.to_a + end + assert_equal([["Avenches", "aus Umgebung"], ["Bad Hersfeld", "Ausgrabung"]], rows) + file.close + end + #### CSV unit test @@ 
-1518,3 +1629,13 @@ public assert_equal(csvStrTerminated, buf) end end + + +if $0 == __FILE__ + suite = Test::Unit::TestSuite.new('CSV') + ObjectSpace.each_object(Class) do |klass| + suite << klass.suite if (Test::Unit::TestCase > klass) + end + require 'test/unit/ui/console/testrunner' + Test::Unit::UI::Console::TestRunner.run(suite).passed? +end -- cgit v1.2.3