From 13e15cecf3f3bec0514ce9b2b4576135fe3f944c Mon Sep 17 00:00:00 2001 From: nahi Date: Thu, 19 Jun 2003 16:08:52 +0000 Subject: * lib/csv.rb: Import csv module. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3964 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- ChangeLog | 30 +- MANIFEST | 1 + lib/csv.rb | 1322 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1340 insertions(+), 13 deletions(-) create mode 100644 lib/csv.rb diff --git a/ChangeLog b/ChangeLog index 7788806180..b18b579918 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +Fri Jun 20 00:45:19 2003 NAKAMURA, Hiroshi + + * lib/csv.rb: Import csv module. + Thu Jun 19 22:51:41 2003 Masatoshi SEKI * lib/drb.rb, lib/drb/drb.rb, lib/drb/eq.rb, @@ -22,28 +26,28 @@ Wed Jun 18 23:41:27 2003 Marc Cartright Wed Jun 18 01:13:36 2003 why the lucky stiff - * ext/syck/rubyext.c (rb_syck_load_handler): merge key implemented. + * ext/syck/rubyext.c (rb_syck_load_handler): merge key implemented. - * ext/syck/rubyext.c (transfer_find_i): removed use of String#=~ in favor - of Regexp#match. + * ext/syck/rubyext.c (transfer_find_i): removed use of String#=~ in favor + of Regexp#match. - * lib/yaml.rb: YAML::try_implicit returns. + * lib/yaml.rb: YAML::try_implicit returns. - * lib/yaml/rubytypes.rb: Regexps added for type matching. + * lib/yaml/rubytypes.rb: Regexps added for type matching. - * lib/yaml/emitter.rb: fix String + nil error. + * lib/yaml/emitter.rb: fix String + nil error. Tue Jun 17 17:01:08 2003 why the lucky stiff - * ext/syck/gram.c: added grammar for certain empty sequence entries. + * ext/syck/gram.c: added grammar for certain empty sequence entries. - * ext/syck/handler.c, ext/syck/syck.c, ext/syck/syck.h: track bad anchors. + * ext/syck/handler.c, ext/syck/syck.c, ext/syck/syck.h: track bad anchors. - * ext/syck/token.c: added pause token, tag possible circular references. + * ext/syck/token.c: added pause token, tag possible circular references. - * lib/yaml/rubytypes.rb: parsing YMD time as Date instance. + * lib/yaml/rubytypes.rb: parsing YMD time as Date instance. - * ext/syck/rubyext.c: ditto. DomainType, PrivateType, BadAlias classes. + * ext/syck/rubyext.c: ditto. DomainType, PrivateType, BadAlias classes. Tue Jun 17 21:28:27 2003 Ariff Abdullah @@ -220,9 +224,9 @@ Thu Jun 5 18:33:46 2003 WATANABE Hirofumi Thu Jun 5 17:44:11 2003 why the lucky stiff - * ext/syck/rubyext.c (syck_parser_mark): was a bit heavy on the GC. + * ext/syck/rubyext.c (syck_parser_mark): was a bit heavy on the GC. - * lib/yaml.rb (YAML::transfer): added. + * lib/yaml.rb (YAML::transfer): added. Thu Jun 5 16:11:50 2003 NAKAMURA Usaku diff --git a/MANIFEST b/MANIFEST index 36b99b2175..a865bfe18d 100644 --- a/MANIFEST +++ b/MANIFEST @@ -115,6 +115,7 @@ lib/cgi-lib.rb lib/cgi.rb lib/cgi/session.rb lib/complex.rb +lib/csv.rb lib/date.rb lib/date/format.rb lib/date2.rb diff --git a/lib/csv.rb b/lib/csv.rb new file mode 100644 index 0000000000..947eacbcfa --- /dev/null +++ b/lib/csv.rb @@ -0,0 +1,1322 @@ +# CSV -- module for generating/parsing CSV data. + +# $Id$ + +# This module is copyrighted free software by NAKAMURA, Hiroshi. +# You can redistribute it and/or modify it under the same term as Ruby. + + +class CSV +public + + # DESCRIPTION + # CSV::Cell -- Describes 1 cell of CSV. + # + class Cell + public + + # Datum as string. + attr_accessor :data + + # Is this datum null? + attr_accessor :is_null + + # SYNOPSIS + # cell = CSV::Cell.new(data = '', is_null = true) + # + # ARGS + # data: datum as String + # is_null: is this datum null? + # + # RETURNS + # cell: Created instance. + # + # DESCRIPTION + # Create instance. If is_null is true, datum is stored in the instance + # created but it should be treated as 'NULL'. + # + def initialize(data = '', is_null = true) + @data = data + @is_null = is_null + end + + # SYNOPSIS + # CSV::Cell#match(rhs) + # + # ARGS + # rhs: an instance of CSV::Cell to be compared. + # + # RETURNS + # true/false. See the souce if you want to know matching algorithm. + # + # DESCRIPTION + # Compare another cell with me. Bare in mind Null matches with Null + # using this method. Use CSV::Cell#== if you want Null never matches + # with other data including Null. + # + def match(rhs) + if @is_null and rhs.is_null + true + elsif @is_null or rhs.is_null + false + else + @data == rhs.data + end + end + + # SYNOPSIS + # CSV::Cell#==(rhs) + # + # ARGS + # rhs: an instance of CSV::Cell to be compared. + # + # RETURNS + # true/false. See the souce if you want to know matching algorithm. + # + # DESCRIPTION + # Compare another cell with me. Bare in mind Null is not match with + # Null using this method. Null never matches with other data including + # Null. Use CSV::Cell#match if you want Null matches with Null. + # + def ==(rhs) + if @is_null or rhs.is_null + false + else + @data == rhs.data + end + end + end + + + # DESCRIPTION + # CSV::Row -- Describes a row of CSV. Each element must be a CSV::Cell. + # + class Row < Array + public + + # SYNOPSIS + # CSV::Row#to_a + # + # RETURNS + # An Array of String. + # + # DESCRIPTION + # Convert CSV::Cell to String. Null is converted to nil. + # + def to_a + self.collect { |cell| cell.is_null ? nil : cell.data } + end + + # SYNOPSIS + # CSV::Row#match(rhs) + # + # ARGS + # rhs: an Array of cells. Each cell is a instance of CSV::Cell. + # + # RETURNS + # true/false. See the souce if you want to know matching algorithm. + # + # DESCRIPTION + # Compare another row with me. + # + def match(rhs) + if self.size != rhs.size + return false + end + for idx in 0...(self.size) + unless self[idx].match(rhs[idx]) + return false + end + end + true + end + end + + + # SYNOPSIS + # 1. reader = CSV.open(filename, 'r') + # + # 2. CSV.open(filename, 'r') do |row| + # ... + # end + # + # 3. writer = CSV.open(filename, 'w') + # + # 4. CSV.open(filename, 'w') do |writer| + # ... + # end + # + # ARGS + # filename: filename to open. + # mode: 'r' for read (parse) + # 'w' for write (generate) + # row: an Array of cells which is a parsed line. + # writer: Created writer instance. See CSV::Writer#<< and + # CSV::Writer#add_row to know how to generate CSV string. + # + # RETURNS + # reader: Create reader instance. To get parse result, see + # CSV::Reader#each. + # writer: Created writer instance. See CSV::Writer#<< and + # CSV::Writer#add_row to know how to generate CSV string. + # + # DESCRIPTION + # Open a CSV formatted file to read or write. + # + # EXAMPLE 1 + # reader = CSV.open('csvfile.csv', 'r') + # row1 = reader.shift + # row2 = reader.shift + # if row2.empty? + # p 'row2 not find.' + # end + # reader.close + # + # EXAMPLE 2 + # CSV.open('csvfile.csv', 'r') do |row| + # p row + # end + # + # EXAMPLE 3 + # writer = CSV.open('csvfile.csv', 'w') + # writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil] + # writer.close + # + # EXAMPLE 4 + # CSV.open('csvfile.csv', 'w') do |writer| + # writer << ['r1c1', 'r1c2'] + # writer << ['r2c1', 'r2c2'] + # writer << [nil, nil] + # end + # + def CSV.open(filename, mode, col_sep = ?,, &block) + if mode == 'r' or mode == 'rb' + open_reader(filename, col_sep, &block) + elsif mode == 'w' or mode == 'wb' + open_writer(filename, col_sep, &block) + else + raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'") + end + end + + def CSV.parse(filename, col_sep = ?,, &block) + open_reader(filename, col_sep, &block) + end + + def CSV.generate(filename, col_sep = ?,, &block) + open_writer(filename, col_sep, &block) + end + + # Private class methods. + class << self + private + def open_reader(filename, col_sep, &block) + file = File.open(filename, 'rb') + if block + begin + CSV::Reader.parse(file, col_sep) do |row| + yield(row) + end + ensure + file.close + end + nil + else + reader = CSV::Reader.create(file, col_sep) + reader.close_on_terminate + reader + end + end + + def open_writer(filename, col_sep, &block) + file = File.open(filename, 'wb') + if block + begin + CSV::Writer.generate(file, col_sep) do |writer| + yield(writer) + end + ensure + file.close + end + nil + else + writer = CSV::Writer.create(file, col_sep) + writer.close_on_terminate + writer + end + end + end + + + # DESCRIPTION + # CSV::Reader -- CSV formatted string/stream reader. + # + # EXAMPLE + # Read CSV lines untill the first column is 'stop'. + # + # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # p row + # break if !row[0].is_null && row[0].data == 'stop' + # end + # + class Reader + include Enumerable + public + + # SYNOPSIS + # reader = CSV::Reader.create(str_or_readable) + # + # ARGS + # str_or_readable: a CSV data to be parsed. A String or an IO. + # + # RETURNS + # reader: Created instance. + # + # DESCRIPTION + # Create instance. To get parse result, see CSV::Reader#each. + # + def Reader.create(str_or_readable, col_sep = ?,) + case str_or_readable + when IO + IOReader.new(str_or_readable, col_sep) + when String + StringReader.new(str_or_readable, col_sep) + else + IOReader.new(str_or_readable, col_sep) + end + end + + # SYNOPSIS + # CSV::Reader.parse(str_or_readable) do |row| + # ... + # end + # + # ARGS + # str_or_readable: a CSV data to be parsed. A String or an IO. + # row: a CSV::Row; an Array of a CSV::Cell in a line. + # + # RETURNS + # nil + # + # DESCRIPTION + # Parse CSV data and get lines. Caller block is called for each line + # with an argument which is a chunk of cells in a row. + # + # Block value is always nil. Rows are not cached for performance + # reason. + # + def Reader.parse(str_or_readable, col_sep = ?,) + reader = create(str_or_readable, col_sep) + reader.each do |row| + yield(row) + end + reader.close + nil + end + + # SYNOPSIS + # CSV::Reader#each do |row| + # ... + # end + # + # ARGS + # row: a CSV::Row; an Array of a CSV::Cell in a line. + # + # RETURNS + # nil + # + # DESCRIPTION + # Caller block is called for each line with an argument which is a chunk + # of cells in a row. + # + # Block value is always nil. Rows are not cached for performance + # reason. + # + def each + while true + row = Row.new + parsed_cells = get_row(row) + if parsed_cells == 0 + break + end + yield(row) + end + nil + end + + # SYNOPSIS + # cell = CSV::Reader#shift + # + # RETURNS + # cell: a CSV::Row; an Array of a CSV::Cell. + # + # DESCRIPTION + # Extract cells of next line. + # + def shift + row = Row.new + parsed_cells = get_row(row) + row + end + + # SYNOPSIS + # CSV::Reader#close + # + # RETURNS + # nil + # + # DESCRIPTION + # Close this reader. + # + def close + terminate + end + + private + def initialize(dev) + raise RuntimeError.new('Do not instanciate this class directly.') + end + + def get_row(row) + raise NotImplementedError.new('Method get_row must be defined in a derived class.') + end + + def terminate + # Define if needed. + end + end + + + # DESCRIPTION + # CSV::StringReader -- CSV formatted stream reader. + # + # EXAMPLE + # Read CSV lines untill the first column is 'stop'. + # + # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # p row + # break if !row[0].is_null && row[0].data == 'stop' + # end + # + class StringReader < Reader + public + + # SYNOPSIS + # reader = CSV::StringReader.new(string) + # + # ARGS + # string: a CSV String to be parsed. + # + # RETURNS + # reader: Created instance. + # + # DESCRIPTION + # Create instance. To get parse result, see CSV::Reader#each. + # + def initialize(string, col_sep = ?,) + @col_sep = col_sep + @dev = string + @idx = 0 + if @dev[0, 3] == "\xef\xbb\xbf" + @idx += 3 + end + end + + private + def get_row(row) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) + if parsed_cells == 0 && next_idx == 0 && @idx != @dev.size + raise IllegalFormatError.new + end + @idx = next_idx + parsed_cells + end + end + + + # DESCRIPTION + # CSV::IOReader -- CSV formatted stream reader. + # + # EXAMPLE + # Read CSV lines untill the first column is 'stop'. + # + # CSV::Reader.parse(File.open('bigdata', 'rb')) do |row| + # p row + # break if !row[0].is_null && row[0].data == 'stop' + # end + # + class IOReader < Reader + public + + # SYNOPSIS + # reader = CSV::IOReader.new(io) + # + # ARGS + # io: a CSV data to be parsed. Must be an IO. (io#read is called.) + # + # RETURNS + # reader: Created instance. + # + # DESCRIPTION + # Create instance. To get parse result, see CSV::Reader#each. + # + def initialize(io, col_sep = ?,) + @io = io + @col_sep = col_sep + @dev = CSV::IOBuf.new(@io) + @idx = 0 + if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf + @idx += 3 + end + @close_on_terminate = false + end + + # SYNOPSIS + # CSV::IOReader#close_on_terminate + # + # RETURNS + # true + # + # DESCRIPTION + # Tell this reader to close the IO when terminated (Triggered by invoking + # CSV::IOReader#close). + # + def close_on_terminate + @close_on_terminate = true + end + + private + def get_row(row) + parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @col_sep) + if parsed_cells == 0 && next_idx == 0 && !@dev.is_eos? + raise IllegalFormatError.new + end + dropped = @dev.drop(next_idx) + @idx = next_idx - dropped + parsed_cells + end + + def terminate + if @close_on_terminate + @io.close + end + + if @dev + @dev.close + end + end + end + + + # DESCRIPTION + # CSV::Writer -- CSV formatted string/stream writer. + # + # EXAMPLE + # Write rows to 'csvout' file. + # + # outfile = File.open('csvout', 'wb') + # CSV::Writer.generate(outfile) do |csv| + # csv << ['c1', nil, '', '"', "\r\n", 'c2'] + # # or + # csv.add_row [ + # CSV::Cell.new('c1', false), + # CSV::Cell.new('dummy', true), + # CSV::Cell.new('', false), + # CSV::Cell.new('"', false), + # CSV::Cell.new("\r\n", false) + # CSV::Cell.new('c2', false) + # ] + # ... + # ... + # end + # + # outfile.close + # + class Writer + public + + # SYNOPSIS + # writer = CSV::Writer.create(str_or_readable) + # + # ARGS + # str_or_writable: device for generated CSV string. Must respond to + # '<<(string)'. + # + # RETURNS + # writer: Created instance. + # + # DESCRIPTION + # Create instance. To add CSV data to generate CSV string, see + # CSV::Writer#<< or CSV::Writer#add_row. + # + def Writer.create(str_or_readable, col_sep = ?,) + BasicWriter.new(str_or_readable, col_sep) + end + + # SYNOPSIS + # CSV::Writer.generate(str_or_writable) do |writer| + # ... + # end + # + # ARGS + # str_or_writable: device for generated CSV string. Must respond to + # '<<(string)'. + # writer: Created writer instance. See CSV::Writer#<< and + # CSV::Writer#add_row to know how to generate CSV string. + # + # RETURNS + # nil + # + # DESCRIPTION + # Create writer instance. Caller block is called with the new instance. + # To add CSV data to generate CSV string, see CSV::Writer#<< or + # CSV::Writer#add_row. + # + def Writer.generate(str_or_writable, col_sep = ?,) + writer = Writer.create(str_or_writable, col_sep) + yield(writer) + writer.close + nil + end + + # SYNOPSIS + # CSV::Writer#<<(row) + # + # ARGS + # row: an Array of a String. + # + # RETURNS + # self + # + # DESCRIPTION + # Dump CSV stream to the device. Argument is an array of a String like + # ['c1', 'c2', 'c3']. + # + def <<(ary) + row = ary.collect { |item| + if item.is_a?(Cell) + item + elsif (item.nil?) + Cell.new('', true) + else + Cell.new(item.to_s, false) + end + } + CSV.generate_row(row, row.size, @dev, @col_sep) + self + end + + # SYNOPSIS + # CSV::Writer#<<(row) + # + # ARGS + # row: an Array of a CSV::Cell. + # + # RETURNS + # self + # + # DESCRIPTION + # Dump CSV stream to the device. Argument is an array of a CSV::Cell + # like [CSV::Cell.new('c1', false), CSV::Cell.new('dummy', true)]. + # (Formar is 'c1' and latter is Null.) + # + def add_row(row) + CSV.generate_row(row, row.size, @dev, @col_sep) + self + end + + # SYNOPSIS + # CSV::Writer#close + # + # RETURNS + # nil + # + # DESCRIPTION + # Close this writer. + # + def close + terminate + end + + private + def initialize(dev) + raise RuntimeError.new('Do not instanciate this class directly.') + end + + def terminate + # Define if needed. + end + end + + + # DESCRIPTION + # CSV::BasicWriter -- CSV formatted string/stream writer using <<. + # + class BasicWriter < Writer + public + + # SYNOPSIS + # writer = CSV::BasicWriter.new(str_or_writable) + # + # ARGS + # str_or_writable: device for generated CSV string. Must respond to + # '<<(string)'. + # + # RETURNS + # writer: Created instance. + # + # DESCRIPTION + # Create instance. To add CSV data to generate CSV string, see + # CSV::Writer#<< or CSV::Writer#add_row. + # + def initialize(str_or_writable, col_sep = ?,) + @col_sep = col_sep + @dev = str_or_writable + @close_on_terminate = false + end + + # SYNOPSIS + # CSV::BasicWriter#close_on_terminate + # + # RETURNS + # true + # + # DESCRIPTION + # Tell this writer to close the IO when terminated (Triggered by invoking + # CSV::BasicWriter#close). + # + def close_on_terminate + @close_on_terminate = true + end + + private + def terminate + if @close_on_terminate + @dev.close + end + end + end + + # SYNOPSIS + # cells = CSV.parse_line(src, col_sep = ?,) + # + # ARGS + # src: a CSV String. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # + # RETURNS + # cells: an Array of parsed cells in first line. Each cell is a String. + # + # DESCRIPTION + # Parse one line from given string. Bare in mind it parses ONE LINE. Rest + # of the string is ignored for example "a,b\r\nc,d" => ['a', 'b'] and the + # second line 'c,d' is ignored. + # + # If you don't know whether a target string to parse is exactly 1 line or + # not, use CSV.parse_row instead of this method. + # + def CSV.parse_line(src, col_sep = ?,) + idx = 0 + res_type = :DT_COLSEP + cells = Row.new + begin + while (res_type.equal?(:DT_COLSEP)) + cell = Cell.new + res_type, idx = parse_body(src, idx, cell, col_sep) + cells.push(cell.is_null ? nil : cell.data) + end + rescue IllegalFormatError + return Row.new + end + cells + end + + + # SYNOPSIS + # str = CSV.generate_line(cells, col_sep = ?,) + # + # ARGS + # cells: an Array of cell to be converted to CSV string. Each cell must + # respond to 'to_s'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # + # RETURNS + # str: a String of generated CSV string. + # + # DESCRIPTION + # Create a line from cells. Each cell is stringified by to_s. + # + def CSV.generate_line(cells, col_sep = ?,) + if (cells.size == 0) + return '' + end + res_type = :DT_COLSEP + result_str = '' + idx = 0 + while true + cell = if (cells[idx].nil?) + Cell.new('', true) + else + Cell.new(cells[idx].to_s, false) + end + generate_body(cell, result_str, col_sep) + idx += 1 + if (idx == cells.size) + break + end + generate_separator(:DT_COLSEP, result_str, col_sep) + end + result_str + end + + # SYNOPSIS + # parsed_cells, idx = CSV.parse_row(src, idx, out_dev, col_sep = ?,) + # + # ARGS + # src: a CSV data to be parsed. Must respond '[](idx)'. + # src[](idx) must return a char. (Not a string such as 'a', but 97). + # src[](idx_out_of_bounds) must return nil. A String satisfies this + # requirement. + # idx: index of parsing location of 'src'. 0 origin. + # out_dev: buffer for parsed cells. Must respond '<<(CSV::Cell)'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # + # RETURNS + # parsed_cells: num of parsed cells. + # idx: index of next parsing location of 'src'. + # + # DESCRIPTION + # Parse a line from string. To parse lines in CSV string, see EXAMPLE + # below. + # + # EXAMPLE + # src = "a,b\r\nc,d\r\ne,f" + # idx = 0 + # begin + # parsed = [] + # parsed_cells, idx = CSV.parse_row(src, idx, parsed) + # puts "Parsed #{ parsed_cells } cells." + # p parsed + # end while parsed_cells > 0 + # + def CSV.parse_row(src, idx, out_dev, col_sep = ?,) + idx_backup = idx + parsed_cells = 0 + res_type = :DT_COLSEP + begin + while (!res_type.equal?(:DT_ROWSEP)) + cell = Cell.new + res_type, idx = parse_body(src, idx, cell, col_sep) + if res_type.equal?(:DT_EOS) + if idx == idx_backup #((parsed_cells == 0) && (cell.is_null)) + return 0, 0 + end + res_type = :DT_ROWSEP + end + parsed_cells += 1 + out_dev << cell + end + rescue IllegalFormatError + return 0, 0 + end + return parsed_cells, idx + end + + # SYNOPSIS + # parsed_cells = CSV.generate_row(src, cells, out_dev, col_sep = ?,) + # + # ARGS + # src: an Array of CSV::Cell to be converted to CSV string. Must respond to + # 'size' and '[](idx)'. src[idx] must return CSV::Cell. + # cells: num of cells in a line. + # out_dev: buffer for generated CSV string. Must respond to '<<(string)'. + # col_sep: Column separator. ?, by default. If you want to separate + # fields with semicolon, give ?; here. + # + # RETURNS + # parsed_cells: num of converted cells. + # + # DESCRIPTION + # Convert a line from cells data to string. To generate multi-row CSV + # string, See EXAMPLE below. + # + # EXAMPLE + # def d(str) + # CSV::Cell.new(str, false) + # end + # + # row1 = [d('a'), d('b')] + # row2 = [d('c'), d('d')] + # row3 = [d('e'), d('f')] + # src = [row1, row2, row3] + # buf = '' + # src.each do |row| + # parsed_cells = CSV.generate_row(row, 2, buf) + # puts "Created #{ parsed_cells } cells." + # end + # p buf + # + def CSV.generate_row(src, cells, out_dev, col_sep = ?,) + src_size = src.size + if (src_size == 0) + if cells == 0 + generate_separator(:DT_ROWSEP, out_dev, col_sep) + end + return 0 + end + res_type = :DT_COLSEP + parsed_cells = 0 + generate_body(src[parsed_cells], out_dev, col_sep) + parsed_cells += 1 + while ((parsed_cells < cells) && (parsed_cells != src_size)) + generate_separator(:DT_COLSEP, out_dev, col_sep) + generate_body(src[parsed_cells], out_dev, col_sep) + parsed_cells += 1 + end + if (parsed_cells == cells) + generate_separator(:DT_ROWSEP, out_dev, col_sep) + else + generate_separator(:DT_COLSEP, out_dev, col_sep) + end + parsed_cells + end + +private + class IllegalFormatError < RuntimeError; end + + # Private class methods. + class << self + private + + def parse_body(src, idx, cell, col_sep) + cell.is_null = false + state = :ST_START + quoted = false + cr = false + c = nil + while (c = src[idx]) + idx += 1 + result_state = :DT_UNKNOWN + if (c == col_sep) + if state.equal?(:ST_DATA) + if cr + raise IllegalFormatError.new + end + if (!quoted) + state = :ST_END + result_state = :DT_COLSEP + else + cell.data << c.chr + end + elsif state.equal?(:ST_QUOTE) + if cr + raise IllegalFormatError.new + end + state = :ST_END + result_state = :DT_COLSEP + else # :ST_START + cell.is_null = true + state = :ST_END + result_state = :DT_COLSEP + end + elsif (c == ?") # " for vim syntax hilighting. + if state.equal?(:ST_DATA) + if cr + raise IllegalFormatError.new + end + if quoted + quoted = false + state = :ST_QUOTE + else + raise IllegalFormatError.new + end + elsif state.equal?(:ST_QUOTE) + cell.data << c.chr + quoted = true + state = :ST_DATA + else # :ST_START + quoted = true + state = :ST_DATA + end + elsif (c == ?\r) + if cr + raise IllegalFormatError.new + end + if quoted + cell.data << c.chr + state = :ST_DATA + else + cr = true + end + elsif (c == ?\n) + if state.equal?(:ST_DATA) + if cr + state = :ST_END + result_state = :DT_ROWSEP + cr = false + else + if quoted + cell.data << c.chr + state = :ST_DATA + else + state = :ST_END + result_state = :DT_ROWSEP + end + end + elsif state.equal?(:ST_QUOTE) + state = :ST_END + result_state = :DT_ROWSEP + if cr + cr = false + end + else # :ST_START + cell.is_null = true + state = :ST_END + result_state = :DT_ROWSEP + end + else + if state.equal?(:ST_DATA) || state.equal?(:ST_START) + if cr + raise IllegalFormatError.new + end + cell.data << c.chr + state = :ST_DATA + else # :ST_QUOTE + raise IllegalFormatError.new + end + end + if state.equal?(:ST_END) + return result_state, idx; + end + end + if state.equal?(:ST_START) + cell.is_null = true + elsif state.equal?(:ST_QUOTE) + true # dummy for coverate; only a data + elsif quoted + raise IllegalFormatError.new + elsif cr + raise IllegalFormatError.new + end + return :DT_EOS, idx + end + + def generate_body(cells, out_dev, col_sep) + row_data = cells.data.dup + if (!cells.is_null) + if (row_data.gsub!('"', '""') || + row_data.include?(col_sep) || + (/[\r\n]/ =~ row_data) || (cells.data.empty?)) + out_dev << '"' << row_data << '"' + else + out_dev << row_data + end + end + end + + def generate_separator(type, out_dev, col_sep) + case type + when :DT_COLSEP + out_dev << col_sep.chr + when :DT_ROWSEP + out_dev << "\r\n" + end + end + end + + + # DESCRIPTION + # CSV::StreamBuf -- a class for a bufferd stream. + # + # EXAMPLE 1 -- an IO. + # class MyBuf < StreamBuf + # # Do initialize myself before a super class. Super class might call my + # # method 'read'. (Could be awful for C++ user. :-) + # def initialize(s) + # @s = s + # super() + # end + # + # # define my own 'read' method. + # # CAUTION: Returning nil means EnfOfStream. + # def read(size) + # @s.read(size) + # end + # + # # release buffers. in Ruby which has GC, you do not have to call this... + # def terminate + # @s = nil + # super() + # end + # end + # + # buf = MyBuf.new(STDIN) + # my_str = '' + # p buf[0, 0] # => '' (null string) + # p buf[0] # => 97 (char code of 'a') + # p buf[0, 1] # => 'a' + # my_str = buf[0, 5] + # p my_str # => 'abcde' (5 chars) + # p buf[0, 6] # => "abcde\n" (6 chars) + # p buf[0, 7] # => "abcde\n" (6 chars) + # p buf.drop(3) # => 3 (dropped chars) + # p buf.get(0, 2) # => 'de' (2 chars) + # p buf.is_eos? # => false (is not EOS here) + # p buf.drop(5) # => 3 (dropped chars) + # p buf.is_eos? # => true (is EOS here) + # p buf[0] # => nil (is EOS here) + # + # EXAMPLE 2 -- String. + # This is a conceptual example. No pros with this. + # + # class StrBuf < StreamBuf + # def initialize(s) + # @str = s + # @idx = 0 + # super() + # end + # + # def read(size) + # str = @str[@idx, size] + # @idx += str.size + # str + # end + # end + # + class StreamBuf # pure virtual. (do not instanciate it directly) + public + + # SYNOPSIS + # char/str = CSV::StreamBuf#get(idx, n = nil) + # char/str = CSV::StreamBuf#[idx, n = nil] + # + # ARGS + # idx: index of a string to specify a start point of a string to get. + # Unlike String instance, idx < 0 returns nil. + # n: size of a string to get. + # + # RETURNS + # char: if n == nil. A char at idx. + # str: if n != nil. A partial string, from idx to (idx + size). At + # EOF, the string size could not equal to arg n. + # + # DESCRIPTION + # Get a char or a partial string from the stream. + # + def [](idx, n = nil) + if idx < 0 + return nil + end + if (idx_is_eos?(idx)) + if n and (@offset + idx == buf_size(@cur_buf)) + # Like a String, 'abc'[4, 1] returns nil and + # 'abc'[3, 1] returns '' not nil. + return '' + else + return nil + end + end + my_buf = @cur_buf + my_offset = @offset + next_idx = idx + while (my_offset + next_idx >= buf_size(my_buf)) + if (my_buf == @buf_tail_idx) + unless add_buf + break + end + end + next_idx = my_offset + next_idx - buf_size(my_buf) + my_buf += 1 + my_offset = 0 + end + loc = my_offset + next_idx + if !n + return @buf_list[my_buf][loc] # Fixnum of char code. + elsif (loc + n - 1 < buf_size(my_buf)) + return @buf_list[my_buf][loc, n] # String. + else # should do loop insted of (tail) recursive call... + res = @buf_list[my_buf][loc, BufSize] + size_added = buf_size(my_buf) - loc + if size_added > 0 + idx += size_added + n -= size_added + ret = self[idx, n] + if ret + res << ret + end + end + return res + end + end + alias get [] + + # SYNOPSIS + # size_dropped = CSV::StreamBuf#drop(n) + # + # ARGS + # n: drop size + # + # RETURNS + # size_dropped: droped size. At EOF, dropped size might not equals to arg n. + # 0 if n <= 0. + # + # DESCRIPTION + # Drop a string from the stream. Once you drop the head of the stream, + # access to the dropped part via [] or get returns nil. + # + def drop(n) + if is_eos? + return 0 + end + size_dropped = 0 + while (n > 0) + if (!@is_eos || (@cur_buf != @buf_tail_idx)) + if (@offset + n < buf_size(@cur_buf)) + size_dropped += n + @offset += n + n = 0 + else + size = buf_size(@cur_buf) - @offset + size_dropped += size + n -= size + @offset = 0 + unless rel_buf + unless add_buf + break + end + @cur_buf = @buf_tail_idx + end + end + end + end + size_dropped + end + + # SYNOPSIS + # is_eos = CSV::StreamBuf#is_eos? + # + # RETURNS + # is_eos: true if end of the stream or false. + # + # DESCRIPTION + # Check EOF or not. + # + def is_eos? + return idx_is_eos?(0) + end + + # SYNOPSIS + # N/A + # + # DESCRIPTION + # Do not instanciate this class directly. Define your own class which + # derives this class and define 'read' instance method. + # + def initialize + @buf_list = [] + @cur_buf = @buf_tail_idx = -1 + @offset = 0 + @is_eos = false + add_buf + @cur_buf = @buf_tail_idx + end + + protected + def terminate + while (rel_buf); end + end + + # protected method 'read' must be defined in derived classes. + # CAUTION: Returning a string which size is not equal to 'size' means + # EnfOfStream. When it is not at EOS, you must block the callee, try to + # read and return the sized string. + def read(size) # raise EOFError + raise NotImplementedError.new('Method read must be defined in a derived class.') + end + + private + + def buf_size(idx) + @buf_list[idx].size + end + + def add_buf + if @is_eos + return false + end + begin + str_read = read(BufSize) + rescue EOFError + str_read = nil + rescue + terminate + raise + end + if str_read.nil? + @is_eos = true + @buf_list.push('') + @buf_tail_idx += 1 + false + else + @buf_list.push(str_read) + @buf_tail_idx += 1 + true + end + end + + def rel_buf + if (@cur_buf < 0) + return false + end + @buf_list[@cur_buf] = nil + if (@cur_buf == @buf_tail_idx) + @cur_buf = -1 + return false + else + @cur_buf += 1 + return true + end + end + + def idx_is_eos?(idx) + (@is_eos && ((@cur_buf < 0) || (@cur_buf == @buf_tail_idx))) + end + + BufSize = 1024 * 8 + end + + # DESCRIPTION + # CSV::IOBuf -- a class for a bufferd IO. + # + # EXAMPLE + # # File 'bigdata' could be a giga-byte size one! + # buf = CSV::IOBuf.new(File.open('bigdata', 'rb')) + # CSV::Reader.new(buf).each do |row| + # p row + # break if row[0].data == 'admin' + # end + # + class IOBuf < StreamBuf + public + def initialize(s) + @s = s + super() + end + + def close + terminate + end + + private + def read(size) + @s.read(size) + end + + def terminate + super() + end + end +end -- cgit v1.2.3