aboutsummaryrefslogtreecommitdiffstats
path: root/lib/csv
diff options
context:
space:
mode:
authorHiroshi SHIBATA <hsbt@ruby-lang.org>2022-12-09 08:46:14 +0900
committerHiroshi SHIBATA <hsbt@ruby-lang.org>2022-12-09 16:36:22 +0900
commit643918ecfe9c980f251247de6acd3be6280da24c (patch)
treea5b4011c13ee3af5b110e377a839e79045266dcd /lib/csv
parent260a00d80e4dcc930b040313a99da29e4b1e6678 (diff)
downloadruby-643918ecfe9c980f251247de6acd3be6280da24c.tar.gz
Merge csv-3.2.6
Diffstat (limited to 'lib/csv')
-rw-r--r--lib/csv/fields_converter.rb5
-rw-r--r--lib/csv/input_record_separator.rb15
-rw-r--r--lib/csv/parser.rb293
-rw-r--r--lib/csv/row.rb229
-rw-r--r--lib/csv/table.rb626
-rw-r--r--lib/csv/version.rb2
-rw-r--r--lib/csv/writer.rb10
7 files changed, 926 insertions, 254 deletions
diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb
index b206118d99..d15977d379 100644
--- a/lib/csv/fields_converter.rb
+++ b/lib/csv/fields_converter.rb
@@ -44,7 +44,7 @@ class CSV
@converters.empty?
end
- def convert(fields, headers, lineno)
+ def convert(fields, headers, lineno, quoted_fields)
return fields unless need_convert?
fields.collect.with_index do |field, index|
@@ -63,7 +63,8 @@ class CSV
else
header = nil
end
- field = converter[field, FieldInfo.new(index, lineno, header)]
+ quoted = quoted_fields[index]
+ field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
end
break unless field.is_a?(String) # short-circuit pipeline for speed
end
diff --git a/lib/csv/input_record_separator.rb b/lib/csv/input_record_separator.rb
index bbf13479f7..7a99343c0c 100644
--- a/lib/csv/input_record_separator.rb
+++ b/lib/csv/input_record_separator.rb
@@ -4,20 +4,7 @@ require "stringio"
class CSV
module InputRecordSeparator
class << self
- is_input_record_separator_deprecated = false
- verbose, $VERBOSE = $VERBOSE, true
- stderr, $stderr = $stderr, StringIO.new
- input_record_separator = $INPUT_RECORD_SEPARATOR
- begin
- $INPUT_RECORD_SEPARATOR = "\r\n"
- is_input_record_separator_deprecated = (not $stderr.string.empty?)
- ensure
- $INPUT_RECORD_SEPARATOR = input_record_separator
- $stderr = stderr
- $VERBOSE = verbose
- end
-
- if is_input_record_separator_deprecated
+ if RUBY_VERSION >= "3.0.0"
def value
"\n"
end
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 7e943acf21..afb3131cd5 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -2,15 +2,10 @@
require "strscan"
-require_relative "delete_suffix"
require_relative "input_record_separator"
-require_relative "match_p"
require_relative "row"
require_relative "table"
-using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
-using CSV::MatchP if CSV.const_defined?(:MatchP)
-
class CSV
# Note: Don't use this class directly. This is an internal class.
class Parser
@@ -27,6 +22,10 @@ class CSV
class InvalidEncoding < StandardError
end
+ # Raised when unexpected case is happen.
+ class UnexpectedError < StandardError
+ end
+
#
# CSV::Scanner receives a CSV output, scans it and return the content.
# It also controls the life cycle of the object with its methods +keep_start+,
@@ -78,10 +77,10 @@ class CSV
# +keep_end+, +keep_back+, +keep_drop+.
#
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
- # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+ # If there's a match, the scanner advances the "scan pointer" and returns the matched string.
# Otherwise, the scanner returns nil.
#
- # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+ # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
# If there is no more data (eos? = true), it returns "".
#
class InputsScanner
@@ -96,11 +95,13 @@ class CSV
end
def each_line(row_separator)
+ return enum_for(__method__, row_separator) unless block_given?
buffer = nil
input = @scanner.rest
position = @scanner.pos
offset = 0
n_row_separator_chars = row_separator.size
+ # trace(__method__, :start, line, input)
while true
input.each_line(row_separator) do |line|
@scanner.pos += line.bytesize
@@ -140,25 +141,28 @@ class CSV
end
def scan(pattern)
+ # trace(__method__, pattern, :start)
value = @scanner.scan(pattern)
+ # trace(__method__, pattern, :done, :last, value) if @last_scanner
return value if @last_scanner
- if value
- read_chunk if @scanner.eos?
- return value
- else
- nil
- end
+ read_chunk if value and @scanner.eos?
+ # trace(__method__, pattern, :done, value)
+ value
end
def scan_all(pattern)
+ # trace(__method__, pattern, :start)
value = @scanner.scan(pattern)
+ # trace(__method__, pattern, :done, :last, value) if @last_scanner
return value if @last_scanner
return nil if value.nil?
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
+ # trace(__method__, pattern, :sub, sub_value)
value << sub_value
end
+ # trace(__method__, pattern, :done, value)
value
end
@@ -167,68 +171,126 @@ class CSV
end
def keep_start
- @keeps.push([@scanner.pos, nil])
+ # trace(__method__, :start)
+ adjust_last_keep
+ @keeps.push([@scanner, @scanner.pos, nil])
+ # trace(__method__, :done)
end
def keep_end
- start, buffer = @keeps.pop
- keep = @scanner.string.byteslice(start, @scanner.pos - start)
+ # trace(__method__, :start)
+ scanner, start, buffer = @keeps.pop
+ if scanner == @scanner
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
+ else
+ keep = @scanner.string.byteslice(0, @scanner.pos)
+ end
if buffer
buffer << keep
keep = buffer
end
+ # trace(__method__, :done, keep)
keep
end
def keep_back
- start, buffer = @keeps.pop
+ # trace(__method__, :start)
+ scanner, start, buffer = @keeps.pop
if buffer
+ # trace(__method__, :rescan, start, buffer)
string = @scanner.string
- keep = string.byteslice(start, string.bytesize - start)
+ if scanner == @scanner
+ keep = string.byteslice(start, string.bytesize - start)
+ else
+ keep = string
+ end
if keep and not keep.empty?
@inputs.unshift(StringIO.new(keep))
@last_scanner = false
end
@scanner = StringScanner.new(buffer)
else
+ if @scanner != scanner
+ message = "scanners are different but no buffer: "
+ message += "#{@scanner.inspect}(#{@scanner.object_id}): "
+ message += "#{scanner.inspect}(#{scanner.object_id})"
+ raise UnexpectedError, message
+ end
+ # trace(__method__, :repos, start, buffer)
@scanner.pos = start
end
read_chunk if @scanner.eos?
end
def keep_drop
- @keeps.pop
+ _, _, buffer = @keeps.pop
+ # trace(__method__, :done, :empty) unless buffer
+ return unless buffer
+
+ last_keep = @keeps.last
+ # trace(__method__, :done, :no_last_keep) unless last_keep
+ return unless last_keep
+
+ if last_keep[2]
+ last_keep[2] << buffer
+ else
+ last_keep[2] = buffer
+ end
+ # trace(__method__, :done)
end
def rest
@scanner.rest
end
+ def check(pattern)
+ @scanner.check(pattern)
+ end
+
private
- def read_chunk
- return false if @last_scanner
+ def trace(*args)
+ pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
+ end
- unless @keeps.empty?
- keep = @keeps.last
- keep_start = keep[0]
- string = @scanner.string
- keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
- if keep_data
- keep_buffer = keep[1]
- if keep_buffer
- keep_buffer << keep_data
- else
- keep[1] = keep_data.dup
- end
+ def adjust_last_keep
+ # trace(__method__, :start)
+
+ keep = @keeps.last
+ # trace(__method__, :done, :empty) if keep.nil?
+ return if keep.nil?
+
+ scanner, start, buffer = keep
+ string = @scanner.string
+ if @scanner != scanner
+ start = 0
+ end
+ if start == 0 and @scanner.eos?
+ keep_data = string
+ else
+ keep_data = string.byteslice(start, @scanner.pos - start)
+ end
+ if keep_data
+ if buffer
+ buffer << keep_data
+ else
+ keep[2] = keep_data.dup
end
- keep[0] = 0
end
+ # trace(__method__, :done)
+ end
+
+ def read_chunk
+ return false if @last_scanner
+
+ adjust_last_keep
+
input = @inputs.first
case input
when StringIO
string = input.read
raise InvalidEncoding unless string.valid_encoding?
+ # trace(__method__, :stringio, string)
@scanner = StringScanner.new(string)
@inputs.shift
@last_scanner = @inputs.empty?
@@ -237,6 +299,7 @@ class CSV
chunk = input.gets(@row_separator, @chunk_size)
if chunk
raise InvalidEncoding unless chunk.valid_encoding?
+ # trace(__method__, :chunk, chunk)
@scanner = StringScanner.new(chunk)
if input.respond_to?(:eof?) and input.eof?
@inputs.shift
@@ -244,6 +307,7 @@ class CSV
end
true
else
+ # trace(__method__, :no_chunk)
@scanner = StringScanner.new("".encode(@encoding))
@inputs.shift
@last_scanner = @inputs.empty?
@@ -278,7 +342,11 @@ class CSV
end
def field_size_limit
- @field_size_limit
+ @max_field_size&.succ
+ end
+
+ def max_field_size
+ @max_field_size
end
def skip_lines
@@ -346,6 +414,16 @@ class CSV
end
message = "Invalid byte sequence in #{@encoding}"
raise MalformedCSVError.new(message, lineno)
+ rescue UnexpectedError => error
+ if @scanner
+ ignore_broken_line
+ lineno = @lineno
+ else
+ lineno = @lineno + 1
+ end
+ message = "This should not be happen: #{error.message}: "
+ message += "Please report this to https://github.com/ruby/csv/issues"
+ raise MalformedCSVError.new(message, lineno)
end
end
@@ -390,7 +468,7 @@ class CSV
@backslash_quote = false
end
@unconverted_fields = @options[:unconverted_fields]
- @field_size_limit = @options[:field_size_limit]
+ @max_field_size = @options[:max_field_size]
@skip_blanks = @options[:skip_blanks]
@fields_converter = @options[:fields_converter]
@header_fields_converter = @options[:header_fields_converter]
@@ -680,9 +758,10 @@ class CSV
case headers
when Array
@raw_headers = headers
+ quoted_fields = [false] * @raw_headers.size
@use_headers = true
when String
- @raw_headers = parse_headers(headers)
+ @raw_headers, quoted_fields = parse_headers(headers)
@use_headers = true
when nil, false
@raw_headers = nil
@@ -692,21 +771,28 @@ class CSV
@use_headers = true
end
if @raw_headers
- @headers = adjust_headers(@raw_headers)
+ @headers = adjust_headers(@raw_headers, quoted_fields)
else
@headers = nil
end
end
def parse_headers(row)
- CSV.parse_line(row,
- col_sep: @column_separator,
- row_sep: @row_separator,
- quote_char: @quote_character)
+ quoted_fields = []
+ converter = lambda do |field, info|
+ quoted_fields << info.quoted?
+ field
+ end
+ headers = CSV.parse_line(row,
+ col_sep: @column_separator,
+ row_sep: @row_separator,
+ quote_char: @quote_character,
+ converters: [converter])
+ [headers, quoted_fields]
end
- def adjust_headers(headers)
- adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
+ def adjust_headers(headers, quoted_fields)
+ adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
adjusted_headers.each {|h| h.freeze if h.is_a? String}
adjusted_headers
end
@@ -729,28 +815,28 @@ class CSV
sample[0, 128].index(@quote_character)
end
- SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
- if SCANNER_TEST
- class UnoptimizedStringIO
- def initialize(string)
- @io = StringIO.new(string, "rb:#{string.encoding}")
- end
+ class UnoptimizedStringIO # :nodoc:
+ def initialize(string)
+ @io = StringIO.new(string, "rb:#{string.encoding}")
+ end
- def gets(*args)
- @io.gets(*args)
- end
+ def gets(*args)
+ @io.gets(*args)
+ end
- def each_line(*args, &block)
- @io.each_line(*args, &block)
- end
+ def each_line(*args, &block)
+ @io.each_line(*args, &block)
+ end
- def eof?
- @io.eof?
- end
+ def eof?
+ @io.eof?
end
+ end
- SCANNER_TEST_CHUNK_SIZE =
- Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
+ SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
+ if SCANNER_TEST
+ SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
+ SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
def build_scanner
inputs = @samples.collect do |sample|
UnoptimizedStringIO.new(sample)
@@ -760,10 +846,17 @@ class CSV
else
inputs << @input
end
+ begin
+ chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
+ rescue # Ractor::IsolationError
+ # Ractor on Ruby 3.0 can't read ENV value.
+ chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE
+ end
+ chunk_size = Integer((chunk_size_value || "1"), 10)
InputsScanner.new(inputs,
@encoding,
@row_separator,
- chunk_size: SCANNER_TEST_CHUNK_SIZE)
+ chunk_size: chunk_size)
end
else
def build_scanner
@@ -826,6 +919,14 @@ class CSV
end
end
+ def validate_field_size(field)
+ return unless @max_field_size
+ return if field.size <= @max_field_size
+ ignore_broken_line
+ message = "Field size exceeded: #{field.size} > #{@max_field_size}"
+ raise MalformedCSVError.new(message, @lineno)
+ end
+
def parse_no_quote(&block)
@scanner.each_line(@row_separator) do |line|
next if @skip_lines and skip_line?(line)
@@ -835,9 +936,16 @@ class CSV
if line.empty?
next if @skip_blanks
row = []
+ quoted_fields = []
else
line = strip_value(line)
row = line.split(@split_column_separator, -1)
+ quoted_fields = [false] * row.size
+ if @max_field_size
+ row.each do |column|
+ validate_field_size(column)
+ end
+ end
n_columns = row.size
i = 0
while i < n_columns
@@ -846,7 +954,7 @@ class CSV
end
end
@last_line = original_line
- emit_row(row, &block)
+ emit_row(row, quoted_fields, &block)
end
end
@@ -868,31 +976,37 @@ class CSV
next
end
row = []
+ quoted_fields = []
elsif line.include?(@cr) or line.include?(@lf)
@scanner.keep_back
@need_robust_parsing = true
return parse_quotable_robust(&block)
else
row = line.split(@split_column_separator, -1)
+ quoted_fields = []
n_columns = row.size
i = 0
while i < n_columns
column = row[i]
if column.empty?
+ quoted_fields << false
row[i] = nil
else
n_quotes = column.count(@quote_character)
if n_quotes.zero?
+ quoted_fields << false
# no quote
elsif n_quotes == 2 and
column.start_with?(@quote_character) and
column.end_with?(@quote_character)
+ quoted_fields << true
row[i] = column[1..-2]
else
@scanner.keep_back
@need_robust_parsing = true
return parse_quotable_robust(&block)
end
+ validate_field_size(row[i])
end
i += 1
end
@@ -900,13 +1014,14 @@ class CSV
@scanner.keep_drop
@scanner.keep_start
@last_line = original_line
- emit_row(row, &block)
+ emit_row(row, quoted_fields, &block)
end
@scanner.keep_drop
end
def parse_quotable_robust(&block)
row = []
+ quoted_fields = []
skip_needless_lines
start_row
while true
@@ -916,32 +1031,39 @@ class CSV
value = parse_column_value
if value
@scanner.scan_all(@strip_value) if @strip_value
- if @field_size_limit and value.size >= @field_size_limit
- ignore_broken_line
- raise MalformedCSVError.new("Field size exceeded", @lineno)
- end
+ validate_field_size(value)
end
if parse_column_end
row << value
+ quoted_fields << @quoted_column_value
elsif parse_row_end
if row.empty? and value.nil?
- emit_row([], &block) unless @skip_blanks
+ emit_row([], [], &block) unless @skip_blanks
else
row << value
- emit_row(row, &block)
+ quoted_fields << @quoted_column_value
+ emit_row(row, quoted_fields, &block)
row = []
+ quoted_fields = []
end
skip_needless_lines
start_row
elsif @scanner.eos?
break if row.empty? and value.nil?
row << value
- emit_row(row, &block)
+ quoted_fields << @quoted_column_value
+ emit_row(row, quoted_fields, &block)
break
else
if @quoted_column_value
+ if liberal_parsing? and (new_line = @scanner.check(@line_end))
+ message =
+ "Illegal end-of-line sequence outside of a quoted field " +
+ "<#{new_line.inspect}>"
+ else
+ message = "Any value after quoted field isn't allowed"
+ end
ignore_broken_line
- message = "Any value after quoted field isn't allowed"
raise MalformedCSVError.new(message, @lineno)
elsif @unquoted_column_value and
(new_line = @scanner.scan(@line_end))
@@ -1034,7 +1156,7 @@ class CSV
if (n_quotes % 2).zero?
quotes[0, (n_quotes - 2) / 2]
else
- value = quotes[0, (n_quotes - 1) / 2]
+ value = quotes[0, n_quotes / 2]
while true
quoted_value = @scanner.scan_all(@quoted_value)
value << quoted_value if quoted_value
@@ -1058,11 +1180,9 @@ class CSV
n_quotes = quotes.size
if n_quotes == 1
break
- elsif (n_quotes % 2) == 1
- value << quotes[0, (n_quotes - 1) / 2]
- break
else
value << quotes[0, n_quotes / 2]
+ break if (n_quotes % 2) == 1
end
end
value
@@ -1098,18 +1218,15 @@ class CSV
def strip_value(value)
return value unless @strip
- return nil if value.nil?
+ return value if value.nil?
case @strip
when String
- size = value.size
- while value.start_with?(@strip)
- size -= 1
- value = value[1, size]
+ while value.delete_prefix!(@strip)
+ # do nothing
end
- while value.end_with?(@strip)
- size -= 1
- value = value[0, size]
+ while value.delete_suffix!(@strip)
+ # do nothing
end
else
value.strip!
@@ -1132,22 +1249,22 @@ class CSV
@scanner.keep_start
end
- def emit_row(row, &block)
+ def emit_row(row, quoted_fields, &block)
@lineno += 1
raw_row = row
if @use_headers
if @headers.nil?
- @headers = adjust_headers(row)
+ @headers = adjust_headers(row, quoted_fields)
return unless @return_headers
row = Row.new(@headers, row, true)
else
row = Row.new(@headers,
- @fields_converter.convert(raw_row, @headers, @lineno))
+ @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
end
else
# convert fields, if needed...
- row = @fields_converter.convert(raw_row, nil, @lineno)
+ row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
end
# inject unconverted fields and accessor, if requested...
diff --git a/lib/csv/row.rb b/lib/csv/row.rb
index 7f2e7e7807..86323f7d0a 100644
--- a/lib/csv/row.rb
+++ b/lib/csv/row.rb
@@ -3,30 +3,105 @@
require "forwardable"
class CSV
+ # = \CSV::Row
+ # A \CSV::Row instance represents a \CSV table row.
+ # (see {class CSV}[../CSV.html]).
#
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
- # and allows duplicates just as an Array would, but also allows you to access
- # fields by name just as you could if they were in a Hash.
+ # The instance may have:
+ # - Fields: each is an object, not necessarily a \String.
+ # - Headers: each serves a key, and also need not be a \String.
#
- # All rows returned by CSV will be constructed from this class, if header row
- # processing is activated.
+ # === Instance Methods
+ #
+ # \CSV::Row has three groups of instance methods:
+ # - Its own internally defined instance methods.
+ # - Methods included by module Enumerable.
+ # - Methods delegated to class Array.:
+ # * Array#empty?
+ # * Array#length
+ # * Array#size
+ #
+ # == Creating a \CSV::Row Instance
+ #
+ # Commonly, a new \CSV::Row instance is created by parsing \CSV source
+ # that has headers:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.each {|row| p row }
+ # Output:
+ # #<CSV::Row "Name":"foo" "Value":"0">
+ # #<CSV::Row "Name":"bar" "Value":"1">
+ # #<CSV::Row "Name":"baz" "Value":"2">
+ #
+ # You can also create a row directly. See ::new.
+ #
+ # == Headers
+ #
+ # Like a \CSV::Table, a \CSV::Row has headers.
+ #
+ # A \CSV::Row that was created by parsing \CSV source
+ # inherits its headers from the table:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table.first
+ # row.headers # => ["Name", "Value"]
+ #
+ # You can also create a new row with headers;
+ # like the keys in a \Hash, the headers need not be Strings:
+ # row = CSV::Row.new([:name, :value], ['foo', 0])
+ # row.headers # => [:name, :value]
+ #
+ # The new row retains its headers even if added to a table
+ # that has headers:
+ # table << row # => #<CSV::Table mode:col_or_row row_count:5>
+ # row.headers # => [:name, :value]
+ # row[:name] # => "foo"
+ # row['Name'] # => nil
+ #
+ #
+ #
+ # == Accessing Fields
+ #
+ # You may access a field in a \CSV::Row with either its \Integer index
+ # (\Array-style) or its header (\Hash-style).
+ #
+ # Fetch a field using method #[]:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row[1] # => 0
+ # row['Value'] # => 0
+ #
+ # Set a field using method #[]=:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ # row[0] = 'bar'
+ # row['Value'] = 1
+ # row # => #<CSV::Row "Name":"bar" "Value":1>
#
class Row
- #
- # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
- # to be Arrays. If one Array is shorter than the other, it will be padded
- # with +nil+ objects.
- #
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
- # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
- # row. Otherwise, the row assumes to be a field row.
- #
- # A CSV::Row object supports the following Array methods through delegation:
- #
- # * empty?()
- # * length()
- # * size()
- #
+ # :call-seq:
+ # CSV::Row.new(headers, fields, header_row = false) -> csv_row
+ #
+ # Returns the new \CSV::Row instance constructed from
+ # arguments +headers+ and +fields+; both should be Arrays;
+ # note that the fields need not be Strings:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ #
+ # If the \Array lengths are different, the shorter is +nil+-filled:
+ # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0])
+ # row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil>
+ #
+ # Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>;
+ # by default, a new row is a field row; for the row created above:
+ # row.field_row? # => true
+ # row.header_row? # => false
+ #
+ # If the optional argument +header_row+ is given as +true+,
+ # the created row is a header row:
+ # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true)
+ # row # => #<CSV::Row "Name":"foo" "Value":0>
+ # row.field_row? # => false
+ # row.header_row? # => true
def initialize(headers, fields, header_row = false)
@header_row = header_row
headers.each { |h| h.freeze if h.is_a? String }
@@ -48,6 +123,10 @@ class CSV
extend Forwardable
def_delegators :@row, :empty?, :length, :size
+ # :call-seq:
+ # row.initialize_copy(other_row) -> self
+ #
+ # Calls superclass method.
def initialize_copy(other)
super_return_value = super
@row = @row.collect(&:dup)
@@ -71,7 +150,7 @@ class CSV
end
# :call-seq:
- # row.headers
+ # row.headers -> array_of_headers
#
# Returns the headers for this row:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
@@ -83,9 +162,9 @@ class CSV
end
# :call-seq:
- # field(index)
- # field(header)
- # field(header, offset)
+ # field(index) -> value
+ # field(header) -> value
+ # field(header, offset) -> value
#
# Returns the field value for the given +index+ or +header+.
#
@@ -137,9 +216,9 @@ class CSV
#
# :call-seq:
- # fetch(header)
- # fetch(header, default)
- # fetch(header) {|row| ... }
+ # fetch(header) -> value
+ # fetch(header, default) -> value
+ # fetch(header) {|row| ... } -> value
#
# Returns the field value as specified by +header+.
#
@@ -193,7 +272,7 @@ class CSV
end
# :call-seq:
- # row.has_key?(header)
+ # row.has_key?(header) -> true or false
#
# Returns +true+ if there is a field with the given +header+,
# +false+ otherwise.
@@ -320,7 +399,7 @@ class CSV
end
# :call-seq:
- # row.push(*values) ->self
+ # row.push(*values) -> self
#
# Appends each of the given +values+ to +self+ as a field; returns +self+:
# source = "Name,Name,Name\nFoo,Bar,Baz\n"
@@ -403,7 +482,7 @@ class CSV
end
# :call-seq:
- # self.fields(*specifiers)
+ # self.fields(*specifiers) -> array_of_fields
#
# Returns field values per the given +specifiers+, which may be any mixture of:
# - \Integer index.
@@ -471,15 +550,26 @@ class CSV
end
alias_method :values_at, :fields
- #
# :call-seq:
- # index( header )
- # index( header, offset )
+ # index(header) -> index
+ # index(header, offset) -> index
#
- # This method will return the index of a field with the provided +header+.
- # The +offset+ can be used to locate duplicate header names, as described in
- # CSV::Row.field().
+ # Returns the index for the given header, if it exists;
+ # otherwise returns +nil+.
#
+ # With the single argument +header+, returns the index
+ # of the first-found field with the given +header+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.index('Name') # => 0
+ # row.index('NAME') # => nil
+ #
+ # With arguments +header+ and +offset+,
+ # returns the index of the first-found field with given +header+,
+ # but ignoring the first +offset+ fields:
+ # row.index('Name', 1) # => 1
+ # row.index('Name', 3) # => nil
def index(header, minimum_index = 0)
# find the pair
index = headers[minimum_index..-1].index(header)
@@ -487,24 +577,36 @@ class CSV
index.nil? ? nil : index + minimum_index
end
+ # :call-seq:
+ # row.field?(value) -> true or false
#
- # Returns +true+ if +data+ matches a field in this row, and +false+
- # otherwise.
- #
+ # Returns +true+ if +value+ is a field in this row, +false+ otherwise:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.field?('Bar') # => true
+ # row.field?('BAR') # => false
def field?(data)
fields.include? data
end
include Enumerable
+ # :call-seq:
+ # row.each {|header, value| ... } -> self
#
- # Yields each pair of the row as header and field tuples (much like
- # iterating over a Hash). This method returns the row for chaining.
- #
- # If no block is given, an Enumerator is returned.
- #
- # Support for Enumerable.
+ # Calls the block with each header-value pair; returns +self+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # row.each {|header, value| p [header, value] }
+ # Output:
+ # ["Name", "Foo"]
+ # ["Name", "Bar"]
+ # ["Name", "Baz"]
#
+ # If no block is given, returns a new Enumerator:
+ # row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each>
def each(&block)
return enum_for(__method__) { size } unless block_given?
@@ -515,10 +617,19 @@ class CSV
alias_method :each_pair, :each
+ # :call-seq:
+ # row == other -> true or false
#
- # Returns +true+ if this row contains the same headers and fields in the
- # same order as +other+.
- #
+ # Returns +true+ if +other+ is a /CSV::Row that has the same
+ # fields (headers and values) in the same order as +self+;
+ # otherwise returns +false+:
+ # source = "Name,Name,Name\nFoo,Bar,Baz\n"
+ # table = CSV.parse(source, headers: true)
+ # row = table[0]
+ # other_row = table[0]
+ # row == other_row # => true
+ # other_row = table[1]
+ # row == other_row # => false
def ==(other)
return @row == other.row if other.is_a? CSV::Row
@row == other
@@ -548,9 +659,31 @@ class CSV
end
alias_method :to_hash, :to_h
+ # :call-seq:
+ # row.deconstruct_keys(keys) -> hash
+ #
+ # Returns the new \Hash suitable for pattern matching containing only the
+ # keys specified as an argument.
+ def deconstruct_keys(keys)
+ if keys.nil?
+ to_h
+ else
+ keys.to_h { |key| [key, self[key]] }
+ end
+ end
+
alias_method :to_ary, :to_a
# :call-seq:
+ # row.deconstruct -> array
+ #
+ # Returns the new \Array suitable for pattern matching containing the values
+ # of the row.
+ def deconstruct
+ fields
+ end
+
+ # :call-seq:
# row.to_csv -> csv_string
#
# Returns the row as a \CSV String. Headers are not included:
diff --git a/lib/csv/table.rb b/lib/csv/table.rb
index 0b62ae89ae..fb19f5453f 100644
--- a/lib/csv/table.rb
+++ b/lib/csv/table.rb
@@ -3,31 +3,199 @@
require "forwardable"
class CSV
+ # = \CSV::Table
+ # A \CSV::Table instance represents \CSV data.
+ # (see {class CSV}[../CSV.html]).
#
- # A CSV::Table is a two-dimensional data structure for representing CSV
- # documents. Tables allow you to work with the data by row or column,
- # manipulate the data, and even convert the results back to CSV, if needed.
+ # The instance may have:
+ # - Rows: each is a Table::Row object.
+ # - Headers: names for the columns.
#
- # All tables returned by CSV will be constructed from this class, if header
- # row processing is activated.
+ # === Instance Methods
#
+ # \CSV::Table has three groups of instance methods:
+ # - Its own internally defined instance methods.
+ # - Methods included by module Enumerable.
+ # - Methods delegated to class Array.:
+ # * Array#empty?
+ # * Array#length
+ # * Array#size
+ #
+ # == Creating a \CSV::Table Instance
+ #
+ # Commonly, a new \CSV::Table instance is created by parsing \CSV source
+ # using headers:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.class # => CSV::Table
+ #
+ # You can also create an instance directly. See ::new.
+ #
+ # == Headers
+ #
+ # If a table has headers, the headers serve as labels for the columns of data.
+ # Each header serves as the label for its column.
+ #
+ # The headers for a \CSV::Table object are stored as an \Array of Strings.
+ #
+ # Commonly, headers are defined in the first row of \CSV source:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.headers # => ["Name", "Value"]
+ #
+ # If no headers are defined, the \Array is empty:
+ # table = CSV::Table.new([])
+ # table.headers # => []
+ #
+ # == Access Modes
+ #
+ # \CSV::Table provides three modes for accessing table data:
+ # - \Row mode.
+ # - Column mode.
+ # - Mixed mode (the default for a new table).
+ #
+ # The access mode for a\CSV::Table instance affects the behavior
+ # of some of its instance methods:
+ # - #[]
+ # - #[]=
+ # - #delete
+ # - #delete_if
+ # - #each
+ # - #values_at
+ #
+ # === \Row Mode
+ #
+ # Set a table to row mode with method #by_row!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ #
+ # Specify a single row by an \Integer index:
+ # # Get a row.
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # # Set a row, then get it.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
+ #
+ # Specify a sequence of rows by a \Range:
+ # # Get rows.
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
+ # # Set rows, then get them.
+ # table[1..2] = [
+ # CSV::Row.new(['Name', 'Value'], ['bat', 4]),
+ # CSV::Row.new(['Name', 'Value'], ['bad', 5]),
+ # ]
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
+ #
+ # === Column Mode
+ #
+ # Set a table to column mode with method #by_col!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # Specify a column by an \Integer index:
+ # # Get a column.
+ # table[0]
+ # # Set a column, then get it.
+ # table[0] = ['FOO', 'BAR', 'BAZ']
+ # table[0] # => ["FOO", "BAR", "BAZ"]
+ #
+ # Specify a column by its \String header:
+ # # Get a column.
+ # table['Name'] # => ["FOO", "BAR", "BAZ"]
+ # # Set a column, then get it.
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
+ #
+ # === Mixed Mode
+ #
+ # In mixed mode, you can refer to either rows or columns:
+ # - An \Integer index refers to a row.
+ # - A \Range index refers to multiple rows.
+ # - A \String index refers to a column.
+ #
+ # Set a table to mixed mode with method #by_col_or_row!:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ #
+ # Specify a single row by an \Integer index:
+ # # Get a row.
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
+ # # Set a row, then get it.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
+ #
+ # Specify a sequence of rows by a \Range:
+ # # Get rows.
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
+ # # Set rows, then get them.
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4])
+ # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5])
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
+ #
+ # Specify a column by its \String header:
+ # # Get a column.
+ # table['Name'] # => ["foo", "bat", "bad"]
+ # # Set a column, then get it.
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
class Table
+ # :call-seq:
+ # CSV::Table.new(array_of_rows, headers = nil) -> csv_table
+ #
+ # Returns a new \CSV::Table object.
+ #
+ # - Argument +array_of_rows+ must be an \Array of CSV::Row objects.
+ # - Argument +headers+, if given, may be an \Array of Strings.
+ #
+ # ---
+ #
+ # Create an empty \CSV::Table object:
+ # table = CSV::Table.new([])
+ # table # => #<CSV::Table mode:col_or_row row_count:1>
+ #
+ # Create a non-empty \CSV::Table object:
+ # rows = [
+ # CSV::Row.new([], []),
+ # CSV::Row.new([], []),
+ # CSV::Row.new([], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
+ #
+ # ---
#
- # Constructs a new CSV::Table from +array_of_rows+, which are expected
- # to be CSV::Row objects. All rows are assumed to have the same headers.
+ # If argument +headers+ is an \Array of Strings,
+ # those Strings become the table's headers:
+ # table = CSV::Table.new([], headers: ['Name', 'Age'])
+ # table.headers # => ["Name", "Age"]
#
- # The optional +headers+ parameter can be set to Array of headers.
- # If headers aren't set, headers are fetched from CSV::Row objects.
- # Otherwise, headers() method will return headers being set in
- # headers argument.
+ # If argument +headers+ is not given and the table has rows,
+ # the headers are taken from the first row:
+ # rows = [
+ # CSV::Row.new(['Foo', 'Bar'], []),
+ # CSV::Row.new(['foo', 'bar'], []),
+ # CSV::Row.new(['FOO', 'BAR'], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table.headers # => ["Foo", "Bar"]
#
- # A CSV::Table object supports the following Array methods through
- # delegation:
+ # If argument +headers+ is not given and the table is empty (has no rows),
+ # the headers are also empty:
+ # table = CSV::Table.new([])
+ # table.headers # => []
#
- # * empty?()
- # * length()
- # * size()
+ # ---
#
+ # Raises an exception if argument +array_of_rows+ is not an \Array object:
+ # # Raises NoMethodError (undefined method `first' for :foo:Symbol):
+ # CSV::Table.new(:foo)
+ #
+ # Raises an exception if an element of +array_of_rows+ is not a \CSV::Table object:
+ # # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
+ # CSV::Table.new([:foo])
def initialize(array_of_rows, headers: nil)
@table = array_of_rows
@headers = headers
@@ -54,88 +222,141 @@ class CSV
extend Forwardable
def_delegators :@table, :empty?, :length, :size
+ # :call-seq:
+ # table.by_col -> table_dup
#
- # Returns a duplicate table object, in column mode. This is handy for
- # chaining in a single call without changing the table mode, but be aware
- # that this method can consume a fair amount of memory for bigger data sets.
+ # Returns a duplicate of +self+, in column mode
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # dup_table = table.by_col
+ # dup_table.mode # => :col
+ # dup_table.equal?(table) # => false # It's a dup
#
- # This method returns the duplicate table for chaining. Don't chain
- # destructive methods (like []=()) this way though, since you are working
- # with a duplicate.
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_col['Name']
#
+ # Also note that changes to the duplicate table will not affect the original.
def by_col
self.class.new(@table.dup).by_col!
end
+ # :call-seq:
+ # table.by_col! -> self
#
- # Switches the mode of this table to column mode. All calls to indexing and
- # iteration methods will work with columns until the mode is changed again.
- #
- # This method returns the table and is safe to chain.
- #
+ # Sets the mode for +self+ to column mode
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # table1 = table.by_col!
+ # table.mode # => :col
+ # table1.equal?(table) # => true # Returned self
def by_col!
@mode = :col
self
end
+ # :call-seq:
+ # table.by_col_or_row -> table_dup
#
- # Returns a duplicate table object, in mixed mode. This is handy for
- # chaining in a single call without changing the table mode, but be aware
- # that this method can consume a fair amount of memory for bigger data sets.
+ # Returns a duplicate of +self+, in mixed mode
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true).by_col!
+ # table.mode # => :col
+ # dup_table = table.by_col_or_row
+ # dup_table.mode # => :col_or_row
+ # dup_table.equal?(table) # => false # It's a dup
#
- # This method returns the duplicate table for chaining. Don't chain
- # destructive methods (like []=()) this way though, since you are working
- # with a duplicate.
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_col_or_row['Name']
#
+ # Also note that changes to the duplicate table will not affect the original.
def by_col_or_row
self.class.new(@table.dup).by_col_or_row!
end
+ # :call-seq:
+ # table.by_col_or_row! -> self
#
- # Switches the mode of this table to mixed mode. All calls to indexing and
- # iteration methods will use the default intelligent indexing system until
- # the mode is changed again. In mixed mode an index is assumed to be a row
- # reference while anything else is assumed to be column access by headers.
- #
- # This method returns the table and is safe to chain.
- #
+ # Sets the mode for +self+ to mixed mode
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true).by_col!
+ # table.mode # => :col
+ # table1 = table.by_col_or_row!
+ # table.mode # => :col_or_row
+ # table1.equal?(table) # => true # Returned self
def by_col_or_row!
@mode = :col_or_row
self
end
+ # :call-seq:
+ # table.by_row -> table_dup
#
- # Returns a duplicate table object, in row mode. This is handy for chaining
- # in a single call without changing the table mode, but be aware that this
- # method can consume a fair amount of memory for bigger data sets.
+ # Returns a duplicate of +self+, in row mode
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # dup_table = table.by_row
+ # dup_table.mode # => :row
+ # dup_table.equal?(table) # => false # It's a dup
#
- # This method returns the duplicate table for chaining. Don't chain
- # destructive methods (like []=()) this way though, since you are working
- # with a duplicate.
+ # This may be used to chain method calls without changing the mode
+ # (but also will affect performance and memory usage):
+ # dup_table.by_row[1]
#
+ # Also note that changes to the duplicate table will not affect the original.
def by_row
self.class.new(@table.dup).by_row!
end
+ # :call-seq:
+ # table.by_row! -> self
#
- # Switches the mode of this table to row mode. All calls to indexing and
- # iteration methods will work with rows until the mode is changed again.
- #
- # This method returns the table and is safe to chain.
- #
+ # Sets the mode for +self+ to row mode
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.mode # => :col_or_row
+ # table1 = table.by_row!
+ # table.mode # => :row
+ # table1.equal?(table) # => true # Returned self
def by_row!
@mode = :row
self
end
+ # :call-seq:
+ # table.headers -> array_of_headers
#
- # Returns the headers for the first row of this table (assumed to match all
- # other rows). The headers Array passed to CSV::Table.new is returned for
- # empty tables.
+ # Returns a new \Array containing the \String headers for the table.
#
+ # If the table is not empty, returns the headers from the first row:
+ # rows = [
+ # CSV::Row.new(['Foo', 'Bar'], []),
+ # CSV::Row.new(['FOO', 'BAR'], []),
+ # CSV::Row.new(['foo', 'bar'], []),
+ # ]
+ # table = CSV::Table.new(rows)
+ # table.headers # => ["Foo", "Bar"]
+ # table.delete(0)
+ # table.headers # => ["FOO", "BAR"]
+ # table.delete(0)
+ # table.headers # => ["foo", "bar"]
+ #
+ # If the table is empty, returns a copy of the headers in the table itself:
+ # table.delete(0)
+ # table.headers # => ["Foo", "Bar"]
def headers
if @table.empty?
@headers.dup
@@ -145,17 +366,21 @@ class CSV
end
# :call-seq:
- # table[n] -> row
- # table[range] -> array_of_rows
- # table[header] -> array_of_fields
+ # table[n] -> row or column_data
+ # table[range] -> array_of_rows or array_of_column_data
+ # table[header] -> array_of_column_data
#
# Returns data from the table; does not modify the table.
#
# ---
#
- # The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer,
- # returns the +n+th row of the table, if that row exists,
- # and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>:
+ # Fetch a \Row by Its \Integer Index::
+ # - Form: <tt>table[n]</tt>, +n+ an integer.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: _nth_ row of the table, if that row exists;
+ # otherwise +nil+.
+ #
+ # Returns the _nth_ row of the table if that row exists:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_row! # => #<CSV::Table mode:row row_count:4>
@@ -168,20 +393,45 @@ class CSV
#
# Returns +nil+ if +n+ is too large or too small:
# table[4] # => nil
- # table[-4] => nil
+ # table[-4] # => nil
#
# Raises an exception if the access mode is <tt>:row</tt>
- # and +n+ is not an
- # {Integer-convertible object}[rdoc-ref:implicit_conversion.rdoc@Integer-Convertible+Objects].
+ # and +n+ is not an \Integer:
# table.by_row! # => #<CSV::Table mode:row row_count:4>
# # Raises TypeError (no implicit conversion of String into Integer):
# table['Name']
#
# ---
#
- # The expression <tt>table[range]</tt>, where +range+ is a Range object,
- # returns rows from the table, beginning at row <tt>range.first</tt>,
- # if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>:
+ # Fetch a Column by Its \Integer Index::
+ # - Form: <tt>table[n]</tt>, +n+ an \Integer.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: _nth_ column of the table, if that column exists;
+ # otherwise an \Array of +nil+ fields of length <tt>self.size</tt>.
+ #
+ # Returns the _nth_ column of the table if that column exists:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # table[1] # => ["0", "1", "2"]
+ #
+ # Counts backward from the last column if +n+ is negative:
+ # table[-2] # => ["foo", "bar", "baz"]
+ #
+ # Returns an \Array of +nil+ fields if +n+ is too large or too small:
+ # table[4] # => [nil, nil, nil]
+ # table[-4] # => [nil, nil, nil]
+ #
+ # ---
+ #
+ # Fetch Rows by \Range::
+ # - Form: <tt>table[range]</tt>, +range+ a \Range object.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: rows from the table, beginning at row <tt>range.start</tt>,
+ # if those rows exists.
+ #
+ # Returns rows from the table, beginning at row <tt>range.first</tt>,
+ # if those rows exist:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_row! # => #<CSV::Table mode:row row_count:4>
@@ -191,11 +441,11 @@ class CSV
# rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1">
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
#
- # If there are too few rows, returns all from <tt>range.first</tt> to the end:
+ # If there are too few rows, returns all from <tt>range.start</tt> to the end:
# rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1">
# rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
#
- # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array:
+ # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array:
# table[table.size..50] # => []
#
# If <tt>range.end</tt> is negative, calculates the ending index from the end:
@@ -211,9 +461,41 @@ class CSV
#
# ---
#
- # The expression <tt>table[header]</tt>, where +header+ is a \String,
- # returns column values (\Array of \Strings) if the column exists
- # and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>:
+ # Fetch Columns by \Range::
+ # - Form: <tt>table[range]</tt>, +range+ a \Range object.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: column data from the table, beginning at column <tt>range.start</tt>,
+ # if those columns exist.
+ #
+ # Returns column values from the table, if the column exists;
+ # the values are arranged by row:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.by_col!
+ # table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # Special case: if <tt>range.start == headers.size</tt>,
+ # returns an \Array (size: <tt>table.size</tt>) of empty \Arrays:
+ # table[table.headers.size..50] # => [[], [], []]
+ #
+ # If <tt>range.end</tt> is negative, calculates the ending index from the end:
+ # table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # If <tt>range.start</tt> is negative, calculates the starting index from the end:
+ # table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
+ #
+ # If <tt>range.start</tt> is larger than <tt>table.size</tt>,
+ # returns an \Array of +nil+ values:
+ # table[4..4] # => [nil, nil, nil]
+ #
+ # ---
+ #
+ # Fetch a Column by Its \String Header::
+ # - Form: <tt>table[header]</tt>, +header+ a \String header.
+ # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>
+ # - Return value: column data from the table, if that +header+ exists.
+ #
+ # Returns column values from the table, if the column exists:
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
# table = CSV.parse(source, headers: true)
# table.by_col! # => #<CSV::Table mode:col row_count:4>
@@ -238,22 +520,132 @@ class CSV
end
end
+ # :call-seq:
+ # table[n] = row -> row
+ # table[n] = field_or_array_of_fields -> field_or_array_of_fields
+ # table[header] = field_or_array_of_fields -> field_or_array_of_fields
#
- # In the default mixed mode, this method assigns rows for index access and
- # columns for header access. You can force the index association by first
- # calling by_col!() or by_row!().
+ # Puts data onto the table.
#
- # Rows may be set to an Array of values (which will inherit the table's
- # headers()) or a CSV::Row.
+ # ---
+ #
+ # Set a \Row by Its \Integer Index::
+ # - Form: <tt>table[n] = row</tt>, +n+ an \Integer,
+ # +row+ a \CSV::Row instance or an \Array of fields.
+ # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>.
+ # - Return value: +row+.
+ #
+ # If the row exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3])
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # return_value = table[0] = new_row
+ # return_value.equal?(new_row) # => true # Returned the row
+ # table[0].to_h # => {"Name"=>"bat", "Value"=>3}
+ #
+ # With access mode <tt>:col_or_row</tt>:
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
+ # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4])
+ # table[0].to_h # => {"Name"=>"bam", "Value"=>4}
+ #
+ # With an \Array instead of a \CSV::Row, inherits headers from the table:
+ # array = ['bad', 5]
+ # return_value = table[0] = array
+ # return_value.equal?(array) # => true # Returned the array
+ # table[0].to_h # => {"Name"=>"bad", "Value"=>5}
#
- # Columns may be set to a single value, which is copied to each row of the
- # column, or an Array of values. Arrays of values are assigned to rows top
- # to bottom in row major order. Excess values are ignored and if the Array
- # does not have a value for each row the extra rows will receive a +nil+.
+ # If the row does not exist, extends the table by adding rows:
+ # assigns rows with +nil+ as needed:
+ # table.size # => 3
+ # table[5] = ['bag', 6]
+ # table.size # => 6
+ # table[3] # => nil
+ # table[4]# => nil
+ # table[5].to_h # => {"Name"=>"bag", "Value"=>6}
+ #
+ # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields.
#
- # Assigning to an existing column or row clobbers the data. Assigning to
- # new columns creates them at the right end of the table.
+ # ---
#
+ # Set a Column by Its \Integer Index::
+ # - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer,
+ # +array_of_fields+ an \Array of \String fields.
+ # - Access mode: <tt>:col</tt>.
+ # - Return value: +array_of_fields+.
+ #
+ # If the column exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_col = [3, 4, 5]
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # return_value = table[1] = new_col
+ # return_value.equal?(new_col) # => true # Returned the column
+ # table[1] # => [3, 4, 5]
+ # # The rows, as revised:
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>3}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>4}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>5}
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # If there are too few values, fills with +nil+ values:
+ # table[1] = [0]
+ # table[1] # => [0, nil, nil]
+ #
+ # If there are too many values, ignores the extra values:
+ # table[1] = [0, 1, 2, 3, 4]
+ # table[1] # => [0, 1, 2]
+ #
+ # If a single value is given, replaces all fields in the column with that value:
+ # table[1] = 'bat'
+ # table[1] # => ["bat", "bat", "bat"]
+ #
+ # ---
+ #
+ # Set a Column by Its \String Header::
+ # - Form: <tt>table[header] = field_or_array_of_fields</tt>,
+ # +header+ a \String header, +field_or_array_of_fields+ a field value
+ # or an \Array of \String fields.
+ # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>.
+ # - Return value: +field_or_array_of_fields+.
+ #
+ # If the column exists, it is replaced:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # new_col = [3, 4, 5]
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ # return_value = table['Value'] = new_col
+ # return_value.equal?(new_col) # => true # Returned the column
+ # table['Value'] # => [3, 4, 5]
+ # # The rows, as revised:
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>3}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>4}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>5}
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
+ #
+ # If there are too few values, fills with +nil+ values:
+ # table['Value'] = [0]
+ # table['Value'] # => [0, nil, nil]
+ #
+ # If there are too many values, ignores the extra values:
+ # table['Value'] = [0, 1, 2, 3, 4]
+ # table['Value'] # => [0, 1, 2]
+ #
+ # If the column does not exist, extends the table by adding columns:
+ # table['Note'] = ['x', 'y', 'z']
+ # table['Note'] # => ["x", "y", "z"]
+ # # The rows, as revised:
+ # table.by_row!
+ # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"}
+ # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"}
+ # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"}
+ # table.by_col!
+ #
+ # If a single value is given, replaces all fields in the column with that value:
+ # table['Value'] = 'bat'
+ # table['Value'] # => ["bat", "bat", "bat"]
def []=(index_or_header, value)
if @mode == :row or # by index
(@mode == :col_or_row and index_or_header.is_a? Integer)
@@ -463,6 +855,9 @@ class CSV
end
end
+ # :call-seq:
+ # table.delete_if {|row_or_column| ... } -> self
+ #
# Removes rows or columns for which the block returns a truthy value;
# returns +self+.
#
@@ -495,9 +890,8 @@ class CSV
if @mode == :row or @mode == :col_or_row # by index
@table.delete_if(&block)
else # by header
- deleted = []
headers.each do |header|
- deleted << delete(header) if yield([header, self[header]])
+ delete(header) if yield([header, self[header]])
end
end
@@ -506,6 +900,9 @@ class CSV
include Enumerable
+ # :call-seq:
+ # table.each {|row_or_column| ... ) -> self
+ #
# Calls the block with each row or column; returns +self+.
#
# When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>,
@@ -534,7 +931,9 @@ class CSV
return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
if @mode == :col
- headers.each { |header| yield([header, self[header]]) }
+ headers.each.with_index do |header, i|
+ yield([header, @table.map {|row| row[header, i]}])
+ end
else
@table.each(&block)
end
@@ -542,6 +941,9 @@ class CSV
self # for chaining
end
+ # :call-seq:
+ # table == other_table -> true or false
+ #
# Returns +true+ if all each row of +self+ <tt>==</tt>
# the corresponding row of +other_table+, otherwise, +false+.
#
@@ -565,10 +967,14 @@ class CSV
@table == other
end
+ # :call-seq:
+ # table.to_a -> array_of_arrays
#
- # Returns the table as an Array of Arrays. Headers will be the first row,
- # then all of the field rows will follow.
- #
+ # Returns the table as an \Array of \Arrays;
+ # the headers are in the first row:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]]
def to_a
array = [headers]
@table.each do |row|
@@ -578,16 +984,29 @@ class CSV
array
end
+ # :call-seq:
+ # table.to_csv(**options) -> csv_string
#
- # Returns the table as a complete CSV String. Headers will be listed first,
- # then all of the field rows.
+ # Returns the table as \CSV string.
+ # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating].
#
- # This method assumes you want the Table.headers(), unless you explicitly
- # pass <tt>:write_headers => false</tt>.
+ # Defaults option +write_headers+ to +true+:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#
- def to_csv(write_headers: true, **options)
+ # Omits the headers if option +write_headers+ is given as +false+
+ # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]):
+ # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n"
+ #
+ # Limit rows if option +limit+ is given like +2+:
+ # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n"
+ def to_csv(write_headers: true, limit: nil, **options)
array = write_headers ? [headers.to_csv(**options)] : []
- @table.each do |row|
+ limit ||= @table.size
+ limit = @table.size + 1 + limit if limit < 0
+ limit = 0 if limit < 0
+ @table.first(limit).each do |row|
array.push(row.fields.to_csv(**options)) unless row.header_row?
end
@@ -613,9 +1032,24 @@ class CSV
end
end
- # Shows the mode and size of this table in a US-ASCII String.
+ # :call-seq:
+ # table.inspect => string
+ #
+ # Returns a <tt>US-ASCII</tt>-encoded \String showing table:
+ # - Class: <tt>CSV::Table</tt>.
+ # - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>.
+ # - Size: Row count, including the header row.
+ #
+ # Example:
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
+ # table = CSV.parse(source, headers: true)
+ # table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n"
+ #
def inspect
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
+ inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
+ summary = to_csv(limit: 5)
+ inspected << "\n" << summary if summary.encoding.ascii_compatible?
+ inspected
end
end
end
diff --git a/lib/csv/version.rb b/lib/csv/version.rb
index d1d0dc0e02..e05d63d801 100644
--- a/lib/csv/version.rb
+++ b/lib/csv/version.rb
@@ -2,5 +2,5 @@
class CSV
# The version of the installed library.
- VERSION = "3.2.2"
+ VERSION = "3.2.6"
end
diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb
index 4a9a35c5af..030a295bc9 100644
--- a/lib/csv/writer.rb
+++ b/lib/csv/writer.rb
@@ -1,11 +1,8 @@
# frozen_string_literal: true
require_relative "input_record_separator"
-require_relative "match_p"
require_relative "row"
-using CSV::MatchP if CSV.const_defined?(:MatchP)
-
class CSV
# Note: Don't use this class directly. This is an internal class.
class Writer
@@ -42,7 +39,10 @@ class CSV
@headers ||= row if @use_headers
@lineno += 1
- row = @fields_converter.convert(row, nil, lineno) if @fields_converter
+ if @fields_converter
+ quoted_fields = [false] * row.size
+ row = @fields_converter.convert(row, nil, lineno, quoted_fields)
+ end
i = -1
converted_row = row.collect do |field|
@@ -97,7 +97,7 @@ class CSV
return unless @headers
converter = @options[:header_fields_converter]
- @headers = converter.convert(@headers, nil, 0)
+ @headers = converter.convert(@headers, nil, 0, [])
@headers.each do |header|
header.freeze if header.is_a?(String)
end