From 643918ecfe9c980f251247de6acd3be6280da24c Mon Sep 17 00:00:00 2001 From: Hiroshi SHIBATA Date: Fri, 9 Dec 2022 08:46:14 +0900 Subject: Merge csv-3.2.6 --- lib/csv/fields_converter.rb | 5 +- lib/csv/input_record_separator.rb | 15 +- lib/csv/parser.rb | 293 ++++++++++++------ lib/csv/row.rb | 229 +++++++++++--- lib/csv/table.rb | 626 ++++++++++++++++++++++++++++++++------ lib/csv/version.rb | 2 +- lib/csv/writer.rb | 10 +- 7 files changed, 926 insertions(+), 254 deletions(-) (limited to 'lib/csv') diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb index b206118d99..d15977d379 100644 --- a/lib/csv/fields_converter.rb +++ b/lib/csv/fields_converter.rb @@ -44,7 +44,7 @@ class CSV @converters.empty? end - def convert(fields, headers, lineno) + def convert(fields, headers, lineno, quoted_fields) return fields unless need_convert? fields.collect.with_index do |field, index| @@ -63,7 +63,8 @@ class CSV else header = nil end - field = converter[field, FieldInfo.new(index, lineno, header)] + quoted = quoted_fields[index] + field = converter[field, FieldInfo.new(index, lineno, header, quoted)] end break unless field.is_a?(String) # short-circuit pipeline for speed end diff --git a/lib/csv/input_record_separator.rb b/lib/csv/input_record_separator.rb index bbf13479f7..7a99343c0c 100644 --- a/lib/csv/input_record_separator.rb +++ b/lib/csv/input_record_separator.rb @@ -4,20 +4,7 @@ require "stringio" class CSV module InputRecordSeparator class << self - is_input_record_separator_deprecated = false - verbose, $VERBOSE = $VERBOSE, true - stderr, $stderr = $stderr, StringIO.new - input_record_separator = $INPUT_RECORD_SEPARATOR - begin - $INPUT_RECORD_SEPARATOR = "\r\n" - is_input_record_separator_deprecated = (not $stderr.string.empty?) - ensure - $INPUT_RECORD_SEPARATOR = input_record_separator - $stderr = stderr - $VERBOSE = verbose - end - - if is_input_record_separator_deprecated + if RUBY_VERSION >= "3.0.0" def value "\n" end diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 7e943acf21..afb3131cd5 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -2,15 +2,10 @@ require "strscan" -require_relative "delete_suffix" require_relative "input_record_separator" -require_relative "match_p" require_relative "row" require_relative "table" -using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Parser @@ -27,6 +22,10 @@ class CSV class InvalidEncoding < StandardError end + # Raised when unexpected case is happen. + class UnexpectedError < StandardError + end + # # CSV::Scanner receives a CSV output, scans it and return the content. # It also controls the life cycle of the object with its methods +keep_start+, @@ -78,10 +77,10 @@ class CSV # +keep_end+, +keep_back+, +keep_drop+. # # CSV::InputsScanner.scan() tries to match with pattern at the current position. - # If there's a match, the scanner advances the “scan pointer” and returns the matched string. + # If there's a match, the scanner advances the "scan pointer" and returns the matched string. # Otherwise, the scanner returns nil. # - # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). + # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer). # If there is no more data (eos? = true), it returns "". # class InputsScanner @@ -96,11 +95,13 @@ class CSV end def each_line(row_separator) + return enum_for(__method__, row_separator) unless block_given? buffer = nil input = @scanner.rest position = @scanner.pos offset = 0 n_row_separator_chars = row_separator.size + # trace(__method__, :start, line, input) while true input.each_line(row_separator) do |line| @scanner.pos += line.bytesize @@ -140,25 +141,28 @@ class CSV end def scan(pattern) + # trace(__method__, pattern, :start) value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner return value if @last_scanner - if value - read_chunk if @scanner.eos? - return value - else - nil - end + read_chunk if value and @scanner.eos? + # trace(__method__, pattern, :done, value) + value end def scan_all(pattern) + # trace(__method__, pattern, :start) value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner return value if @last_scanner return nil if value.nil? while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern)) + # trace(__method__, pattern, :sub, sub_value) value << sub_value end + # trace(__method__, pattern, :done, value) value end @@ -167,68 +171,126 @@ class CSV end def keep_start - @keeps.push([@scanner.pos, nil]) + # trace(__method__, :start) + adjust_last_keep + @keeps.push([@scanner, @scanner.pos, nil]) + # trace(__method__, :done) end def keep_end - start, buffer = @keeps.pop - keep = @scanner.string.byteslice(start, @scanner.pos - start) + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop + if scanner == @scanner + keep = @scanner.string.byteslice(start, @scanner.pos - start) + else + keep = @scanner.string.byteslice(0, @scanner.pos) + end if buffer buffer << keep keep = buffer end + # trace(__method__, :done, keep) keep end def keep_back - start, buffer = @keeps.pop + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop if buffer + # trace(__method__, :rescan, start, buffer) string = @scanner.string - keep = string.byteslice(start, string.bytesize - start) + if scanner == @scanner + keep = string.byteslice(start, string.bytesize - start) + else + keep = string + end if keep and not keep.empty? @inputs.unshift(StringIO.new(keep)) @last_scanner = false end @scanner = StringScanner.new(buffer) else + if @scanner != scanner + message = "scanners are different but no buffer: " + message += "#{@scanner.inspect}(#{@scanner.object_id}): " + message += "#{scanner.inspect}(#{scanner.object_id})" + raise UnexpectedError, message + end + # trace(__method__, :repos, start, buffer) @scanner.pos = start end read_chunk if @scanner.eos? end def keep_drop - @keeps.pop + _, _, buffer = @keeps.pop + # trace(__method__, :done, :empty) unless buffer + return unless buffer + + last_keep = @keeps.last + # trace(__method__, :done, :no_last_keep) unless last_keep + return unless last_keep + + if last_keep[2] + last_keep[2] << buffer + else + last_keep[2] = buffer + end + # trace(__method__, :done) end def rest @scanner.rest end + def check(pattern) + @scanner.check(pattern) + end + private - def read_chunk - return false if @last_scanner + def trace(*args) + pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps]) + end - unless @keeps.empty? - keep = @keeps.last - keep_start = keep[0] - string = @scanner.string - keep_data = string.byteslice(keep_start, @scanner.pos - keep_start) - if keep_data - keep_buffer = keep[1] - if keep_buffer - keep_buffer << keep_data - else - keep[1] = keep_data.dup - end + def adjust_last_keep + # trace(__method__, :start) + + keep = @keeps.last + # trace(__method__, :done, :empty) if keep.nil? + return if keep.nil? + + scanner, start, buffer = keep + string = @scanner.string + if @scanner != scanner + start = 0 + end + if start == 0 and @scanner.eos? + keep_data = string + else + keep_data = string.byteslice(start, @scanner.pos - start) + end + if keep_data + if buffer + buffer << keep_data + else + keep[2] = keep_data.dup end - keep[0] = 0 end + # trace(__method__, :done) + end + + def read_chunk + return false if @last_scanner + + adjust_last_keep + input = @inputs.first case input when StringIO string = input.read raise InvalidEncoding unless string.valid_encoding? + # trace(__method__, :stringio, string) @scanner = StringScanner.new(string) @inputs.shift @last_scanner = @inputs.empty? @@ -237,6 +299,7 @@ class CSV chunk = input.gets(@row_separator, @chunk_size) if chunk raise InvalidEncoding unless chunk.valid_encoding? + # trace(__method__, :chunk, chunk) @scanner = StringScanner.new(chunk) if input.respond_to?(:eof?) and input.eof? @inputs.shift @@ -244,6 +307,7 @@ class CSV end true else + # trace(__method__, :no_chunk) @scanner = StringScanner.new("".encode(@encoding)) @inputs.shift @last_scanner = @inputs.empty? @@ -278,7 +342,11 @@ class CSV end def field_size_limit - @field_size_limit + @max_field_size&.succ + end + + def max_field_size + @max_field_size end def skip_lines @@ -346,6 +414,16 @@ class CSV end message = "Invalid byte sequence in #{@encoding}" raise MalformedCSVError.new(message, lineno) + rescue UnexpectedError => error + if @scanner + ignore_broken_line + lineno = @lineno + else + lineno = @lineno + 1 + end + message = "This should not be happen: #{error.message}: " + message += "Please report this to https://github.com/ruby/csv/issues" + raise MalformedCSVError.new(message, lineno) end end @@ -390,7 +468,7 @@ class CSV @backslash_quote = false end @unconverted_fields = @options[:unconverted_fields] - @field_size_limit = @options[:field_size_limit] + @max_field_size = @options[:max_field_size] @skip_blanks = @options[:skip_blanks] @fields_converter = @options[:fields_converter] @header_fields_converter = @options[:header_fields_converter] @@ -680,9 +758,10 @@ class CSV case headers when Array @raw_headers = headers + quoted_fields = [false] * @raw_headers.size @use_headers = true when String - @raw_headers = parse_headers(headers) + @raw_headers, quoted_fields = parse_headers(headers) @use_headers = true when nil, false @raw_headers = nil @@ -692,21 +771,28 @@ class CSV @use_headers = true end if @raw_headers - @headers = adjust_headers(@raw_headers) + @headers = adjust_headers(@raw_headers, quoted_fields) else @headers = nil end end def parse_headers(row) - CSV.parse_line(row, - col_sep: @column_separator, - row_sep: @row_separator, - quote_char: @quote_character) + quoted_fields = [] + converter = lambda do |field, info| + quoted_fields << info.quoted? + field + end + headers = CSV.parse_line(row, + col_sep: @column_separator, + row_sep: @row_separator, + quote_char: @quote_character, + converters: [converter]) + [headers, quoted_fields] end - def adjust_headers(headers) - adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno) + def adjust_headers(headers, quoted_fields) + adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields) adjusted_headers.each {|h| h.freeze if h.is_a? String} adjusted_headers end @@ -729,28 +815,28 @@ class CSV sample[0, 128].index(@quote_character) end - SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") - if SCANNER_TEST - class UnoptimizedStringIO - def initialize(string) - @io = StringIO.new(string, "rb:#{string.encoding}") - end + class UnoptimizedStringIO # :nodoc: + def initialize(string) + @io = StringIO.new(string, "rb:#{string.encoding}") + end - def gets(*args) - @io.gets(*args) - end + def gets(*args) + @io.gets(*args) + end - def each_line(*args, &block) - @io.each_line(*args, &block) - end + def each_line(*args, &block) + @io.each_line(*args, &block) + end - def eof? - @io.eof? - end + def eof? + @io.eof? end + end - SCANNER_TEST_CHUNK_SIZE = - Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) + SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") + if SCANNER_TEST + SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" + SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] def build_scanner inputs = @samples.collect do |sample| UnoptimizedStringIO.new(sample) @@ -760,10 +846,17 @@ class CSV else inputs << @input end + begin + chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] + rescue # Ractor::IsolationError + # Ractor on Ruby 3.0 can't read ENV value. + chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE + end + chunk_size = Integer((chunk_size_value || "1"), 10) InputsScanner.new(inputs, @encoding, @row_separator, - chunk_size: SCANNER_TEST_CHUNK_SIZE) + chunk_size: chunk_size) end else def build_scanner @@ -826,6 +919,14 @@ class CSV end end + def validate_field_size(field) + return unless @max_field_size + return if field.size <= @max_field_size + ignore_broken_line + message = "Field size exceeded: #{field.size} > #{@max_field_size}" + raise MalformedCSVError.new(message, @lineno) + end + def parse_no_quote(&block) @scanner.each_line(@row_separator) do |line| next if @skip_lines and skip_line?(line) @@ -835,9 +936,16 @@ class CSV if line.empty? next if @skip_blanks row = [] + quoted_fields = [] else line = strip_value(line) row = line.split(@split_column_separator, -1) + quoted_fields = [false] * row.size + if @max_field_size + row.each do |column| + validate_field_size(column) + end + end n_columns = row.size i = 0 while i < n_columns @@ -846,7 +954,7 @@ class CSV end end @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end end @@ -868,31 +976,37 @@ class CSV next end row = [] + quoted_fields = [] elsif line.include?(@cr) or line.include?(@lf) @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) else row = line.split(@split_column_separator, -1) + quoted_fields = [] n_columns = row.size i = 0 while i < n_columns column = row[i] if column.empty? + quoted_fields << false row[i] = nil else n_quotes = column.count(@quote_character) if n_quotes.zero? + quoted_fields << false # no quote elsif n_quotes == 2 and column.start_with?(@quote_character) and column.end_with?(@quote_character) + quoted_fields << true row[i] = column[1..-2] else @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) end + validate_field_size(row[i]) end i += 1 end @@ -900,13 +1014,14 @@ class CSV @scanner.keep_drop @scanner.keep_start @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end @scanner.keep_drop end def parse_quotable_robust(&block) row = [] + quoted_fields = [] skip_needless_lines start_row while true @@ -916,32 +1031,39 @@ class CSV value = parse_column_value if value @scanner.scan_all(@strip_value) if @strip_value - if @field_size_limit and value.size >= @field_size_limit - ignore_broken_line - raise MalformedCSVError.new("Field size exceeded", @lineno) - end + validate_field_size(value) end if parse_column_end row << value + quoted_fields << @quoted_column_value elsif parse_row_end if row.empty? and value.nil? - emit_row([], &block) unless @skip_blanks + emit_row([], [], &block) unless @skip_blanks else row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) row = [] + quoted_fields = [] end skip_needless_lines start_row elsif @scanner.eos? break if row.empty? and value.nil? row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) break else if @quoted_column_value + if liberal_parsing? and (new_line = @scanner.check(@line_end)) + message = + "Illegal end-of-line sequence outside of a quoted field " + + "<#{new_line.inspect}>" + else + message = "Any value after quoted field isn't allowed" + end ignore_broken_line - message = "Any value after quoted field isn't allowed" raise MalformedCSVError.new(message, @lineno) elsif @unquoted_column_value and (new_line = @scanner.scan(@line_end)) @@ -1034,7 +1156,7 @@ class CSV if (n_quotes % 2).zero? quotes[0, (n_quotes - 2) / 2] else - value = quotes[0, (n_quotes - 1) / 2] + value = quotes[0, n_quotes / 2] while true quoted_value = @scanner.scan_all(@quoted_value) value << quoted_value if quoted_value @@ -1058,11 +1180,9 @@ class CSV n_quotes = quotes.size if n_quotes == 1 break - elsif (n_quotes % 2) == 1 - value << quotes[0, (n_quotes - 1) / 2] - break else value << quotes[0, n_quotes / 2] + break if (n_quotes % 2) == 1 end end value @@ -1098,18 +1218,15 @@ class CSV def strip_value(value) return value unless @strip - return nil if value.nil? + return value if value.nil? case @strip when String - size = value.size - while value.start_with?(@strip) - size -= 1 - value = value[1, size] + while value.delete_prefix!(@strip) + # do nothing end - while value.end_with?(@strip) - size -= 1 - value = value[0, size] + while value.delete_suffix!(@strip) + # do nothing end else value.strip! @@ -1132,22 +1249,22 @@ class CSV @scanner.keep_start end - def emit_row(row, &block) + def emit_row(row, quoted_fields, &block) @lineno += 1 raw_row = row if @use_headers if @headers.nil? - @headers = adjust_headers(row) + @headers = adjust_headers(row, quoted_fields) return unless @return_headers row = Row.new(@headers, row, true) else row = Row.new(@headers, - @fields_converter.convert(raw_row, @headers, @lineno)) + @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields)) end else # convert fields, if needed... - row = @fields_converter.convert(raw_row, nil, @lineno) + row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields) end # inject unconverted fields and accessor, if requested... diff --git a/lib/csv/row.rb b/lib/csv/row.rb index 7f2e7e7807..86323f7d0a 100644 --- a/lib/csv/row.rb +++ b/lib/csv/row.rb @@ -3,30 +3,105 @@ require "forwardable" class CSV + # = \CSV::Row + # A \CSV::Row instance represents a \CSV table row. + # (see {class CSV}[../CSV.html]). # - # A CSV::Row is part Array and part Hash. It retains an order for the fields - # and allows duplicates just as an Array would, but also allows you to access - # fields by name just as you could if they were in a Hash. + # The instance may have: + # - Fields: each is an object, not necessarily a \String. + # - Headers: each serves a key, and also need not be a \String. # - # All rows returned by CSV will be constructed from this class, if header row - # processing is activated. + # === Instance Methods + # + # \CSV::Row has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? + # * Array#length + # * Array#size + # + # == Creating a \CSV::Row Instance + # + # Commonly, a new \CSV::Row instance is created by parsing \CSV source + # that has headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.each {|row| p row } + # Output: + # # + # # + # # + # + # You can also create a row directly. See ::new. + # + # == Headers + # + # Like a \CSV::Table, a \CSV::Row has headers. + # + # A \CSV::Row that was created by parsing \CSV source + # inherits its headers from the table: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table.first + # row.headers # => ["Name", "Value"] + # + # You can also create a new row with headers; + # like the keys in a \Hash, the headers need not be Strings: + # row = CSV::Row.new([:name, :value], ['foo', 0]) + # row.headers # => [:name, :value] + # + # The new row retains its headers even if added to a table + # that has headers: + # table << row # => # + # row.headers # => [:name, :value] + # row[:name] # => "foo" + # row['Name'] # => nil + # + # + # + # == Accessing Fields + # + # You may access a field in a \CSV::Row with either its \Integer index + # (\Array-style) or its header (\Hash-style). + # + # Fetch a field using method #[]: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row[1] # => 0 + # row['Value'] # => 0 + # + # Set a field using method #[]=: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => # + # row[0] = 'bar' + # row['Value'] = 1 + # row # => # # class Row - # - # Constructs a new CSV::Row from +headers+ and +fields+, which are expected - # to be Arrays. If one Array is shorter than the other, it will be padded - # with +nil+ objects. - # - # The optional +header_row+ parameter can be set to +true+ to indicate, via - # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header - # row. Otherwise, the row assumes to be a field row. - # - # A CSV::Row object supports the following Array methods through delegation: - # - # * empty?() - # * length() - # * size() - # + # :call-seq: + # CSV::Row.new(headers, fields, header_row = false) -> csv_row + # + # Returns the new \CSV::Row instance constructed from + # arguments +headers+ and +fields+; both should be Arrays; + # note that the fields need not be Strings: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => # + # + # If the \Array lengths are different, the shorter is +nil+-filled: + # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0]) + # row # => # + # + # Each \CSV::Row object is either a field row or a header row; + # by default, a new row is a field row; for the row created above: + # row.field_row? # => true + # row.header_row? # => false + # + # If the optional argument +header_row+ is given as +true+, + # the created row is a header row: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true) + # row # => # + # row.field_row? # => false + # row.header_row? # => true def initialize(headers, fields, header_row = false) @header_row = header_row headers.each { |h| h.freeze if h.is_a? String } @@ -48,6 +123,10 @@ class CSV extend Forwardable def_delegators :@row, :empty?, :length, :size + # :call-seq: + # row.initialize_copy(other_row) -> self + # + # Calls superclass method. def initialize_copy(other) super_return_value = super @row = @row.collect(&:dup) @@ -71,7 +150,7 @@ class CSV end # :call-seq: - # row.headers + # row.headers -> array_of_headers # # Returns the headers for this row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" @@ -83,9 +162,9 @@ class CSV end # :call-seq: - # field(index) - # field(header) - # field(header, offset) + # field(index) -> value + # field(header) -> value + # field(header, offset) -> value # # Returns the field value for the given +index+ or +header+. # @@ -137,9 +216,9 @@ class CSV # # :call-seq: - # fetch(header) - # fetch(header, default) - # fetch(header) {|row| ... } + # fetch(header) -> value + # fetch(header, default) -> value + # fetch(header) {|row| ... } -> value # # Returns the field value as specified by +header+. # @@ -193,7 +272,7 @@ class CSV end # :call-seq: - # row.has_key?(header) + # row.has_key?(header) -> true or false # # Returns +true+ if there is a field with the given +header+, # +false+ otherwise. @@ -320,7 +399,7 @@ class CSV end # :call-seq: - # row.push(*values) ->self + # row.push(*values) -> self # # Appends each of the given +values+ to +self+ as a field; returns +self+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" @@ -403,7 +482,7 @@ class CSV end # :call-seq: - # self.fields(*specifiers) + # self.fields(*specifiers) -> array_of_fields # # Returns field values per the given +specifiers+, which may be any mixture of: # - \Integer index. @@ -471,15 +550,26 @@ class CSV end alias_method :values_at, :fields - # # :call-seq: - # index( header ) - # index( header, offset ) + # index(header) -> index + # index(header, offset) -> index # - # This method will return the index of a field with the provided +header+. - # The +offset+ can be used to locate duplicate header names, as described in - # CSV::Row.field(). + # Returns the index for the given header, if it exists; + # otherwise returns +nil+. # + # With the single argument +header+, returns the index + # of the first-found field with the given +header+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.index('Name') # => 0 + # row.index('NAME') # => nil + # + # With arguments +header+ and +offset+, + # returns the index of the first-found field with given +header+, + # but ignoring the first +offset+ fields: + # row.index('Name', 1) # => 1 + # row.index('Name', 3) # => nil def index(header, minimum_index = 0) # find the pair index = headers[minimum_index..-1].index(header) @@ -487,24 +577,36 @@ class CSV index.nil? ? nil : index + minimum_index end + # :call-seq: + # row.field?(value) -> true or false # - # Returns +true+ if +data+ matches a field in this row, and +false+ - # otherwise. - # + # Returns +true+ if +value+ is a field in this row, +false+ otherwise: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field?('Bar') # => true + # row.field?('BAR') # => false def field?(data) fields.include? data end include Enumerable + # :call-seq: + # row.each {|header, value| ... } -> self # - # Yields each pair of the row as header and field tuples (much like - # iterating over a Hash). This method returns the row for chaining. - # - # If no block is given, an Enumerator is returned. - # - # Support for Enumerable. + # Calls the block with each header-value pair; returns +self+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.each {|header, value| p [header, value] } + # Output: + # ["Name", "Foo"] + # ["Name", "Bar"] + # ["Name", "Baz"] # + # If no block is given, returns a new Enumerator: + # row.each # => #:each> def each(&block) return enum_for(__method__) { size } unless block_given? @@ -515,10 +617,19 @@ class CSV alias_method :each_pair, :each + # :call-seq: + # row == other -> true or false # - # Returns +true+ if this row contains the same headers and fields in the - # same order as +other+. - # + # Returns +true+ if +other+ is a /CSV::Row that has the same + # fields (headers and values) in the same order as +self+; + # otherwise returns +false+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # other_row = table[0] + # row == other_row # => true + # other_row = table[1] + # row == other_row # => false def ==(other) return @row == other.row if other.is_a? CSV::Row @row == other @@ -548,8 +659,30 @@ class CSV end alias_method :to_hash, :to_h + # :call-seq: + # row.deconstruct_keys(keys) -> hash + # + # Returns the new \Hash suitable for pattern matching containing only the + # keys specified as an argument. + def deconstruct_keys(keys) + if keys.nil? + to_h + else + keys.to_h { |key| [key, self[key]] } + end + end + alias_method :to_ary, :to_a + # :call-seq: + # row.deconstruct -> array + # + # Returns the new \Array suitable for pattern matching containing the values + # of the row. + def deconstruct + fields + end + # :call-seq: # row.to_csv -> csv_string # diff --git a/lib/csv/table.rb b/lib/csv/table.rb index 0b62ae89ae..fb19f5453f 100644 --- a/lib/csv/table.rb +++ b/lib/csv/table.rb @@ -3,31 +3,199 @@ require "forwardable" class CSV + # = \CSV::Table + # A \CSV::Table instance represents \CSV data. + # (see {class CSV}[../CSV.html]). # - # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, - # manipulate the data, and even convert the results back to CSV, if needed. + # The instance may have: + # - Rows: each is a Table::Row object. + # - Headers: names for the columns. # - # All tables returned by CSV will be constructed from this class, if header - # row processing is activated. + # === Instance Methods # + # \CSV::Table has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? + # * Array#length + # * Array#size + # + # == Creating a \CSV::Table Instance + # + # Commonly, a new \CSV::Table instance is created by parsing \CSV source + # using headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.class # => CSV::Table + # + # You can also create an instance directly. See ::new. + # + # == Headers + # + # If a table has headers, the headers serve as labels for the columns of data. + # Each header serves as the label for its column. + # + # The headers for a \CSV::Table object are stored as an \Array of Strings. + # + # Commonly, headers are defined in the first row of \CSV source: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.headers # => ["Name", "Value"] + # + # If no headers are defined, the \Array is empty: + # table = CSV::Table.new([]) + # table.headers # => [] + # + # == Access Modes + # + # \CSV::Table provides three modes for accessing table data: + # - \Row mode. + # - Column mode. + # - Mixed mode (the default for a new table). + # + # The access mode for a\CSV::Table instance affects the behavior + # of some of its instance methods: + # - #[] + # - #[]= + # - #delete + # - #delete_if + # - #each + # - #values_at + # + # === \Row Mode + # + # Set a table to row mode with method #by_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => # + # + # Specify a single row by an \Integer index: + # # Get a row. + # table[1] # => # + # # Set a row, then get it. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => # + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#, #] + # # Set rows, then get them. + # table[1..2] = [ + # CSV::Row.new(['Name', 'Value'], ['bat', 4]), + # CSV::Row.new(['Name', 'Value'], ['bad', 5]), + # ] + # table[1..2] # => [["Name", #], ["Value", #]] + # + # === Column Mode + # + # Set a table to column mode with method #by_col!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => # + # + # Specify a column by an \Integer index: + # # Get a column. + # table[0] + # # Set a column, then get it. + # table[0] = ['FOO', 'BAR', 'BAZ'] + # table[0] # => ["FOO", "BAR", "BAZ"] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["FOO", "BAR", "BAZ"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] + # + # === Mixed Mode + # + # In mixed mode, you can refer to either rows or columns: + # - An \Integer index refers to a row. + # - A \Range index refers to multiple rows. + # - A \String index refers to a column. + # + # Set a table to mixed mode with method #by_col_or_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col_or_row! # => # + # + # Specify a single row by an \Integer index: + # # Get a row. + # table[1] # => # + # # Set a row, then get it. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => # + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#, #] + # # Set rows, then get them. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4]) + # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5]) + # table[1..2] # => [["Name", #], ["Value", #]] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["foo", "bat", "bad"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] class Table + # :call-seq: + # CSV::Table.new(array_of_rows, headers = nil) -> csv_table + # + # Returns a new \CSV::Table object. + # + # - Argument +array_of_rows+ must be an \Array of CSV::Row objects. + # - Argument +headers+, if given, may be an \Array of Strings. + # + # --- + # + # Create an empty \CSV::Table object: + # table = CSV::Table.new([]) + # table # => # + # + # Create a non-empty \CSV::Table object: + # rows = [ + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # ] + # table = CSV::Table.new(rows) + # table # => # + # + # --- # - # Constructs a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. + # If argument +headers+ is an \Array of Strings, + # those Strings become the table's headers: + # table = CSV::Table.new([], headers: ['Name', 'Age']) + # table.headers # => ["Name", "Age"] # - # The optional +headers+ parameter can be set to Array of headers. - # If headers aren't set, headers are fetched from CSV::Row objects. - # Otherwise, headers() method will return headers being set in - # headers argument. + # If argument +headers+ is not given and the table has rows, + # the headers are taken from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['foo', 'bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] # - # A CSV::Table object supports the following Array methods through - # delegation: + # If argument +headers+ is not given and the table is empty (has no rows), + # the headers are also empty: + # table = CSV::Table.new([]) + # table.headers # => [] # - # * empty?() - # * length() - # * size() + # --- # + # Raises an exception if argument +array_of_rows+ is not an \Array object: + # # Raises NoMethodError (undefined method `first' for :foo:Symbol): + # CSV::Table.new(:foo) + # + # Raises an exception if an element of +array_of_rows+ is not a \CSV::Table object: + # # Raises NoMethodError (undefined method `headers' for :foo:Symbol): + # CSV::Table.new([:foo]) def initialize(array_of_rows, headers: nil) @table = array_of_rows @headers = headers @@ -54,88 +222,141 @@ class CSV extend Forwardable def_delegators :@table, :empty?, :length, :size + # :call-seq: + # table.by_col -> table_dup # - # Returns a duplicate table object, in column mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_col + # dup_table.mode # => :col + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col['Name'] # + # Also note that changes to the duplicate table will not affect the original. def by_col self.class.new(@table.dup).by_col! end + # :call-seq: + # table.by_col! -> self # - # Switches the mode of this table to column mode. All calls to indexing and - # iteration methods will work with columns until the mode is changed again. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_col! + # table.mode # => :col + # table1.equal?(table) # => true # Returned self def by_col! @mode = :col self end + # :call-seq: + # table.by_col_or_row -> table_dup # - # Returns a duplicate table object, in mixed mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # dup_table = table.by_col_or_row + # dup_table.mode # => :col_or_row + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col_or_row['Name'] # + # Also note that changes to the duplicate table will not affect the original. def by_col_or_row self.class.new(@table.dup).by_col_or_row! end + # :call-seq: + # table.by_col_or_row! -> self # - # Switches the mode of this table to mixed mode. All calls to indexing and - # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row - # reference while anything else is assumed to be column access by headers. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # table1 = table.by_col_or_row! + # table.mode # => :col_or_row + # table1.equal?(table) # => true # Returned self def by_col_or_row! @mode = :col_or_row self end + # :call-seq: + # table.by_row -> table_dup # - # Returns a duplicate table object, in row mode. This is handy for chaining - # in a single call without changing the table mode, but be aware that this - # method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_row + # dup_table.mode # => :row + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_row[1] # + # Also note that changes to the duplicate table will not affect the original. def by_row self.class.new(@table.dup).by_row! end + # :call-seq: + # table.by_row! -> self # - # Switches the mode of this table to row mode. All calls to indexing and - # iteration methods will work with rows until the mode is changed again. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_row! + # table.mode # => :row + # table1.equal?(table) # => true # Returned self def by_row! @mode = :row self end + # :call-seq: + # table.headers -> array_of_headers # - # Returns the headers for the first row of this table (assumed to match all - # other rows). The headers Array passed to CSV::Table.new is returned for - # empty tables. + # Returns a new \Array containing the \String headers for the table. # + # If the table is not empty, returns the headers from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # CSV::Row.new(['foo', 'bar'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] + # table.delete(0) + # table.headers # => ["FOO", "BAR"] + # table.delete(0) + # table.headers # => ["foo", "bar"] + # + # If the table is empty, returns a copy of the headers in the table itself: + # table.delete(0) + # table.headers # => ["Foo", "Bar"] def headers if @table.empty? @headers.dup @@ -145,17 +366,21 @@ class CSV end # :call-seq: - # table[n] -> row - # table[range] -> array_of_rows - # table[header] -> array_of_fields + # table[n] -> row or column_data + # table[range] -> array_of_rows or array_of_column_data + # table[header] -> array_of_column_data # # Returns data from the table; does not modify the table. # # --- # - # The expression table[n], where +n+ is a non-negative \Integer, - # returns the +n+th row of the table, if that row exists, - # and if the access mode is :row or :col_or_row: + # Fetch a \Row by Its \Integer Index:: + # - Form: table[n], +n+ an integer. + # - Access mode: :row or :col_or_row. + # - Return value: _nth_ row of the table, if that row exists; + # otherwise +nil+. + # + # Returns the _nth_ row of the table if that row exists: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # @@ -168,20 +393,45 @@ class CSV # # Returns +nil+ if +n+ is too large or too small: # table[4] # => nil - # table[-4] => nil + # table[-4] # => nil # # Raises an exception if the access mode is :row - # and +n+ is not an - # {Integer-convertible object}[rdoc-ref:implicit_conversion.rdoc@Integer-Convertible+Objects]. + # and +n+ is not an \Integer: # table.by_row! # => # # # Raises TypeError (no implicit conversion of String into Integer): # table['Name'] # # --- # - # The expression table[range], where +range+ is a Range object, - # returns rows from the table, beginning at row range.first, - # if those rows exist, and if the access mode is :row or :col_or_row: + # Fetch a Column by Its \Integer Index:: + # - Form: table[n], +n+ an \Integer. + # - Access mode: :col. + # - Return value: _nth_ column of the table, if that column exists; + # otherwise an \Array of +nil+ fields of length self.size. + # + # Returns the _nth_ column of the table if that column exists: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => # + # table[1] # => ["0", "1", "2"] + # + # Counts backward from the last column if +n+ is negative: + # table[-2] # => ["foo", "bar", "baz"] + # + # Returns an \Array of +nil+ fields if +n+ is too large or too small: + # table[4] # => [nil, nil, nil] + # table[-4] # => [nil, nil, nil] + # + # --- + # + # Fetch Rows by \Range:: + # - Form: table[range], +range+ a \Range object. + # - Access mode: :row or :col_or_row. + # - Return value: rows from the table, beginning at row range.start, + # if those rows exists. + # + # Returns rows from the table, beginning at row range.first, + # if those rows exist: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # @@ -191,11 +441,11 @@ class CSV # rows = table[1..2] # => # # rows # => [#, #] # - # If there are too few rows, returns all from range.first to the end: + # If there are too few rows, returns all from range.start to the end: # rows = table[1..50] # => # # rows # => [#, #] # - # Special case: if range.start == table.size, returns an empty \Array: + # Special case: if range.start == table.size, returns an empty \Array: # table[table.size..50] # => [] # # If range.end is negative, calculates the ending index from the end: @@ -211,9 +461,41 @@ class CSV # # --- # - # The expression table[header], where +header+ is a \String, - # returns column values (\Array of \Strings) if the column exists - # and if the access mode is :col or :col_or_row: + # Fetch Columns by \Range:: + # - Form: table[range], +range+ a \Range object. + # - Access mode: :col. + # - Return value: column data from the table, beginning at column range.start, + # if those columns exist. + # + # Returns column values from the table, if the column exists; + # the values are arranged by row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! + # table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # Special case: if range.start == headers.size, + # returns an \Array (size: table.size) of empty \Arrays: + # table[table.headers.size..50] # => [[], [], []] + # + # If range.end is negative, calculates the ending index from the end: + # table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If range.start is negative, calculates the starting index from the end: + # table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If range.start is larger than table.size, + # returns an \Array of +nil+ values: + # table[4..4] # => [nil, nil, nil] + # + # --- + # + # Fetch a Column by Its \String Header:: + # - Form: table[header], +header+ a \String header. + # - Access mode: :col or :col_or_row + # - Return value: column data from the table, if that +header+ exists. + # + # Returns column values from the table, if the column exists: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => # @@ -238,22 +520,132 @@ class CSV end end + # :call-seq: + # table[n] = row -> row + # table[n] = field_or_array_of_fields -> field_or_array_of_fields + # table[header] = field_or_array_of_fields -> field_or_array_of_fields # - # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). + # Puts data onto the table. # - # Rows may be set to an Array of values (which will inherit the table's - # headers()) or a CSV::Row. + # --- + # + # Set a \Row by Its \Integer Index:: + # - Form: table[n] = row, +n+ an \Integer, + # +row+ a \CSV::Row instance or an \Array of fields. + # - Access mode: :row or :col_or_row. + # - Return value: +row+. + # + # If the row exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3]) + # table.by_row! # => # + # return_value = table[0] = new_row + # return_value.equal?(new_row) # => true # Returned the row + # table[0].to_h # => {"Name"=>"bat", "Value"=>3} + # + # With access mode :col_or_row: + # table.by_col_or_row! # => # + # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4]) + # table[0].to_h # => {"Name"=>"bam", "Value"=>4} + # + # With an \Array instead of a \CSV::Row, inherits headers from the table: + # array = ['bad', 5] + # return_value = table[0] = array + # return_value.equal?(array) # => true # Returned the array + # table[0].to_h # => {"Name"=>"bad", "Value"=>5} # - # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array - # does not have a value for each row the extra rows will receive a +nil+. + # If the row does not exist, extends the table by adding rows: + # assigns rows with +nil+ as needed: + # table.size # => 3 + # table[5] = ['bag', 6] + # table.size # => 6 + # table[3] # => nil + # table[4]# => nil + # table[5].to_h # => {"Name"=>"bag", "Value"=>6} + # + # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields. # - # Assigning to an existing column or row clobbers the data. Assigning to - # new columns creates them at the right end of the table. + # --- # + # Set a Column by Its \Integer Index:: + # - Form: table[n] = array_of_fields, +n+ an \Integer, + # +array_of_fields+ an \Array of \String fields. + # - Access mode: :col. + # - Return value: +array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! # => # + # return_value = table[1] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table[1] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => # + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => # + # + # If there are too few values, fills with +nil+ values: + # table[1] = [0] + # table[1] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table[1] = [0, 1, 2, 3, 4] + # table[1] # => [0, 1, 2] + # + # If a single value is given, replaces all fields in the column with that value: + # table[1] = 'bat' + # table[1] # => ["bat", "bat", "bat"] + # + # --- + # + # Set a Column by Its \String Header:: + # - Form: table[header] = field_or_array_of_fields, + # +header+ a \String header, +field_or_array_of_fields+ a field value + # or an \Array of \String fields. + # - Access mode: :col or :col_or_row. + # - Return value: +field_or_array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! # => # + # return_value = table['Value'] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table['Value'] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => # + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => # + # + # If there are too few values, fills with +nil+ values: + # table['Value'] = [0] + # table['Value'] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table['Value'] = [0, 1, 2, 3, 4] + # table['Value'] # => [0, 1, 2] + # + # If the column does not exist, extends the table by adding columns: + # table['Note'] = ['x', 'y', 'z'] + # table['Note'] # => ["x", "y", "z"] + # # The rows, as revised: + # table.by_row! + # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"} + # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"} + # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"} + # table.by_col! + # + # If a single value is given, replaces all fields in the column with that value: + # table['Value'] = 'bat' + # table['Value'] # => ["bat", "bat", "bat"] def []=(index_or_header, value) if @mode == :row or # by index (@mode == :col_or_row and index_or_header.is_a? Integer) @@ -463,6 +855,9 @@ class CSV end end + # :call-seq: + # table.delete_if {|row_or_column| ... } -> self + # # Removes rows or columns for which the block returns a truthy value; # returns +self+. # @@ -495,9 +890,8 @@ class CSV if @mode == :row or @mode == :col_or_row # by index @table.delete_if(&block) else # by header - deleted = [] headers.each do |header| - deleted << delete(header) if yield([header, self[header]]) + delete(header) if yield([header, self[header]]) end end @@ -506,6 +900,9 @@ class CSV include Enumerable + # :call-seq: + # table.each {|row_or_column| ... ) -> self + # # Calls the block with each row or column; returns +self+. # # When the access mode is :row or :col_or_row, @@ -534,7 +931,9 @@ class CSV return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? if @mode == :col - headers.each { |header| yield([header, self[header]]) } + headers.each.with_index do |header, i| + yield([header, @table.map {|row| row[header, i]}]) + end else @table.each(&block) end @@ -542,6 +941,9 @@ class CSV self # for chaining end + # :call-seq: + # table == other_table -> true or false + # # Returns +true+ if all each row of +self+ == # the corresponding row of +other_table+, otherwise, +false+. # @@ -565,10 +967,14 @@ class CSV @table == other end + # :call-seq: + # table.to_a -> array_of_arrays # - # Returns the table as an Array of Arrays. Headers will be the first row, - # then all of the field rows will follow. - # + # Returns the table as an \Array of \Arrays; + # the headers are in the first row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]] def to_a array = [headers] @table.each do |row| @@ -578,16 +984,29 @@ class CSV array end + # :call-seq: + # table.to_csv(**options) -> csv_string # - # Returns the table as a complete CSV String. Headers will be listed first, - # then all of the field rows. + # Returns the table as \CSV string. + # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating]. # - # This method assumes you want the Table.headers(), unless you explicitly - # pass :write_headers => false. + # Defaults option +write_headers+ to +true+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # - def to_csv(write_headers: true, **options) + # Omits the headers if option +write_headers+ is given as +false+ + # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]): + # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n" + # + # Limit rows if option +limit+ is given like +2+: + # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n" + def to_csv(write_headers: true, limit: nil, **options) array = write_headers ? [headers.to_csv(**options)] : [] - @table.each do |row| + limit ||= @table.size + limit = @table.size + 1 + limit if limit < 0 + limit = 0 if limit < 0 + @table.first(limit).each do |row| array.push(row.fields.to_csv(**options)) unless row.header_row? end @@ -613,9 +1032,24 @@ class CSV end end - # Shows the mode and size of this table in a US-ASCII String. + # :call-seq: + # table.inspect => string + # + # Returns a US-ASCII-encoded \String showing table: + # - Class: CSV::Table. + # - Access mode: :row, :col, or :col_or_row. + # - Size: Row count, including the header row. + # + # Example: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.inspect # => "#\nName,Value\nfoo,0\nbar,1\nbaz,2\n" + # def inspect - "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") + inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>" + summary = to_csv(limit: 5) + inspected << "\n" << summary if summary.encoding.ascii_compatible? + inspected end end end diff --git a/lib/csv/version.rb b/lib/csv/version.rb index d1d0dc0e02..e05d63d801 100644 --- a/lib/csv/version.rb +++ b/lib/csv/version.rb @@ -2,5 +2,5 @@ class CSV # The version of the installed library. - VERSION = "3.2.2" + VERSION = "3.2.6" end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb index 4a9a35c5af..030a295bc9 100644 --- a/lib/csv/writer.rb +++ b/lib/csv/writer.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true require_relative "input_record_separator" -require_relative "match_p" require_relative "row" -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Writer @@ -42,7 +39,10 @@ class CSV @headers ||= row if @use_headers @lineno += 1 - row = @fields_converter.convert(row, nil, lineno) if @fields_converter + if @fields_converter + quoted_fields = [false] * row.size + row = @fields_converter.convert(row, nil, lineno, quoted_fields) + end i = -1 converted_row = row.collect do |field| @@ -97,7 +97,7 @@ class CSV return unless @headers converter = @options[:header_fields_converter] - @headers = converter.convert(@headers, nil, 0) + @headers = converter.convert(@headers, nil, 0, []) @headers.each do |header| header.freeze if header.is_a?(String) end -- cgit v1.2.3