diff options
author | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-05-09 04:39:16 +0000 |
---|---|---|
committer | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2018-05-09 04:39:16 +0000 |
commit | 5c1941a9be56a979c27d740370b781882d344f79 (patch) | |
tree | 7478e42cde5b470b4df2eb40f89ee25f2621f5aa /lib/csv | |
parent | dfc56b8c432d6a374c18cba7048d05175bcfba05 (diff) | |
download | ruby-5c1941a9be56a979c27d740370b781882d344f79.tar.gz |
Merge csv-1.0.2 from upstream.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@63364 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib/csv')
-rw-r--r-- | lib/csv/core_ext/array.rb | 9 | ||||
-rw-r--r-- | lib/csv/core_ext/string.rb | 9 | ||||
-rw-r--r-- | lib/csv/csv.gemspec | 24 | ||||
-rw-r--r-- | lib/csv/row.rb | 388 | ||||
-rw-r--r-- | lib/csv/table.rb | 378 | ||||
-rw-r--r-- | lib/csv/version.rb | 6 |
6 files changed, 814 insertions, 0 deletions
diff --git a/lib/csv/core_ext/array.rb b/lib/csv/core_ext/array.rb new file mode 100644 index 0000000000..94df7d5c35 --- /dev/null +++ b/lib/csv/core_ext/array.rb @@ -0,0 +1,9 @@ +class Array # :nodoc: + # Equivalent to CSV::generate_line(self, options) + # + # ["CSV", "data"].to_csv + # #=> "CSV,data\n" + def to_csv(**options) + CSV.generate_line(self, options) + end +end diff --git a/lib/csv/core_ext/string.rb b/lib/csv/core_ext/string.rb new file mode 100644 index 0000000000..8f2070f3bd --- /dev/null +++ b/lib/csv/core_ext/string.rb @@ -0,0 +1,9 @@ +class String # :nodoc: + # Equivalent to CSV::parse_line(self, options) + # + # "CSV,data".parse_csv + # #=> ["CSV", "data"] + def parse_csv(**options) + CSV.parse_line(self, options) + end +end diff --git a/lib/csv/csv.gemspec b/lib/csv/csv.gemspec new file mode 100644 index 0000000000..38ead7c81d --- /dev/null +++ b/lib/csv/csv.gemspec @@ -0,0 +1,24 @@ +# frozen_string_literal: true + +require_relative "lib/csv/version" + +Gem::Specification.new do |spec| + spec.name = "csv" + spec.version = CSV::VERSION + spec.authors = ["James Edward Gray II", "Kouhei Sutou"] + spec.email = [nil, "kou@cozmixng.org"] + + spec.summary = "CSV Reading and Writing" + spec.description = "The CSV library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed." 
+ spec.homepage = "https://github.com/ruby/csv" + spec.license = "BSD-2-Clause" + + spec.files = Dir.glob("lib/**/*.rb") + spec.files += ["README.md", "LICENSE.txt", "news.md"] + spec.require_paths = ["lib"] + spec.required_ruby_version = ">= 2.3.0" + + spec.add_development_dependency "bundler" + spec.add_development_dependency "rake" + spec.add_development_dependency "benchmark-ips" +end diff --git a/lib/csv/row.rb b/lib/csv/row.rb new file mode 100644 index 0000000000..8ff3480ae8 --- /dev/null +++ b/lib/csv/row.rb @@ -0,0 +1,388 @@ +# frozen_string_literal: true + +require "forwardable" + +class CSV + # + # A CSV::Row is part Array and part Hash. It retains an order for the fields + # and allows duplicates just as an Array would, but also allows you to access + # fields by name just as you could if they were in a Hash. + # + # All rows returned by CSV will be constructed from this class, if header row + # processing is activated. + # + class Row + # + # Construct a new CSV::Row from +headers+ and +fields+, which are expected + # to be Arrays. If one Array is shorter than the other, it will be padded + # with +nil+ objects. + # + # The optional +header_row+ parameter can be set to +true+ to indicate, via + # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header + # row. Otherwise, the row is assumed to be a field row. + # + # A CSV::Row object supports the following Array methods through delegation: + # + # * empty?() + # * length() + # * size() + # + def initialize(headers, fields, header_row = false) + @header_row = header_row + headers.each { |h| h.freeze if h.is_a? String } + + # handle extra headers or fields + @row = if headers.size >= fields.size + headers.zip(fields) + else + fields.zip(headers).each(&:reverse!) + end + end + + # Internal data format used to compare equality. 
+ attr_reader :row + protected :row + + ### Array Delegation ### + + extend Forwardable + def_delegators :@row, :empty?, :length, :size + + # Returns +true+ if this is a header row. + def header_row? + @header_row + end + + # Returns +true+ if this is a field row. + def field_row? + not header_row? + end + + # Returns the headers of this row. + def headers + @row.map(&:first) + end + + # + # :call-seq: + # field( header ) + # field( header, offset ) + # field( index ) + # + # This method will return the field value by +header+ or +index+. If a field + # is not found, +nil+ is returned. + # + # When provided, +offset+ ensures that a header match occurs on or later + # than the +offset+ index. You can use this to find duplicate headers, + # without resorting to hard-coding exact indices. + # + def field(header_or_index, minimum_index = 0) + # locate the pair + finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc + pair = @row[minimum_index..-1].send(finder, header_or_index) + + # return the field if we have a pair + if pair.nil? + nil + else + header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last + end + end + alias_method :[], :field + + # + # :call-seq: + # fetch( header ) + # fetch( header ) { |header| ... } + # fetch( header, default ) + # + # This method will fetch the field value by +header+. It has the same + # behavior as Hash#fetch: if there is a field with the given +header+, its + # value is returned. Otherwise, if a block is given, it is yielded the + # +header+ and its result is returned; if a +default+ is given as the + # second argument, it is returned; otherwise a KeyError is raised. + # + def fetch(header, *varargs) + raise ArgumentError, "Too many arguments" if varargs.length > 1 + pair = @row.assoc(header) + if pair + pair.last + else + if block_given? + yield header + elsif varargs.empty? 
+ raise KeyError, "key not found: #{header}" + else + varargs.first + end + end + end + + # Returns +true+ if there is a field with the given +header+. + def has_key?(header) + !!@row.assoc(header) + end + alias_method :include?, :has_key? + alias_method :key?, :has_key? + alias_method :member?, :has_key? + + # + # :call-seq: + # []=( header, value ) + # []=( header, offset, value ) + # []=( index, value ) + # + # Looks up the field by the semantics described in CSV::Row.field() and + # assigns the +value+. + # + # Assigning past the end of the row with an index will set all pairs between + # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new + # pair. + # + def []=(*args) + value = args.pop + + if args.first.is_a? Integer + if @row[args.first].nil? # extending past the end with index + @row[args.first] = [nil, value] + @row.map! { |pair| pair.nil? ? [nil, nil] : pair } + else # normal index assignment + @row[args.first][1] = value + end + else + index = index(*args) + if index.nil? # appending a field + self << [args.first, value] + else # normal header assignment + @row[index][1] = value + end + end + end + + # + # :call-seq: + # <<( field ) + # <<( header_and_field_array ) + # <<( header_and_field_hash ) + # + # If a two-element Array is provided, it is assumed to be a header and field + # and the pair is appended. A Hash works the same way with the key being + # the header and the value being the field. Anything else is assumed to be + # a lone field which is appended with a +nil+ header. + # + # This method returns the row for chaining. + # + def <<(arg) + if arg.is_a?(Array) and arg.size == 2 # appending a header and name + @row << arg + elsif arg.is_a?(Hash) # append header and name pairs + arg.each { |pair| @row << pair } + else # append field value + @row << [nil, arg] + end + + self # for chaining + end + + # + # A shortcut for appending multiple fields. 
Equivalent to: + # + # args.each { |arg| csv_row << arg } + # + # This method returns the row for chaining. + # + def push(*args) + args.each { |arg| self << arg } + + self # for chaining + end + + # + # :call-seq: + # delete( header ) + # delete( header, offset ) + # delete( index ) + # + # Used to remove a pair from the row by +header+ or +index+. The pair is + # located as described in CSV::Row.field(). The deleted pair is returned; + # a miss yields +nil+ for an index or an empty Array for a header. + # + def delete(header_or_index, minimum_index = 0) + if header_or_index.is_a? Integer # by index + @row.delete_at(header_or_index) + elsif i = index(header_or_index, minimum_index) # by header + @row.delete_at(i) + else + [ ] + end + end + + # + # The provided +block+ is passed a header and field for each pair in the row + # and expected to return +true+ or +false+, depending on whether the pair + # should be deleted. + # + # This method returns the row for chaining. + # + # If no block is given, an Enumerator is returned. + # + def delete_if(&block) + return enum_for(__method__) { size } unless block_given? + + @row.delete_if(&block) + + self # for chaining + end + + # + # This method accepts any number of arguments which can be headers, indices, + # Ranges of either, or two-element Arrays containing a header and offset. + # Each argument will be replaced with a field lookup as described in + # CSV::Row.field(). + # + # If called with no arguments, all fields are returned. + # + def fields(*headers_and_or_indices) + if headers_and_or_indices.empty? # return all fields--no arguments + @row.map(&:last) + else # or work like values_at() + all = [] + headers_and_or_indices.each do |h_or_i| + if h_or_i.is_a? Range + index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin : + index(h_or_i.begin) + index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end : + index(h_or_i.end) + new_range = h_or_i.exclude_end? ? 
(index_begin...index_end) : + (index_begin..index_end) + all.concat(fields.values_at(new_range)) + else + all << field(*Array(h_or_i)) + end + end + return all + end + end + alias_method :values_at, :fields + + # + # :call-seq: + # index( header ) + # index( header, offset ) + # + # This method will return the index of a field with the provided +header+. + # The +offset+ can be used to locate duplicate header names, as described in + # CSV::Row.field(). + # + def index(header, minimum_index = 0) + # find the pair + index = headers[minimum_index..-1].index(header) + # return the index at the right offset, if we found one + index.nil? ? nil : index + minimum_index + end + + # Returns +true+ if +name+ is a header for this row, and +false+ otherwise. + def header?(name) + headers.include? name + end + alias_method :include?, :header? + + # + # Returns +true+ if +data+ matches a field in this row, and +false+ + # otherwise. + # + def field?(data) + fields.include? data + end + + include Enumerable + + # + # Yields each pair of the row as header and field tuples (much like + # iterating over a Hash). This method returns the row for chaining. + # + # If no block is given, an Enumerator is returned. + # + # Support for Enumerable. + # + def each(&block) + return enum_for(__method__) { size } unless block_given? + + @row.each(&block) + + self # for chaining + end + + alias_method :each_pair, :each + + # + # Returns +true+ if this row contains the same headers and fields in the + # same order as +other+. + # + def ==(other) + return @row == other.row if other.is_a? CSV::Row + @row == other + end + + # + # Collapses the row into a simple Hash. Be warned that this discards field + # order and clobbers duplicate fields. + # + def to_h + hash = {} + each do |key, _value| + hash[key] = self[key] unless hash.key?(key) + end + hash + end + alias_method :to_hash, :to_h + + alias_method :to_ary, :to_a + + # + # Returns the row as a CSV String. Headers are not used. 
Equivalent to: + # + # csv_row.fields.to_csv( options ) + # + def to_csv(**options) + fields.to_csv(options) + end + alias_method :to_s, :to_csv + + # + # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, + # returning nil if any intermediate step is nil. + # + def dig(index_or_header, *indexes) + value = field(index_or_header) + if value.nil? + nil + elsif indexes.empty? + value + else + unless value.respond_to?(:dig) + raise TypeError, "#{value.class} does not have \#dig method" + end + value.dig(*indexes) + end + end + + # A summary of fields, by header, in an ASCII compatible String. + def inspect + str = ["#<", self.class.to_s] + each do |header, field| + str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) << + ":" << field.inspect + end + str << ">" + begin + str.join('') + rescue # any encoding error + str.map do |s| + e = Encoding::Converter.asciicompat_encoding(s.encoding) + e ? s.encode(e) : s.force_encoding("ASCII-8BIT") + end.join('') + end + end + end +end diff --git a/lib/csv/table.rb b/lib/csv/table.rb new file mode 100644 index 0000000000..e9f3366a4a --- /dev/null +++ b/lib/csv/table.rb @@ -0,0 +1,378 @@ +# frozen_string_literal: true + +require "forwardable" + +class CSV + # + # A CSV::Table is a two-dimensional data structure for representing CSV + # documents. Tables allow you to work with the data by row or column, + # manipulate the data, and even convert the results back to CSV, if needed. + # + # All tables returned by CSV will be constructed from this class, if header + # row processing is activated. + # + class Table + # + # Construct a new CSV::Table from +array_of_rows+, which are expected + # to be CSV::Row objects. All rows are assumed to have the same headers. 
+ # + # A CSV::Table object supports the following Array methods through + # delegation: + # + # * empty?() + # * length() + # * size() + # + def initialize(array_of_rows) + @table = array_of_rows + @mode = :col_or_row + end + + # The current access mode for indexing and iteration. + attr_reader :mode + + # Internal data format used to compare equality. + attr_reader :table + protected :table + + ### Array Delegation ### + + extend Forwardable + def_delegators :@table, :empty?, :length, :size + + # + # Returns a duplicate table object, in column mode. This is handy for + # chaining in a single call without changing the table mode, but be aware + # that this method can consume a fair amount of memory for bigger data sets. + # + # This method returns the duplicate table for chaining. Don't chain + # destructive methods (like []=()) this way though, since you are working + # with a duplicate. + # + def by_col + self.class.new(@table.dup).by_col! + end + + # + # Switches the mode of this table to column mode. All calls to indexing and + # iteration methods will work with columns until the mode is changed again. + # + # This method returns the table and is safe to chain. + # + def by_col! + @mode = :col + + self + end + + # + # Returns a duplicate table object, in mixed mode. This is handy for + # chaining in a single call without changing the table mode, but be aware + # that this method can consume a fair amount of memory for bigger data sets. + # + # This method returns the duplicate table for chaining. Don't chain + # destructive methods (like []=()) this way though, since you are working + # with a duplicate. + # + def by_col_or_row + self.class.new(@table.dup).by_col_or_row! + end + + # + # Switches the mode of this table to mixed mode. All calls to indexing and + # iteration methods will use the default intelligent indexing system until + # the mode is changed again. 
In mixed mode an index is assumed to be a row + # reference while anything else is assumed to be column access by headers. + # + # This method returns the table and is safe to chain. + # + def by_col_or_row! + @mode = :col_or_row + + self + end + + # + # Returns a duplicate table object, in row mode. This is handy for chaining + # in a single call without changing the table mode, but be aware that this + # method can consume a fair amount of memory for bigger data sets. + # + # This method returns the duplicate table for chaining. Don't chain + # destructive methods (like []=()) this way though, since you are working + # with a duplicate. + # + def by_row + self.class.new(@table.dup).by_row! + end + + # + # Switches the mode of this table to row mode. All calls to indexing and + # iteration methods will work with rows until the mode is changed again. + # + # This method returns the table and is safe to chain. + # + def by_row! + @mode = :row + + self + end + + # + # Returns the headers for the first row of this table (assumed to match all + # other rows). An empty Array is returned for empty tables. + # + def headers + if @table.empty? + Array.new + else + @table.first.headers + end + end + + # + # In the default mixed mode, this method returns rows for index access and + # columns for header access. You can force the index association by first + # calling by_col!() or by_row!(). + # + # Columns are returned as an Array of values. Altering that Array has no + # effect on the table. + # + def [](index_or_header) + if @mode == :row or # by index + (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range))) + @table[index_or_header] + else # by header + @table.map { |row| row[index_or_header] } + end + end + + # + # In the default mixed mode, this method assigns rows for index access and + # columns for header access. You can force the index association by first + # calling by_col!() or by_row!(). 
+ # + # Rows may be set to an Array of values (which will inherit the table's + # headers()) or a CSV::Row. + # + # Columns may be set to a single value, which is copied to each row of the + # column, or an Array of values. Arrays of values are assigned to rows top + # to bottom in row major order. Excess values are ignored and if the Array + # does not have a value for each row the extra rows will receive a +nil+. + # + # Assigning to an existing column or row clobbers the data. Assigning to + # new columns creates them at the right end of the table. + # + def []=(index_or_header, value) + if @mode == :row or # by index + (@mode == :col_or_row and index_or_header.is_a? Integer) + if value.is_a? Array + @table[index_or_header] = Row.new(headers, value) + else + @table[index_or_header] = value + end + else # set column + if value.is_a? Array # multiple values + @table.each_with_index do |row, i| + if row.header_row? + row[index_or_header] = index_or_header + else + row[index_or_header] = value[i] + end + end + else # repeated value + @table.each do |row| + if row.header_row? + row[index_or_header] = index_or_header + else + row[index_or_header] = value + end + end + end + end + end + + # + # The mixed mode default is to treat a list of indices as row access, + # returning the rows indicated. Anything else is considered columnar + # access. For columnar access, the return set has an Array for each row + # with the values indicated by the headers in each Array. You can force + # column or row mode using by_col!() or by_row!(). + # + # You cannot mix column and row access. + # + def values_at(*indices_or_headers) + if @mode == :row or # by indices + ( @mode == :col_or_row and indices_or_headers.all? 
do |index| + index.is_a?(Integer) or + ( index.is_a?(Range) and + index.first.is_a?(Integer) and + index.last.is_a?(Integer) ) + end ) + @table.values_at(*indices_or_headers) + else # by headers + @table.map { |row| row.values_at(*indices_or_headers) } + end + end + + # + # Adds a new row to the bottom end of this table. You can provide an Array, + # which will be converted to a CSV::Row (inheriting the table's headers()), + # or a CSV::Row. + # + # This method returns the table for chaining. + # + def <<(row_or_array) + if row_or_array.is_a? Array # append Array + @table << Row.new(headers, row_or_array) + else # append Row + @table << row_or_array + end + + self # for chaining + end + + # + # A shortcut for appending multiple rows. Equivalent to: + # + # rows.each { |row| self << row } + # + # This method returns the table for chaining. + # + def push(*rows) + rows.each { |row| self << row } + + self # for chaining + end + + # + # Removes and returns the indicated columns or rows. In the default mixed + # mode indices refer to rows and everything else is assumed to be a column + # header. Use by_col!() or by_row!() to force the lookup. + # + def delete(*indexes_or_headers) + if indexes_or_headers.empty? + raise ArgumentError, "wrong number of arguments (given 0, expected 1+)" + end + deleted_values = indexes_or_headers.map do |index_or_header| + if @mode == :row or # by index + (@mode == :col_or_row and index_or_header.is_a? Integer) + @table.delete_at(index_or_header) + else # by header + @table.map { |row| row.delete(index_or_header).last } + end + end + if indexes_or_headers.size == 1 + deleted_values[0] + else + deleted_values + end + end + + # + # Removes any column or row for which the block returns +true+. In the + # default mixed mode or row mode, iteration is the standard row major + # walking of rows. In column mode, iteration will +yield+ two element + # tuples containing the column name and an Array of values for that column. 
+ # + # This method returns the table for chaining. + # + # If no block is given, an Enumerator is returned. + # + def delete_if(&block) + return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given? + + if @mode == :row or @mode == :col_or_row # by index + @table.delete_if(&block) + else # by header + deleted = [] + headers.each do |header| + deleted << delete(header) if yield([header, self[header]]) + end + end + + self # for chaining + end + + include Enumerable + + # + # In the default mixed mode or row mode, iteration is the standard row major + # walking of rows. In column mode, iteration will +yield+ two element + # tuples containing the column name and an Array of values for that column. + # + # This method returns the table for chaining. + # + # If no block is given, an Enumerator is returned. + # + def each(&block) + return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? + + if @mode == :col + headers.each { |header| yield([header, self[header]]) } + else + @table.each(&block) + end + + self # for chaining + end + + # Returns +true+ if all rows of this table ==() +other+'s rows. + def ==(other) + return @table == other.table if other.is_a? CSV::Table + @table == other + end + + # + # Returns the table as an Array of Arrays. Headers will be the first row, + # then all of the field rows will follow. + # + def to_a + array = [headers] + @table.each do |row| + array.push(row.fields) unless row.header_row? + end + + array + end + + # + # Returns the table as a complete CSV String. Headers will be listed first, + # then all of the field rows. + # + # This method assumes you want the Table.headers(), unless you explicitly + # pass <tt>:write_headers => false</tt>. + # + def to_csv(write_headers: true, **options) + array = write_headers ? [headers.to_csv(options)] : [] + @table.each do |row| + array.push(row.fields.to_csv(options)) unless row.header_row? 
+ end + + array.join("") + end + alias_method :to_s, :to_csv + + # + # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, + # returning nil if any intermediate step is nil. + # + def dig(index_or_header, *index_or_headers) + value = self[index_or_header] + if value.nil? + nil + elsif index_or_headers.empty? + value + else + unless value.respond_to?(:dig) + raise TypeError, "#{value.class} does not have \#dig method" + end + value.dig(*index_or_headers) + end + end + + # Shows the mode and size of this table in a US-ASCII String. + def inspect + "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") + end + end +end
\ No newline at end of file diff --git a/lib/csv/version.rb b/lib/csv/version.rb new file mode 100644 index 0000000000..35adea3a92 --- /dev/null +++ b/lib/csv/version.rb @@ -0,0 +1,6 @@ +# frozen_string_literal: true + +class CSV + # The version of the installed library. + VERSION = "1.0.2" +end |