diff options
Diffstat (limited to 'lib/csv')
-rw-r--r-- | lib/csv/fields_converter.rb | 6 | ||||
-rw-r--r-- | lib/csv/parser.rb | 60 | ||||
-rw-r--r-- | lib/csv/row.rb | 30 | ||||
-rw-r--r-- | lib/csv/table.rb | 54 | ||||
-rw-r--r-- | lib/csv/version.rb | 2 | ||||
-rw-r--r-- | lib/csv/writer.rb | 11 |
6 files changed, 113 insertions, 50 deletions
diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb index c2fa5798ff..a751c9ea1d 100644 --- a/lib/csv/fields_converter.rb +++ b/lib/csv/fields_converter.rb @@ -1,8 +1,14 @@ # frozen_string_literal: true class CSV + # Note: Don't use this class directly. This is an internal class. class FieldsConverter include Enumerable + # + # A CSV::FieldsConverter is a data structure for storing the + # fields converter properties to be passed as a parameter + # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options)) + # def initialize(options={}) @converters = [] diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 2ef2a28ff3..42145f8923 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) using CSV::MatchP if CSV.const_defined?(:MatchP) class CSV + # Note: Don't use this class directly. This is an internal class. class Parser + # + # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO + # or String object being read from or written to. Your data is never transcoded + # (unless you ask Ruby to transcode it for you) and will literally be parsed in + # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the + # Encoding of your data. This is accomplished by transcoding the parser itself + # into your Encoding. + # + + # Raised when encoding is invalid. class InvalidEncoding < StandardError end + # + # CSV::Scanner receives a CSV output, scans it and returns the content. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # Uses StringScanner (the official strscan gem). Strscan provides lexical + # scanning operations on a String. We inherit its object and take advantage + # of its methods. 
For more information, please visit: + # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html + # class Scanner < StringScanner alias_method :scan_all, :scan @@ -38,7 +59,7 @@ class CSV def keep_end start = @keeps.pop - string[start, pos - start] + string.byteslice(start, pos - start) end def keep_back @@ -50,6 +71,18 @@ class CSV end end + # + # CSV::InputsScanner receives IO inputs, encoding and the chunk_size. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # CSV::InputsScanner.scan() tries to match with pattern at the current position. + # If there's a match, the scanner advances the “scan pointer” and returns the matched string. + # Otherwise, the scanner returns nil. + # + # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). + # If there is no more data (eos? = true), it returns "". + # class InputsScanner def initialize(inputs, encoding, chunk_size: 8192) @inputs = inputs.dup @@ -137,7 +170,7 @@ class CSV def keep_end start, buffer = @keeps.pop - keep = @scanner.string[start, @scanner.pos - start] + keep = @scanner.string.byteslice(start, @scanner.pos - start) if buffer buffer << keep keep = buffer @@ -192,7 +225,7 @@ class CSV input = @inputs.first case input when StringIO - string = input.string + string = input.read raise InvalidEncoding unless string.valid_encoding? 
@scanner = StringScanner.new(string) @inputs.shift @@ -319,6 +352,7 @@ class CSV end private + # A set of tasks to prepare the file in order to parse it def prepare prepare_variable prepare_quote_character @@ -447,7 +481,13 @@ class CSV end def prepare_separators - @column_separator = @options[:column_separator].to_s.encode(@encoding) + column_separator = @options[:column_separator] + @column_separator = column_separator.to_s.encode(@encoding) + if @column_separator.size < 1 + message = ":col_sep must be 1 or more characters: " + message += column_separator.inspect + raise ArgumentError, message + end @row_separator = resolve_row_separator(@options[:row_separator]).encode(@encoding) @@ -534,7 +574,9 @@ class CSV cr = "\r".encode(@encoding) lf = "\n".encode(@encoding) if @input.is_a?(StringIO) - separator = detect_row_separator(@input.string, cr, lf) + pos = @input.pos + separator = detect_row_separator(@input.read, cr, lf) + @input.seek(pos) elsif @input.respond_to?(:gets) if @input.is_a?(File) chunk_size = 32 * 1024 @@ -651,7 +693,9 @@ class CSV return false if @quote_character.nil? if @input.is_a?(StringIO) - sample = @input.string + pos = @input.pos + sample = @input.read + @input.seek(pos) else return false if @samples.empty? sample = @samples.first @@ -684,7 +728,7 @@ class CSV UnoptimizedStringIO.new(sample) end if @input.is_a?(StringIO) - inputs << UnoptimizedStringIO.new(@input.string) + inputs << UnoptimizedStringIO.new(@input.read) else inputs << @input end @@ -697,7 +741,7 @@ class CSV def build_scanner string = nil if @samples.empty? and @input.is_a?(StringIO) - string = @input.string + string = @input.read elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof? string = @samples[0] end diff --git a/lib/csv/row.rb b/lib/csv/row.rb index 1e1f27587b..4aa0f30911 100644 --- a/lib/csv/row.rb +++ b/lib/csv/row.rb @@ -4,7 +4,7 @@ require "forwardable" class CSV # - # A CSV::Row is part Array and part Hash. 
It retains an order for the fields + # A CSV::Row is part Array and part Hash. It retains an order for the fields # and allows duplicates just as an Array would, but also allows you to access # fields by name just as you could if they were in a Hash. # @@ -13,13 +13,13 @@ class CSV # class Row # - # Construct a new CSV::Row from +headers+ and +fields+, which are expected - # to be Arrays. If one Array is shorter than the other, it will be padded + # Constructs a new CSV::Row from +headers+ and +fields+, which are expected + # to be Arrays. If one Array is shorter than the other, it will be padded # with +nil+ objects. # # The optional +header_row+ parameter can be set to +true+ to indicate, via # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header - # row. Otherwise, the row is assumes to be a field row. + # row. Otherwise, the row is assumed to be a field row. # # A CSV::Row object supports the following Array methods through delegation: # @@ -74,11 +74,11 @@ class CSV # field( header, offset ) # field( index ) # - # This method will return the field value by +header+ or +index+. If a field + # This method will return the field value by +header+ or +index+. If a field # is not found, +nil+ is returned. # # When provided, +offset+ ensures that a header match occurs on or later - # than the +offset+ index. You can use this to find duplicate headers, + # than the +offset+ index. You can use this to find duplicate headers, # without resorting to hard-coding exact indices. # def field(header_or_index, minimum_index = 0) @@ -142,7 +142,7 @@ class CSV # assigns the +value+. # # Assigning past the end of the row with an index will set all pairs between - # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new + # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new # pair. 
# def []=(*args) @@ -172,8 +172,8 @@ class CSV # <<( header_and_field_hash ) # # If a two-element Array is provided, it is assumed to be a header and field - # and the pair is appended. A Hash works the same way with the key being - # the header and the value being the field. Anything else is assumed to be + # and the pair is appended. A Hash works the same way with the key being + # the header and the value being the field. Anything else is assumed to be # a lone field which is appended with a +nil+ header. # # This method returns the row for chaining. @@ -191,7 +191,7 @@ class CSV end # - # A shortcut for appending multiple fields. Equivalent to: + # A shortcut for appending multiple fields. Equivalent to: # # args.each { |arg| csv_row << arg } # @@ -209,8 +209,8 @@ class CSV # delete( header, offset ) # delete( index ) # - # Used to remove a pair from the row by +header+ or +index+. The pair is - # located as described in CSV::Row.field(). The deleted pair is returned, + # Removes a pair from the row by +header+ or +index+. The pair is + # located as described in CSV::Row.field(). The deleted pair is returned, # or +nil+ if a pair could not be found. # def delete(header_or_index, minimum_index = 0) @@ -325,7 +325,7 @@ class CSV end # - # Collapses the row into a simple Hash. Be warned that this discards field + # Collapses the row into a simple Hash. Be warned that this discards field # order and clobbers duplicate fields. # def to_h @@ -340,7 +340,7 @@ class CSV alias_method :to_ary, :to_a # - # Returns the row as a CSV String. Headers are not used. Equivalent to: + # Returns the row as a CSV String. Headers are not used. Equivalent to: # # csv_row.fields.to_csv( options ) # @@ -367,7 +367,9 @@ class CSV end end + # # A summary of fields, by header, in an ASCII compatible String. 
+ # def inspect str = ["#<", self.class.to_s] each do |header, field| diff --git a/lib/csv/table.rb b/lib/csv/table.rb index 29b188a6d7..e6c1ee11fa 100644 --- a/lib/csv/table.rb +++ b/lib/csv/table.rb @@ -5,7 +5,7 @@ require "forwardable" class CSV # # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, + # documents. Tables allow you to work with the data by row or column, # manipulate the data, and even convert the results back to CSV, if needed. # # All tables returned by CSV will be constructed from this class, if header @@ -13,8 +13,8 @@ class CSV # class Table # - # Construct a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. + # Constructs a new CSV::Table from +array_of_rows+, which are expected + # to be CSV::Row objects. All rows are assumed to have the same headers. # # The optional +headers+ parameter can be set to Array of headers. # If headers aren't set, headers are fetched from CSV::Row objects. @@ -55,11 +55,11 @@ class CSV def_delegators :@table, :empty?, :length, :size # - # Returns a duplicate table object, in column mode. This is handy for + # Returns a duplicate table object, in column mode. This is handy for # chaining in a single call without changing the table mode, but be aware # that this method can consume a fair amount of memory for bigger data sets. # - # This method returns the duplicate table for chaining. Don't chain + # This method returns the duplicate table for chaining. Don't chain # destructive methods (like []=()) this way though, since you are working # with a duplicate. # @@ -68,7 +68,7 @@ class CSV end # - # Switches the mode of this table to column mode. All calls to indexing and + # Switches the mode of this table to column mode. All calls to indexing and # iteration methods will work with columns until the mode is changed again. 
# # This method returns the table and is safe to chain. @@ -80,7 +80,7 @@ class CSV end # - # Returns a duplicate table object, in mixed mode. This is handy for + # Returns a duplicate table object, in mixed mode. This is handy for # chaining in a single call without changing the table mode, but be aware # that this method can consume a fair amount of memory for bigger data sets. # @@ -93,9 +93,9 @@ class CSV end # - # Switches the mode of this table to mixed mode. All calls to indexing and + # Switches the mode of this table to mixed mode. All calls to indexing and # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row + # the mode is changed again. In mixed mode an index is assumed to be a row # reference while anything else is assumed to be column access by headers. # # This method returns the table and is safe to chain. @@ -120,7 +120,7 @@ class CSV end # - # Switches the mode of this table to row mode. All calls to indexing and + # Switches the mode of this table to row mode. All calls to indexing and # iteration methods will work with rows until the mode is changed again. # # This method returns the table and is safe to chain. @@ -146,7 +146,7 @@ class CSV # # In the default mixed mode, this method returns rows for index access and - # columns for header access. You can force the index association by first + # columns for header access. You can force the index association by first # calling by_col!() or by_row!(). # # Columns are returned as an Array of values. Altering that Array has no @@ -163,18 +163,18 @@ class CSV # # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first + # columns for header access. You can force the index association by first # calling by_col!() or by_row!(). 
# # Rows may be set to an Array of values (which will inherit the table's # headers()) or a CSV::Row. # # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array + # column, or an Array of values. Arrays of values are assigned to rows top + # to bottom in row major order. Excess values are ignored and if the Array # does not have a value for each row the extra rows will receive a +nil+. # - # Assigning to an existing column or row clobbers the data. Assigning to + # Assigning to an existing column or row clobbers the data. Assigning to # new columns creates them at the right end of the table. # def []=(index_or_header, value) @@ -212,9 +212,9 @@ class CSV # # The mixed mode default is to treat a list of indices as row access, - # returning the rows indicated. Anything else is considered columnar - # access. For columnar access, the return set has an Array for each row - # with the values indicated by the headers in each Array. You can force + # returning the rows indicated. Anything else is considered columnar + # access. For columnar access, the return set has an Array for each row + # with the values indicated by the headers in each Array. You can force # column or row mode using by_col!() or by_row!(). # # You cannot mix column and row access. @@ -234,7 +234,7 @@ class CSV end # - # Adds a new row to the bottom end of this table. You can provide an Array, + # Adds a new row to the bottom end of this table. You can provide an Array, # which will be converted to a CSV::Row (inheriting the table's headers()), # or a CSV::Row. # @@ -251,7 +251,7 @@ class CSV end # - # A shortcut for appending multiple rows. Equivalent to: + # A shortcut for appending multiple rows. 
Equivalent to: # # rows.each { |row| self << row } # @@ -264,9 +264,9 @@ class CSV end # - # Removes and returns the indicated columns or rows. In the default mixed + # Removes and returns the indicated columns or rows. In the default mixed # mode indices refer to rows and everything else is assumed to be a column - # headers. Use by_col!() or by_row!() to force the lookup. + # headers. Use by_col!() or by_row!() to force the lookup. # def delete(*indexes_or_headers) if indexes_or_headers.empty? @@ -293,9 +293,9 @@ class CSV end # - # Removes any column or row for which the block returns +true+. In the + # Removes any column or row for which the block returns +true+. In the # default mixed mode or row mode, iteration is the standard row major - # walking of rows. In column mode, iteration will +yield+ two element + # walking of rows. In column mode, iteration will +yield+ two element # tuples containing the column name and an Array of values for that column. # # This method returns the table for chaining. @@ -321,7 +321,7 @@ class CSV # # In the default mixed mode or row mode, iteration is the standard row major - # walking of rows. In column mode, iteration will +yield+ two element + # walking of rows. In column mode, iteration will +yield+ two element # tuples containing the column name and an Array of values for that column. # # This method returns the table for chaining. @@ -347,7 +347,7 @@ class CSV end # - # Returns the table as an Array of Arrays. Headers will be the first row, + # Returns the table as an Array of Arrays. Headers will be the first row, # then all of the field rows will follow. # def to_a @@ -360,7 +360,7 @@ class CSV end # - # Returns the table as a complete CSV String. Headers will be listed first, + # Returns the table as a complete CSV String. Headers will be listed first, # then all of the field rows. 
# # This method assumes you want the Table.headers(), unless you explicitly diff --git a/lib/csv/version.rb b/lib/csv/version.rb index ce55373f02..072400fe01 100644 --- a/lib/csv/version.rb +++ b/lib/csv/version.rb @@ -2,5 +2,5 @@ class CSV # The version of the installed library. - VERSION = "3.1.1" + VERSION = "3.1.2" end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb index 1682ac03ea..9243d23641 100644 --- a/lib/csv/writer.rb +++ b/lib/csv/writer.rb @@ -6,7 +6,12 @@ require_relative "row" using CSV::MatchP if CSV.const_defined?(:MatchP) class CSV + # Note: Don't use this class directly. This is an internal class. class Writer + # + # A CSV::Writer receives an output, prepares the header, format and output. + # It allows us to write new rows in the object and rewind it. + # attr_reader :lineno attr_reader :headers @@ -22,6 +27,9 @@ class CSV @fields_converter = @options[:fields_converter] end + # + # Adds a new row + # def <<(row) case row when Row @@ -47,6 +55,9 @@ class CSV self end + # + # Winds back to the beginning + # def rewind @lineno = 0 @headers = nil if @options[:headers].nil? |