aboutsummaryrefslogtreecommitdiffstats
path: root/lib/csv/parser.rb
diff options
context:
space:
mode:
authorSutou Kouhei <kou@cozmixng.org>2019-10-12 14:03:21 +0900
committerGitHub <noreply@github.com>2019-10-12 14:03:21 +0900
commit92df7d98b62f48cf21cdec522f2e7b34380fd718 (patch)
treea0d169e177ebd5607caefa26cef90cc70df48232 /lib/csv/parser.rb
parentd6e68bb263e79cb802fa683d9c4139ddca2fd4f5 (diff)
downloadruby-92df7d98b62f48cf21cdec522f2e7b34380fd718.tar.gz
Import CSV 3.1.2 (#2547)
Diffstat (limited to 'lib/csv/parser.rb')
-rw-r--r--lib/csv/parser.rb60
1 files changed, 52 insertions, 8 deletions
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index 2ef2a28ff3..42145f8923 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
using CSV::MatchP if CSV.const_defined?(:MatchP)
class CSV
+ # Note: Don't use this class directly. This is an internal class.
class Parser
+ #
+ # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+ # or String object being read from or written to. Your data is never transcoded
+ # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+ # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+ # Encoding of your data. This is accomplished by transcoding the parser itself
+ # into your Encoding.
+ #
+
+ # Raised when encoding is invalid.
class InvalidEncoding < StandardError
end
+ #
+ # CSV::Scanner receives a CSV output, scans it and return the content.
+ # It also controls the life cycle of the object with its methods +keep_start+,
+ # +keep_end+, +keep_back+, +keep_drop+.
+ #
+ # Uses StringScanner (the official strscan gem). Strscan provides lexical
+ # scanning operations on a String. We inherit its object and take advantage
+ # on the methods. For more information, please visit:
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+ #
class Scanner < StringScanner
alias_method :scan_all, :scan
@@ -38,7 +59,7 @@ class CSV
def keep_end
start = @keeps.pop
- string[start, pos - start]
+ string.byteslice(start, pos - start)
end
def keep_back
@@ -50,6 +71,18 @@ class CSV
end
end
+ #
+ # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+ # It also controls the life cycle of the object with its methods +keep_start+,
+ # +keep_end+, +keep_back+, +keep_drop+.
+ #
+ # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+ # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+ # Otherwise, the scanner returns nil.
+ #
+ # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+ # If there is no more data (eos? = true), it returns "".
+ #
class InputsScanner
def initialize(inputs, encoding, chunk_size: 8192)
@inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
def keep_end
start, buffer = @keeps.pop
- keep = @scanner.string[start, @scanner.pos - start]
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
if buffer
buffer << keep
keep = buffer
@@ -192,7 +225,7 @@ class CSV
input = @inputs.first
case input
when StringIO
- string = input.string
+ string = input.read
raise InvalidEncoding unless string.valid_encoding?
@scanner = StringScanner.new(string)
@inputs.shift
@@ -319,6 +352,7 @@ class CSV
end
private
+ # A set of tasks to prepare the file in order to parse it
def prepare
prepare_variable
prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
end
def prepare_separators
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
+ column_separator = @options[:column_separator]
+ @column_separator = column_separator.to_s.encode(@encoding)
+ if @column_separator.size < 1
+ message = ":col_sep must be 1 or more characters: "
+ message += column_separator.inspect
+ raise ArgumentError, message
+ end
@row_separator =
resolve_row_separator(@options[:row_separator]).encode(@encoding)
@@ -534,7 +574,9 @@ class CSV
cr = "\r".encode(@encoding)
lf = "\n".encode(@encoding)
if @input.is_a?(StringIO)
- separator = detect_row_separator(@input.string, cr, lf)
+ pos = @input.pos
+ separator = detect_row_separator(@input.read, cr, lf)
+ @input.seek(pos)
elsif @input.respond_to?(:gets)
if @input.is_a?(File)
chunk_size = 32 * 1024
@@ -651,7 +693,9 @@ class CSV
return false if @quote_character.nil?
if @input.is_a?(StringIO)
- sample = @input.string
+ pos = @input.pos
+ sample = @input.read
+ @input.seek(pos)
else
return false if @samples.empty?
sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
UnoptimizedStringIO.new(sample)
end
if @input.is_a?(StringIO)
- inputs << UnoptimizedStringIO.new(@input.string)
+ inputs << UnoptimizedStringIO.new(@input.read)
else
inputs << @input
end
@@ -697,7 +741,7 @@ class CSV
def build_scanner
string = nil
if @samples.empty? and @input.is_a?(StringIO)
- string = @input.string
+ string = @input.read
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
string = @samples[0]
end