about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Sutou Kouhei <kou@clear-code.com> 2019-12-25 06:59:43 +0900
committer Nobuyoshi Nakada <nobu@ruby-lang.org> 2020-07-20 02:32:49 +0900
commit 814bfc8adc128ed050f2b60a423beb86e00fc6ec (patch)
tree de96cc1881cf72874496ce95857c93ff611f34e2
parent aeac7db8236ad43d8c8992fd1b9d120d567754ec (diff)
download ruby-814bfc8adc128ed050f2b60a423beb86e00fc6ec.tar.gz
[ruby/csv] Fix a parse bug when split character exists in middle of column value
GitHub: fix #115
Reported by TOMITA Masahiro. Thanks!!!
https://github.com/ruby/csv/commit/398b3564c5
-rw-r--r-- lib/csv/parser.rb 11
-rw-r--r-- test/csv/parse/test_strip.rb 5
2 files changed, 13 insertions, 3 deletions
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb
index f30cfc6f76..924ca4620d 100644
--- a/lib/csv/parser.rb
+++ b/lib/csv/parser.rb
@@ -446,6 +446,7 @@ class CSV
@strip = @options[:strip]
@escaped_strip = nil
@strip_value = nil
+ @rstrip_value = nil
if @strip.is_a?(String)
case @strip.length
when 0
@@ -460,6 +461,8 @@ class CSV
if @quote_character
@strip_value = Regexp.new(@escaped_strip +
"+".encode(@encoding))
+ @rstrip_value = Regexp.new(@escaped_strip +
+ "+\\z".encode(@encoding))
end
@need_robust_parsing = true
elsif @strip
@@ -467,6 +470,7 @@ class CSV
@escaped_strip = strip_values.encode(@encoding)
if @quote_character
@strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
+ @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
end
@need_robust_parsing = true
end
@@ -561,9 +565,6 @@ class CSV
unless @liberal_parsing
no_unquoted_values << @escaped_quote_character
end
- if @escaped_strip
- no_unquoted_values << @escaped_strip
- end
@unquoted_value = Regexp.new("[^".encode(@encoding) +
no_unquoted_values +
"]+".encode(@encoding))
@@ -939,6 +940,7 @@ class CSV
if @liberal_parsing
quoted_value = parse_quoted_column_value
if quoted_value
+ @scanner.scan_all(@strip_value) if @strip_value
unquoted_value = parse_unquoted_column_value
if unquoted_value
if @double_quote_outside_quote
@@ -986,6 +988,9 @@ class CSV
end
end
value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
+ if @rstrip_value
+ value.gsub!(@rstrip_value, "")
+ end
value
end
diff --git a/test/csv/parse/test_strip.rb b/test/csv/parse/test_strip.rb
index 0255bb9a30..3564fcb3ba 100644
--- a/test/csv/parse/test_strip.rb
+++ b/test/csv/parse/test_strip.rb
@@ -21,6 +21,11 @@ class TestCSVParseStrip < Test::Unit::TestCase
CSV.parse_line(%Q{a ,b }, strip: true))
end
+ def test_middle
+ assert_equal(["a b"],
+ CSV.parse_line(%Q{a b}, strip: true))
+ end
+
def test_quoted
assert_equal([" a ", " b "],
CSV.parse_line(%Q{" a "," b "}, strip: true))