diff options
author | jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-01-01 02:44:48 +0000 |
---|---|---|
committer | jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2016-01-01 02:44:48 +0000 |
commit | f18f940802752fb8997164d4440c488fb1396f35 (patch) | |
tree | 4b565a7cf7873e9de8ab876579c951238f47ed87 | |
parent | 7d9342aecd8dc608f85ea13c400b91f8e361ab9f (diff) | |
download | ruby-f18f940802752fb8997164d4440c488fb1396f35.tar.gz |
Adding a liberal_parsing option to CSV. Patch by Braden Anderson.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@53401 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | lib/csv.rb | 26 | ||||
-rwxr-xr-x | test/csv/test_features.rb | 23 |
3 files changed, 51 insertions, 4 deletions
@@ -1,3 +1,9 @@ +Fri Jan 1 11:42:57 2016 James Edward Gray II <james@graysoftinc.com> + + * lib/csv.rb (CSV): Add a liberal_parsing option. + Patch by Braden Anderson. [#11839] + * test/csv/test_features.rb: test liberal_parsing + Fri Jan 1 10:27:28 2016 Nobuyoshi Nakada <nobu@ruby-lang.org> * tool/mkconfig.rb (RbConfig): prefix SDKROOT to oldincludedir diff --git a/lib/csv.rb b/lib/csv.rb index ba9d62c706..cd2d579b58 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1019,6 +1019,7 @@ class CSV # <b><tt>:skip_blanks</tt></b>:: +false+ # <b><tt>:force_quotes</tt></b>:: +false+ # <b><tt>:skip_lines</tt></b>:: +nil+ + # <b><tt>:liberal_parsing</tt></b>:: +false+ # DEFAULT_OPTIONS = { col_sep: ",", @@ -1033,6 +1034,7 @@ class CSV skip_blanks: false, force_quotes: false, skip_lines: nil, + liberal_parsing: false, }.freeze # @@ -1499,6 +1501,10 @@ class CSV # a comment. If the passed object does # not respond to <tt>match</tt>, # <tt>ArgumentError</tt> is thrown. + # <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will + # attempt to parse input not conformant + # with RFC 4180, such as double quotes + # in unquoted fields. # # See CSV::DEFAULT_OPTIONS for the default settings. # @@ -1622,6 +1628,8 @@ class CSV def skip_blanks?() @skip_blanks end # Returns +true+ if all output fields are quoted. See CSV::new for details. def force_quotes?() @force_quotes end + # Returns +true+ if illegal input is handled. See CSV::new for details. + def liberal_parsing?() @liberal_parsing end # # The Encoding CSV is parsing or writing in. This will be the Encoding you @@ -1860,12 +1868,12 @@ class CSV end elsif part[0] == @quote_char # If we are starting a new quoted column - if part[-1] != @quote_char || part.count(@quote_char) % 2 != 0 + if part.count(@quote_char) % 2 != 0 # start an extended column csv << part[1..-1] csv.last << @col_sep in_extended_col = true - else + elsif part[-1] == @quote_char # regular quoted column csv << part[1..-2] if csv.last =~ @parsers[:stray_quote] @@ -1873,6 +1881,11 @@ class CSV "Missing or stray quote in line #{lineno + 1}" end csv.last.gsub!(@quote_char * 2, @quote_char) + elsif @liberal_parsing + csv << part + else + raise MalformedCSVError, + "Missing or stray quote in line #{lineno + 1}" end elsif part =~ @parsers[:quote_or_nl] # Unquoted field with bad characters. @@ -1880,7 +1893,11 @@ class CSV raise MalformedCSVError, "Unquoted fields do not allow " + "\\r or \\n (line #{lineno + 1})." else - raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." + if @liberal_parsing + csv << part + else + raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}." + end end else # Regular ole unquoted field. @@ -1945,7 +1962,7 @@ class CSV str << " encoding:" << @encoding.name # show other attributes %w[ lineno col_sep row_sep - quote_char skip_blanks ].each do |attr_name| + quote_char skip_blanks liberal_parsing ].each do |attr_name| if a = instance_variable_get("@#{attr_name}") str << " " << attr_name << ":" << a.inspect end @@ -2079,6 +2096,7 @@ class CSV # store the parser behaviors @skip_blanks = options.delete(:skip_blanks) @field_size_limit = options.delete(:field_size_limit) + @liberal_parsing = options.delete(:liberal_parsing) # prebuild Regexps for faster parsing esc_row_sep = escape_re(@row_sep) diff --git a/test/csv/test_features.rb b/test/csv/test_features.rb index 37fdab8636..a558875522 100755 --- a/test/csv/test_features.rb +++ b/test/csv/test_features.rb @@ -142,6 +142,29 @@ class TestCSV::Features < TestCSV assert_equal(3, count) end + def test_liberal_parsing + input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson' + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line(input) + end + assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'], + CSV.parse_line(input, liberal_parsing: true)) + + input = '"quoted" field' + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line(input) + end + assert_equal(['"quoted" field'], + CSV.parse_line(input, liberal_parsing: true)) + + assert_raise(CSV::MalformedCSVError) do + CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) + end + + assert_equal(["is", 'this "three', ' or four"', "fields"], + CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true)) + end + def test_csv_behavior_readers %w[ unconverted_fields return_headers write_headers skip_blanks force_quotes ].each do |behavior| |