diff options
author | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-05-18 02:42:16 +0000 |
---|---|---|
committer | hsbt <hsbt@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2017-05-18 02:42:16 +0000 |
commit | 37abc2fb7ed77fd3063f0ac8fcadd55b80526f4b (patch) | |
tree | 106207350ac2e66f99f1314263d9461582df63df /lib | |
parent | 8ffc4094a42feb49f6dc740af2657fc32ca72353 (diff) | |
download | ruby-37abc2fb7ed77fd3063f0ac8fcadd55b80526f4b.tar.gz |
Improve CSV parsing performance.
Patch by @joshpencheon (Josh Pencheon)
[fix GH-1607]
#### benchmark-ips results
```
trunk:
Warming up --------------------------------------
4.000 i/100ms
Calculating -------------------------------------
39.661 (±10.1%) i/s - 2.352k in 60.034781s
with-patch:
Warming up --------------------------------------
5.000 i/100ms
Calculating -------------------------------------
60.521 (± 9.9%) i/s - 3.595k in 60.047157s
```
#### memory_profiler results
```
trunk:
allocated memory by class
-----------------------------------
35588490 String
7454320 Array
294000 MatchData
37340 Regexp
11840 Hash
2400 CSV
1600 Proc
1280 Method
800 StringIO
with-patch:
allocated memory by class
-----------------------------------
18788490 String
3454320 Array
294000 MatchData
37340 Regexp
11840 Hash
2400 CSV
1600 Proc
1280 Method
800 StringIO
```
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58777 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'lib')
-rw-r--r-- | lib/csv.rb | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/lib/csv.rb b/lib/csv.rb index e7aedc4558..9a861c7327 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1876,7 +1876,7 @@ class CSV # If we are continuing a previous column if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0 # extended column ends - csv[-1] = csv[-1].push(part[0..-2]).join("") + csv.last << part[0..-2] if csv.last =~ @parsers[:stray_quote] raise MalformedCSVError, "Missing or stray quote in line #{lineno + 1}" @@ -1884,13 +1884,13 @@ class CSV csv.last.gsub!(@double_quote_char, @quote_char) in_extended_col = false else - csv.last.push(part, @col_sep) + csv.last << part << @col_sep end elsif part.start_with?(@quote_char) # If we are starting a new quoted column if part.count(@quote_char) % 2 != 0 # start an extended column - csv << [part[1..-1], @col_sep] + csv << (part[1..-1] << @col_sep) in_extended_col = true elsif part.end_with?(@quote_char) # regular quoted column @@ -1933,7 +1933,7 @@ class CSV if @io.eof? raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}." - elsif @field_size_limit and csv.last.sum(&:size) >= @field_size_limit + elsif @field_size_limit and csv.last.size >= @field_size_limit raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}." end # otherwise, we need to loop and pull some more data to complete the row |