aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-12-25 08:32:27 +0000
committernobu <nobu@b2dd03c8-39d4-4d8f-98ff-823fe69b080e>2010-12-25 08:32:27 +0000
commit9c017ca5fb24e89463e5aee0304bd6a2305143df (patch)
treeee21e9905aaf9933c9e8f3ddd9b2bc1966cf1781
parent9a907880f05fef475d5b61ed7f03f045b6103ed8 (diff)
downloadruby-9c017ca5fb24e89463e5aee0304bd6a2305143df.tar.gz
* lib/csv.rb (CSV#init_separators): use IO#gets with length
parameter to get rid of wrong conversion. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@30356 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r--ChangeLog5
-rw-r--r--lib/csv.rb42
-rwxr-xr-xtest/csv/test_encodings.rb18
3 files changed, 25 insertions, 40 deletions
diff --git a/ChangeLog b/ChangeLog
index 656356db42..741980e6d2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,7 @@
-Sat Dec 25 16:04:34 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
+Sat Dec 25 17:32:24 2010 Nobuyoshi Nakada <nobu@ruby-lang.org>
+
+ * lib/csv.rb (CSV#init_separators): use IO#gets with length
+ parameter to get rid of wrong convertion.
* lib/csv.rb (CSV::foreach, CSV#initialize): directly use encoding
diff --git a/lib/csv.rb b/lib/csv.rb
index 092424e33c..278abc1eea 100644
--- a/lib/csv.rb
+++ b/lib/csv.rb
@@ -1573,10 +1573,7 @@ class CSV
# if we can transcode the needed characters
#
@re_esc = "\\".encode(@encoding) rescue ""
- @re_chars = %w[ \\ . [ ] - ^ $ ?
- * + { } ( ) | #
- \ \r \n \t \f \v ].
- map { |s| s.encode(@encoding) rescue nil }.compact
+ @re_chars = /#{%"[-][\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding, fallback: proc{""})}/
init_separators(options)
init_parsers(options)
@@ -2025,15 +2022,13 @@ class CSV
# if we run out of data, it's probably a single line
# (use a sensible default)
#
- if @io.eof?
+ unless sample = @io.gets(nil, 1024)
@row_sep = $INPUT_RECORD_SEPARATOR
break
end
# read ahead a bit
- sample = read_to_char(1024)
- sample += read_to_char(1) if sample[-1..-1] == encode_str("\r") and
- not @io.eof?
+ sample << (@io.gets(nil, 1) || "") if sample.end_with?(encode_str("\r"))
# try to find a standard separator
if sample =~ encode_re("\r\n?|\n")
@row_sep = $&
@@ -2267,7 +2262,7 @@ class CSV
# a backslash cannot be transcoded.
#
def escape_re(str)
- str.chars.map { |c| @re_chars.include?(c) ? @re_esc + c : c }.join('')
+ str.gsub(@re_chars) {|c| @re_esc + c}
end
#
@@ -2286,31 +2281,6 @@ class CSV
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
end
- #
- # Reads at least +bytes+ from <tt>@io</tt>, but will read up 10 bytes ahead if
- # needed to ensure the data read is valid in the ecoding of that data. This
- # should ensure that it is safe to use regular expressions on the read data,
- # unless it is actually a broken encoding. The read data will be returned in
- # <tt>@encoding</tt>.
- #
- def read_to_char(bytes)
- return "" if @io.eof?
- data = read_io(bytes)
- begin
- raise unless data.valid_encoding?
- encoded = encode_str(data)
- raise unless encoded.valid_encoding?
- return encoded
- rescue # encoding error or my invalid data raise
- if @io.eof? or data.size >= bytes + 10
- return data
- else
- data += read_io(1)
- retry
- end
- end
- end
-
private
def raw_encoding
@@ -2324,10 +2294,6 @@ class CSV
Encoding::ASCII_8BIT
end
end
-
- def read_io(bytes)
- @io.read(bytes).force_encoding(raw_encoding)
- end
end
# Another name for CSV::instance().
diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb
index 0f2ec127c5..59f43b1be9 100755
--- a/test/csv/test_encodings.rb
+++ b/test/csv/test_encodings.rb
@@ -238,12 +238,28 @@ class TestCSV::Encodings < TestCSV
def assert_parses(fields, encoding, options = { })
encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
+ orig_fields = fields
fields = encode_ary(fields, encoding)
- parsed = CSV.parse(ary_to_data(fields, options), options)
+ data = ary_to_data(fields, options)
+ parsed = CSV.parse(data, options)
assert_equal(fields, parsed)
parsed.flatten.each_with_index do |field, i|
assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.")
end
+ File.open(@temp_csv_path, "wb") {|f| f.print(data)}
+ CSV.open(@temp_csv_path, "rb:#{encoding}", options) do |csv|
+ csv.each_with_index do |row, i|
+ assert_equal(fields[i], row)
+ end
+ end
+ begin
+ CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", options) do |csv|
+ csv.each_with_index do |row, i|
+ assert_equal(orig_fields[i], row)
+ end
+ end unless encoding == __ENCODING__
+ rescue Encoding::ConverterNotFoundError
+ end
end
def encode_ary(ary, encoding)