diff options
author | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-07-19 22:43:38 +0000 |
---|---|---|
committer | drbrain <drbrain@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2012-07-19 22:43:38 +0000 |
commit | b1a0509b5465ce77f52e0384159237889a8d60ec (patch) | |
tree | ea22ccb90a2367364e0c740f15ad571558b025f4 | |
parent | ef19dcf96dd2e84c4fe0a46888a5afd0cd457f80 (diff) | |
download | ruby-b1a0509b5465ce77f52e0384159237889a8d60ec.tar.gz |
* lib/net/http/response.rb: Automatically inflate gzip and
deflate-encoded response bodies. [Feature #6492]
* lib/net/http/generic_request.rb: Automatically accept gzip and
deflate content-encoding for requests. [Feature #6494]
* lib/net/http/request.rb: Updated documentation for #6494.
* lib/net/http.rb: Updated documentation for #6492 and #6494, removed
Content-Encoding handling now present in Net::HTTPResponse.
* test/net/http/test_httpresponse.rb: Tests for #6492
* test/net/http/test_http_request.rb: Tests for #6494
* test/open-uri/test_open-uri.rb (test_content_encoding): Updated test
for automatic content-encoding handling.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@36473 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
-rw-r--r-- | ChangeLog | 14 | ||||
-rw-r--r-- | lib/net/http.rb | 29 | ||||
-rw-r--r-- | lib/net/http/generic_request.rb | 12 | ||||
-rw-r--r-- | lib/net/http/request.rb | 7 | ||||
-rw-r--r-- | lib/net/http/response.rb | 147 | ||||
-rw-r--r-- | test/net/http/test_http_request.rb | 57 | ||||
-rw-r--r-- | test/net/http/test_httpresponse.rb | 158 | ||||
-rw-r--r-- | test/open-uri/test_open-uri.rb | 8 |
8 files changed, 387 insertions, 45 deletions
@@ -1,3 +1,17 @@ +Fri Jul 20 07:40:32 2012 Eric Hodel <drbrain@segment7.net> + + * lib/net/http/response.rb: Automatically inflate gzip and + deflate-encoded response bodies. [Feature #6492] + * lib/net/http/generic_request.rb: Automatically accept gzip and + deflate content-encoding for requests. [Feature #6494] + * lib/net/http/request.rb: Updated documentation for #6494. + * lib/net/http.rb: Updated documentation for #6492 and #6494, removed + Content-Encoding handling now present in Net::HTTPResponse. + * test/net/http/test_httpresponse.rb: Tests for #6492 + * test/net/http/test_http_request.rb: Tests for #6494 + * test/open-uri/test_open-uri.rb (test_content_encoding): Updated test + for automatic content-encoding handling. + Fri Jul 20 03:42:54 2012 NARUSE, Yui <naruse@ruby-lang.org> * thread_pthread.c: use #ifdef, not #if. diff --git a/lib/net/http.rb b/lib/net/http.rb index 7efea79cdd..977daabe1c 100644 --- a/lib/net/http.rb +++ b/lib/net/http.rb @@ -283,6 +283,14 @@ module Net #:nodoc: # See Net::HTTP::Proxy for further details and examples such as proxies that # require a username and password. # + # === Compression + # + # Net::HTTP automatically adds Accept-Encoding for compression of response + # bodies and automatically decompresses gzip and deflate responses unless a + # Range header was sent. + # + # Compression can be disabled through the Accept-Encoding: identity header. + # # == HTTP Request Classes # # Here is the HTTP request class hierarchy. 
@@ -602,7 +610,6 @@ module Net #:nodoc: @use_ssl = false @ssl_context = nil @enable_post_connection_check = true - @compression = nil @sspi_enabled = false SSL_IVNAMES.each do |ivname| instance_variable_set ivname, nil @@ -1052,28 +1059,10 @@ module Net #:nodoc: initheader = initheader.merge({ "accept-encoding" => "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" }) - @compression = true end end request(Get.new(path, initheader)) {|r| - if r.key?("content-encoding") and @compression - @compression = nil # Clear it till next set. - the_body = r.read_body dest, &block - case r["content-encoding"] - when "gzip" - r.body= Zlib::GzipReader.new(StringIO.new(the_body), encoding: "ASCII-8BIT").read - r.delete("content-encoding") - when "deflate" - r.body= Zlib::Inflate.inflate(the_body); - r.delete("content-encoding") - when "identity" - ; # nothing needed - else - ; # Don't do anything dramatic, unless we need to later - end - else - r.read_body dest, &block - end + r.read_body dest, &block res = r } res diff --git a/lib/net/http/generic_request.rb b/lib/net/http/generic_request.rb index ca4e48f277..bcf87d35be 100644 --- a/lib/net/http/generic_request.rb +++ b/lib/net/http/generic_request.rb @@ -14,6 +14,18 @@ class Net::HTTPGenericRequest raise ArgumentError, "no HTTP request path given" unless path raise ArgumentError, "HTTP request path is empty" if path.empty? @path = path + + if @response_has_body and Net::HTTP::HAVE_ZLIB then + if !initheader || + !initheader.keys.any? { |k| + %w[accept-encoding range].include? k.downcase + } then + initheader = initheader ? 
initheader.dup : {} + initheader["accept-encoding"] = + "gzip;q=1.0,deflate;q=0.6,identity;q=0.3" + end + end + initialize_http_header initheader self['Accept'] ||= '*/*' self['User-Agent'] ||= 'Ruby' diff --git a/lib/net/http/request.rb b/lib/net/http/request.rb index c5a6c102af..e8b0f48fcc 100644 --- a/lib/net/http/request.rb +++ b/lib/net/http/request.rb @@ -4,7 +4,12 @@ # subclasses: Net::HTTP::Get, Net::HTTP::Post, Net::HTTP::Head. # class Net::HTTPRequest < Net::HTTPGenericRequest - # Creates HTTP request object. + # Creates an HTTP request object for +path+. + # + # +initheader+ are the default headers to use. Net::HTTP adds + # Accept-Encoding to enable compression of the response body unless + # Accept-Encoding or Range are supplied in +initheader+. + def initialize(path, initheader = nil) super self.class::METHOD, self.class::REQUEST_HAS_BODY, diff --git a/lib/net/http/response.rb b/lib/net/http/response.rb index dde5ae308e..69c84bfe28 100644 --- a/lib/net/http/response.rb +++ b/lib/net/http/response.rb @@ -222,25 +222,70 @@ class Net::HTTPResponse private - def read_body_0(dest) - if chunked? - read_chunked dest - return - end - clen = content_length() - if clen - @socket.read clen, dest, true # ignore EOF - return + ## + # Checks for a supported Content-Encoding header and yields an Inflate + # wrapper for this response's socket when zlib is present. If the + # Content-Encoding is unsupported or zlib is missing the plain socket is + # yielded. + # + # If a Content-Range header is present a plain socket is yielded as the + # bytes in the range may not be a complete deflate block. 
+ + def inflater # :nodoc: + return yield @socket unless Net::HTTP::HAVE_ZLIB + return yield @socket if self['content-range'] + + case self['content-encoding'] + when 'deflate', 'gzip', 'x-gzip' then + self.delete 'content-encoding' + + inflate_body_io = Inflater.new(@socket) + + begin + yield inflate_body_io + ensure + inflate_body_io.finish + end + when 'none', 'identity' then + self.delete 'content-encoding' + + yield @socket + else + yield @socket end - clen = range_length() - if clen - @socket.read clen, dest - return + end + + def read_body_0(dest) + inflater do |inflate_body_io| + if chunked? + read_chunked dest, inflate_body_io + return + end + + @socket = inflate_body_io + + clen = content_length() + if clen + @socket.read clen, dest, true # ignore EOF + return + end + clen = range_length() + if clen + @socket.read clen, dest + return + end + @socket.read_all dest end - @socket.read_all dest end - def read_chunked(dest) + ## + # read_chunked reads from +@socket+ for chunk-size, chunk-extension, CRLF, + # etc. and +chunk_data_io+ for chunk-data which may be deflate or gzip + # encoded. + # + # See RFC 2616 section 3.6.1 for definitions + + def read_chunked(dest, chunk_data_io) # :nodoc: len = nil total = 0 while true @@ -250,7 +295,7 @@ class Net::HTTPResponse len = hexlen.hex break if len == 0 begin - @socket.read len, dest + chunk_data_io.read len, dest ensure total += len @socket.read 2 # \r\n @@ -266,8 +311,8 @@ class Net::HTTPResponse end def procdest(dest, block) - raise ArgumentError, 'both arg and block given for HTTP method' \ - if dest and block + raise ArgumentError, 'both arg and block given for HTTP method' if + dest and block if block Net::ReadAdapter.new(block) else @@ -275,5 +320,71 @@ class Net::HTTPResponse end end + ## + # Inflater is a wrapper around Net::BufferedIO that transparently inflates + # zlib and gzip streams. 
+ + class Inflater # :nodoc: + + ## + # Creates a new Inflater wrapping +socket+ + + def initialize socket + @socket = socket + # zlib with automatic gzip detection + @inflate = Zlib::Inflate.new(32 + Zlib::MAX_WBITS) + end + + ## + # Finishes the inflate stream. + + def finish + @inflate.finish + end + + ## + # Returns a Net::ReadAdapter that inflates each read chunk into +dest+. + # + # This allows a large response body to be inflated without storing the + # entire body in memory. + + def inflate_adapter(dest) + block = proc do |compressed_chunk| + @inflate.inflate(compressed_chunk) do |chunk| + dest << chunk + end + end + + Net::ReadAdapter.new(block) + end + + ## + # Reads +clen+ bytes from the socket, inflates them, then writes them to + # +dest+. +ignore_eof+ is passed down to Net::BufferedIO#read + # + # Unlike Net::BufferedIO#read, this method returns more than +clen+ bytes. + # At this time there is no way for a user of Net::HTTPResponse to read a + # specific number of bytes from the HTTP response body, so this internal + # API does not return the same number of bytes as were requested. + # + # See https://bugs.ruby-lang.org/issues/6492 for further discussion. + + def read clen, dest, ignore_eof = false + temp_dest = inflate_adapter(dest) + + data = @socket.read clen, temp_dest, ignore_eof + end + + ## + # Reads the rest of the socket, inflates it, then writes it to +dest+. + + def read_all dest + temp_dest = inflate_adapter(dest) + + @socket.read_all temp_dest + end + + end + end diff --git a/test/net/http/test_http_request.rb b/test/net/http/test_http_request.rb new file mode 100644 index 0000000000..c01e52c0b4 --- /dev/null +++ b/test/net/http/test_http_request.rb @@ -0,0 +1,57 @@ +require 'net/http' +require 'test/unit' +require 'stringio' + +class HTTPRequestTest < Test::Unit::TestCase + + def test_initialize_GET + req = Net::HTTP::Get.new '/' + + assert_equal 'GET', req.method + refute req.request_body_permitted? 
+ assert req.response_body_permitted? + + expected = { + 'accept' => %w[*/*], + 'user-agent' => %w[Ruby], + } + + expected['accept-encoding'] = %w[gzip;q=1.0,deflate;q=0.6,identity;q=0.3] if + Net::HTTP::HAVE_ZLIB + + assert_equal expected, req.to_hash + end + + def test_initialize_GET_range + req = Net::HTTP::Get.new '/', 'Range' => 'bytes=0-9' + + assert_equal 'GET', req.method + refute req.request_body_permitted? + assert req.response_body_permitted? + + expected = { + 'accept' => %w[*/*], + 'user-agent' => %w[Ruby], + 'range' => %w[bytes=0-9], + } + + assert_equal expected, req.to_hash + end + + def test_initialize_HEAD + req = Net::HTTP::Head.new '/' + + assert_equal 'HEAD', req.method + refute req.request_body_permitted? + refute req.response_body_permitted? + + expected = { + 'accept' => %w[*/*], + 'user-agent' => %w[Ruby], + } + + assert_equal expected, req.to_hash + end + +end + diff --git a/test/net/http/test_httpresponse.rb b/test/net/http/test_httpresponse.rb index ab6fdd0ea9..d57614bb73 100644 --- a/test/net/http/test_httpresponse.rb +++ b/test/net/http/test_httpresponse.rb @@ -4,7 +4,7 @@ require 'stringio' class HTTPResponseTest < Test::Unit::TestCase def test_singleline_header - io = dummy_io(<<EOS.gsub(/\n/, "\r\n")) + io = dummy_io(<<EOS) HTTP/1.1 200 OK Content-Length: 5 Connection: close @@ -17,7 +17,7 @@ EOS end def test_multiline_header - io = dummy_io(<<EOS.gsub(/\n/, "\r\n")) + io = dummy_io(<<EOS) HTTP/1.1 200 OK X-Foo: XXX YYY @@ -32,9 +32,163 @@ EOS assert_equal('XXX YYY', res.header['x-bar']) end + def test_read_body + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Length: 5 + +hello +EOS + + res = Net::HTTPResponse.read_new(io) + + body = nil + + res.reading_body io, true do + body = res.read_body + end + + assert_equal 'hello', body + end + + def test_read_body_block + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Length: 5 + +hello +EOS + + res = Net::HTTPResponse.read_new(io) + + body = '' + + 
res.reading_body io, true do + res.read_body do |chunk| + body << chunk + end + end + + assert_equal 'hello', body + end + + def test_read_body_content_encoding_deflate + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Encoding: deflate +Content-Length: 13 + +x\x9C\xCBH\xCD\xC9\xC9\a\x00\x06,\x02\x15 +EOS + + res = Net::HTTPResponse.read_new(io) + + body = nil + + res.reading_body io, true do + body = res.read_body + end + + assert_equal 'hello', body + end + + def test_read_body_content_encoding_deflate_chunked + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Encoding: deflate +Transfer-Encoding: chunked + +6 +x\x9C\xCBH\xCD\xC9 +7 +\xC9\a\x00\x06,\x02\x15 +0 + +EOS + + res = Net::HTTPResponse.read_new(io) + + body = nil + + res.reading_body io, true do + body = res.read_body + end + + assert_equal 'hello', body + end + + def test_read_body_content_encoding_deflate_no_length + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Encoding: deflate + +x\x9C\xCBH\xCD\xC9\xC9\a\x00\x06,\x02\x15 +EOS + + res = Net::HTTPResponse.read_new(io) + + body = nil + + res.reading_body io, true do + body = res.read_body + end + + assert_equal 'hello', body + end + + def test_read_body_content_encoding_deflate_content_range + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Accept-Ranges: bytes +Connection: close +Content-Encoding: gzip +Content-Length: 10 +Content-Range: bytes 0-9/55 + +\x1F\x8B\b\x00\x00\x00\x00\x00\x00\x03 +EOS + + res = Net::HTTPResponse.read_new(io) + + body = nil + + res.reading_body io, true do + body = res.read_body + end + + assert_equal "\x1F\x8B\b\x00\x00\x00\x00\x00\x00\x03", body + end + + def test_read_body_string + io = dummy_io(<<EOS) +HTTP/1.1 200 OK +Connection: close +Content-Length: 5 + +hello +EOS + + res = Net::HTTPResponse.read_new(io) + + body = '' + + res.reading_body io, true do + res.read_body body + end + + assert_equal 'hello', body + end + private def dummy_io(str) + str = str.gsub(/\n/, 
"\r\n") + Net::BufferedIO.new(StringIO.new(str)) end end diff --git a/test/open-uri/test_open-uri.rb b/test/open-uri/test_open-uri.rb index 2b0cfc622f..8b3f8173ce 100644 --- a/test/open-uri/test_open-uri.rb +++ b/test/open-uri/test_open-uri.rb @@ -488,12 +488,12 @@ class TestOpenURI < Test::Unit::TestCase srv.mount_proc("/data2/") {|req, res| res.body = content_gz; res['content-encoding'] = 'gzip'; res.chunked = true } srv.mount_proc("/noce/") {|req, res| res.body = content_gz } open("#{url}/data/") {|f| - assert_equal ['gzip'], f.content_encoding - assert_equal(content_gz, f.read.force_encoding("ascii-8bit")) + assert_equal [], f.content_encoding + assert_equal(content, f.read) } open("#{url}/data2/") {|f| - assert_equal ['gzip'], f.content_encoding - assert_equal(content_gz, f.read.force_encoding("ascii-8bit")) + assert_equal [], f.content_encoding + assert_equal(content, f.read) } open("#{url}/noce/") {|f| assert_equal [], f.content_encoding |