aboutsummaryrefslogtreecommitdiffstats
path: root/lib/bundler
diff options
context:
space:
mode:
Diffstat (limited to 'lib/bundler')
-rw-r--r--lib/bundler/compact_index_client.rb21
-rw-r--r--lib/bundler/compact_index_client/cache.rb30
-rw-r--r--lib/bundler/compact_index_client/cache_file.rb153
-rw-r--r--lib/bundler/compact_index_client/updater.rb165
-rw-r--r--lib/bundler/fetcher/compact_index.rb4
-rw-r--r--lib/bundler/shared_helpers.rb15
6 files changed, 288 insertions, 100 deletions
diff --git a/lib/bundler/compact_index_client.rb b/lib/bundler/compact_index_client.rb
index 127a50e810..68e0d7e0d5 100644
--- a/lib/bundler/compact_index_client.rb
+++ b/lib/bundler/compact_index_client.rb
@@ -5,7 +5,13 @@ require "set"
module Bundler
class CompactIndexClient
+ # NOTE: MD5 is here not because we expect a server to respond with it, but
+ # because we use it to generate the etag on first request during the upgrade
+ # to the compact index client that uses opaque etags saved to files.
+ # Remove once 2.5.0 has been out for a while.
+ SUPPORTED_DIGESTS = { "sha-256" => :SHA256, "md5" => :MD5 }.freeze
DEBUG_MUTEX = Thread::Mutex.new
+
def self.debug
return unless ENV["DEBUG_COMPACT_INDEX"]
DEBUG_MUTEX.synchronize { warn("[#{self}] #{yield}") }
@@ -14,6 +20,7 @@ module Bundler
class Error < StandardError; end
require_relative "compact_index_client/cache"
+ require_relative "compact_index_client/cache_file"
require_relative "compact_index_client/updater"
attr_reader :directory
@@ -54,13 +61,13 @@ module Bundler
def names
Bundler::CompactIndexClient.debug { "/names" }
- update(@cache.names_path, "names")
+ update("names", @cache.names_path, @cache.names_etag_path)
@cache.names
end
def versions
Bundler::CompactIndexClient.debug { "/versions" }
- update(@cache.versions_path, "versions")
+ update("versions", @cache.versions_path, @cache.versions_etag_path)
versions, @info_checksums_by_name = @cache.versions
versions
end
@@ -76,36 +83,36 @@ module Bundler
def update_and_parse_checksums!
Bundler::CompactIndexClient.debug { "update_and_parse_checksums!" }
return @info_checksums_by_name if @parsed_checksums
- update(@cache.versions_path, "versions")
+ update("versions", @cache.versions_path, @cache.versions_etag_path)
@info_checksums_by_name = @cache.checksums
@parsed_checksums = true
end
private
- def update(local_path, remote_path)
+ def update(remote_path, local_path, local_etag_path)
Bundler::CompactIndexClient.debug { "update(#{local_path}, #{remote_path})" }
unless synchronize { @endpoints.add?(remote_path) }
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
return
end
- @updater.update(local_path, url(remote_path))
+ @updater.update(url(remote_path), local_path, local_etag_path)
end
def update_info(name)
Bundler::CompactIndexClient.debug { "update_info(#{name})" }
path = @cache.info_path(name)
- checksum = @updater.checksum_for_file(path)
unless existing = @info_checksums_by_name[name]
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since it is missing from versions" }
return
end
+ checksum = SharedHelpers.checksum_for_file(path, :MD5)
if checksum == existing
Bundler::CompactIndexClient.debug { "skipping updating info for #{name} since the versions checksum matches the local checksum" }
return
end
Bundler::CompactIndexClient.debug { "updating info for #{name} since the versions checksum #{existing} != the local checksum #{checksum}" }
- update(path, "info/#{name}")
+ update("info/#{name}", path, @cache.info_etag_path(name))
end
def url(path)
diff --git a/lib/bundler/compact_index_client/cache.rb b/lib/bundler/compact_index_client/cache.rb
index b5607c8751..5efdf18eba 100644
--- a/lib/bundler/compact_index_client/cache.rb
+++ b/lib/bundler/compact_index_client/cache.rb
@@ -9,11 +9,8 @@ module Bundler
def initialize(directory)
@directory = Pathname.new(directory).expand_path
- info_roots.each do |dir|
- SharedHelpers.filesystem_access(dir) do
- FileUtils.mkdir_p(dir)
- end
- end
+ info_roots.each {|dir| mkdir(dir) }
+ mkdir(info_etag_root)
end
def names
@@ -24,6 +21,10 @@ module Bundler
directory.join("names")
end
+ def names_etag_path
+ directory.join("names.etag")
+ end
+
def versions
versions_by_name = Hash.new {|hash, key| hash[key] = [] }
info_checksums_by_name = {}
@@ -49,6 +50,10 @@ module Bundler
directory.join("versions")
end
+ def versions_etag_path
+ directory.join("versions.etag")
+ end
+
def checksums
checksums = {}
@@ -76,8 +81,19 @@ module Bundler
end
end
+ def info_etag_path(name)
+ name = name.to_s
+ info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}")
+ end
+
private
+ def mkdir(dir)
+ SharedHelpers.filesystem_access(dir) do
+ FileUtils.mkdir_p(dir)
+ end
+ end
+
def lines(path)
return [] unless path.file?
lines = SharedHelpers.filesystem_access(path, :read, &:read).split("\n")
@@ -96,6 +112,10 @@ module Bundler
directory.join("info-special-characters"),
]
end
+
+ def info_etag_root
+ directory.join("info-etags")
+ end
end
end
end
diff --git a/lib/bundler/compact_index_client/cache_file.rb b/lib/bundler/compact_index_client/cache_file.rb
new file mode 100644
index 0000000000..5988bc91b3
--- /dev/null
+++ b/lib/bundler/compact_index_client/cache_file.rb
@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+
+require_relative "../vendored_fileutils"
+require "rubygems/package"
+
+module Bundler
+ class CompactIndexClient
+ # write cache files in a way that is robust to concurrent modifications
+ # if digests are given, the checksums will be verified
+ class CacheFile
+ DEFAULT_FILE_MODE = 0o644
+ private_constant :DEFAULT_FILE_MODE
+
+ class Error < RuntimeError; end
+ class ClosedError < Error; end
+
+ class DigestMismatchError < Error
+ def initialize(digests, expected_digests)
+ super "Calculated checksums #{digests.inspect} did not match expected #{expected_digests.inspect}."
+ end
+ end
+
+ # Initialize with a copy of the original file, then yield the instance.
+ def self.copy(path, &block)
+ new(path) do |file|
+ file.initialize_digests
+
+ SharedHelpers.filesystem_access(path, :read) do
+ path.open("rb") do |s|
+ file.open {|f| IO.copy_stream(s, f) }
+ end
+ end
+
+ yield file
+ end
+ end
+
+ # Write data to a temp file, then replace the original file with it verifying the digests if given.
+ def self.write(path, data, digests = nil)
+ return unless data
+ new(path) do |file|
+ file.digests = digests
+ file.write(data)
+ end
+ end
+
+ attr_reader :original_path, :path
+
+ def initialize(original_path, &block)
+ @original_path = original_path
+ @perm = original_path.file? ? original_path.stat.mode : DEFAULT_FILE_MODE
+ @path = original_path.sub(/$/, ".#{$$}.tmp")
+ return unless block_given?
+ begin
+ yield self
+ ensure
+ close
+ end
+ end
+
+ def size
+ path.size
+ end
+
+ # initialize the digests using CompactIndexClient::SUPPORTED_DIGESTS, or a subset based on keys.
+ def initialize_digests(keys = nil)
+ @digests = keys ? SUPPORTED_DIGESTS.slice(*keys) : SUPPORTED_DIGESTS.dup
+ @digests.transform_values! {|algo_class| SharedHelpers.digest(algo_class).new }
+ end
+
+ # reset the digests so they don't contain any previously read data
+ def reset_digests
+ @digests&.each_value(&:reset)
+ end
+
+ # set the digests that will be verified at the end
+ def digests=(expected_digests)
+ @expected_digests = expected_digests
+
+ if @expected_digests.nil?
+ @digests = nil
+ elsif @digests
+ @digests = @digests.slice(*@expected_digests.keys)
+ else
+ initialize_digests(@expected_digests.keys)
+ end
+ end
+
+ # remove this method when we stop generating md5 digests for legacy etags
+ def md5
+ @digests && @digests["md5"]
+ end
+
+ def digests?
+ @digests&.any?
+ end
+
+ # Open the temp file for writing, reusing original permissions, yielding the IO object.
+ def open(write_mode = "wb", perm = @perm, &block)
+ raise ClosedError, "Cannot reopen closed file" if @closed
+ SharedHelpers.filesystem_access(path, :write) do
+ path.open(write_mode, perm) do |f|
+ yield digests? ? Gem::Package::DigestIO.new(f, @digests) : f
+ end
+ end
+ end
+
+ # Returns false without appending when no digests since appending is too error prone to do without digests.
+ def append(data)
+ return false unless digests?
+ open("a") {|f| f.write data }
+ verify && commit
+ end
+
+ def write(data)
+ reset_digests
+ open {|f| f.write data }
+ commit!
+ end
+
+ def commit!
+ verify || raise(DigestMismatchError.new(@base64digests, @expected_digests))
+ commit
+ end
+
+ # Verify the digests, returning true on match, false on mismatch.
+ def verify
+ return true unless @expected_digests && digests?
+ @base64digests = @digests.transform_values!(&:base64digest)
+ @digests = nil
+ @base64digests.all? {|algo, digest| @expected_digests[algo] == digest }
+ end
+
+ # Replace the original file with the temp file without verifying digests.
+ # The file is permanently closed.
+ def commit
+ raise ClosedError, "Cannot commit closed file" if @closed
+ SharedHelpers.filesystem_access(original_path, :write) do
+ FileUtils.mv(path, original_path)
+ end
+ @closed = true
+ end
+
+ # Remove the temp file without replacing the original file.
+ # The file is permanently closed.
+ def close
+ return if @closed
+ FileUtils.remove_file(path) if @path&.file?
+ @closed = true
+ end
+ end
+ end
+end
diff --git a/lib/bundler/compact_index_client/updater.rb b/lib/bundler/compact_index_client/updater.rb
index 3b75d5c129..c4686fad7d 100644
--- a/lib/bundler/compact_index_client/updater.rb
+++ b/lib/bundler/compact_index_client/updater.rb
@@ -1,20 +1,11 @@
# frozen_string_literal: true
-require_relative "../vendored_fileutils"
-
module Bundler
class CompactIndexClient
class Updater
- class MisMatchedChecksumError < Error
- def initialize(path, server_checksum, local_checksum)
- @path = path
- @server_checksum = server_checksum
- @local_checksum = local_checksum
- end
-
- def message
- "The checksum of /#{@path} does not match the checksum provided by the server! Something is wrong " \
- "(local checksum is #{@local_checksum.inspect}, was expecting #{@server_checksum.inspect})."
+ class MismatchedChecksumError < Error
+ def initialize(path, message)
+ super "The checksum of /#{path} does not match the checksum provided by the server! Something is wrong. #{message}"
end
end
@@ -22,100 +13,102 @@ module Bundler
@fetcher = fetcher
end
- def update(local_path, remote_path, retrying = nil)
- headers = {}
-
- local_temp_path = local_path.sub(/$/, ".#{$$}")
- local_temp_path = local_temp_path.sub(/$/, ".retrying") if retrying
- local_temp_path = local_temp_path.sub(/$/, ".tmp")
-
- # first try to fetch any new bytes on the existing file
- if retrying.nil? && local_path.file?
- copy_file local_path, local_temp_path
+ def update(remote_path, local_path, etag_path)
+ append(remote_path, local_path, etag_path) || replace(remote_path, local_path, etag_path)
+ rescue CacheFile::DigestMismatchError => e
+ raise MismatchedChecksumError.new(remote_path, e.message)
+ rescue Zlib::GzipFile::Error
+ raise Bundler::HTTPError
+ end
- headers["If-None-Match"] = etag_for(local_temp_path)
- headers["Range"] =
- if local_temp_path.size.nonzero?
- # Subtract a byte to ensure the range won't be empty.
- # Avoids 416 (Range Not Satisfiable) responses.
- "bytes=#{local_temp_path.size - 1}-"
- else
- "bytes=#{local_temp_path.size}-"
- end
- end
+ private
- response = @fetcher.call(remote_path, headers)
- return nil if response.is_a?(Net::HTTPNotModified)
+ def append(remote_path, local_path, etag_path)
+ return false unless local_path.file? && local_path.size.nonzero?
- content = response.body
+ CacheFile.copy(local_path) do |file|
+ etag = etag_path.read.tap(&:chomp!) if etag_path.file?
+ etag ||= generate_etag(etag_path, file) # Remove this after 2.5.0 has been out for a while.
- etag = (response["ETag"] || "").gsub(%r{\AW/}, "")
- correct_response = SharedHelpers.filesystem_access(local_temp_path) do
- if response.is_a?(Net::HTTPPartialContent) && local_temp_path.size.nonzero?
- local_temp_path.open("a") {|f| f << slice_body(content, 1..-1) }
+ # Subtract a byte to ensure the range won't be empty.
+ # Avoids 416 (Range Not Satisfiable) responses.
+ response = @fetcher.call(remote_path, request_headers(etag, file.size - 1))
+ break true if response.is_a?(Net::HTTPNotModified)
- etag_for(local_temp_path) == etag
+ file.digests = parse_digests(response)
+ # server may ignore Range and return the full response
+ if response.is_a?(Net::HTTPPartialContent)
+ break false unless file.append(response.body.byteslice(1..-1))
else
- local_temp_path.open("wb") {|f| f << content }
-
- etag.length.zero? || etag_for(local_temp_path) == etag
+ file.write(response.body)
end
+ CacheFile.write(etag_path, etag(response))
+ true
end
+ end
- if correct_response
- SharedHelpers.filesystem_access(local_path) do
- FileUtils.mv(local_temp_path, local_path)
- end
- return nil
- end
+ # request without range header to get the full file or a 304 Not Modified
+ def replace(remote_path, local_path, etag_path)
+ etag = etag_path.read.tap(&:chomp!) if etag_path.file?
+ response = @fetcher.call(remote_path, request_headers(etag))
+ return true if response.is_a?(Net::HTTPNotModified)
+ CacheFile.write(local_path, response.body, parse_digests(response))
+ CacheFile.write(etag_path, etag(response))
+ end
- if retrying
- raise MisMatchedChecksumError.new(remote_path, etag, etag_for(local_temp_path))
- end
+ def request_headers(etag, range_start = nil)
+ headers = {}
+ headers["Range"] = "bytes=#{range_start}-" if range_start
+ headers["If-None-Match"] = etag if etag
+ headers
+ end
- update(local_path, remote_path, :retrying)
- rescue Zlib::GzipFile::Error
- raise Bundler::HTTPError
- ensure
- FileUtils.remove_file(local_temp_path) if File.exist?(local_temp_path)
+ def etag_for_request(etag_path)
+ etag_path.read.tap(&:chomp!) if etag_path.file?
end
- def etag_for(path)
- sum = checksum_for_file(path)
- sum ? %("#{sum}") : nil
+ # When first releasing this opaque etag feature, we want to generate the old MD5 etag
+ # based on the content of the file. After that it will always use the saved opaque etag.
+ # This transparently saves existing users with good caches from updating a bunch of files.
+ # Remove this behavior after 2.5.0 has been out for a while.
+ def generate_etag(etag_path, file)
+ etag = file.md5.hexdigest
+ CacheFile.write(etag_path, etag)
+ etag
end
- def slice_body(body, range)
- body.byteslice(range)
+ def etag(response)
+ return unless response["ETag"]
+ etag = response["ETag"].delete_prefix("W/")
+ return if etag.delete_prefix!('"') && !etag.delete_suffix!('"')
+ etag
end
- def checksum_for_file(path)
- return nil unless path.file?
- # This must use File.read instead of Digest.file().hexdigest
- # because we need to preserve \n line endings on windows when calculating
- # the checksum
- SharedHelpers.filesystem_access(path, :read) do
- File.open(path, "rb") do |f|
- digest = SharedHelpers.digest(:MD5).new
- buf = String.new(:capacity => 16_384, :encoding => Encoding::BINARY)
- digest << buf while f.read(16_384, buf)
- digest.hexdigest
- end
+ # Unwraps and returns a Hash of digest algorithms and base64 values
+ # according to RFC 8941 Structured Field Values for HTTP.
+ # https://www.rfc-editor.org/rfc/rfc8941#name-parsing-a-byte-sequence
+ # Ignores unsupported algorithms.
+ def parse_digests(response)
+ return unless header = response["Repr-Digest"] || response["Digest"]
+ digests = {}
+ header.split(",") do |param|
+ algorithm, value = param.split("=", 2)
+ algorithm.strip!
+ algorithm.downcase!
+ next unless SUPPORTED_DIGESTS.key?(algorithm)
+ next unless value = byte_sequence(value)
+ digests[algorithm] = value
end
+ digests.empty? ? nil : digests
end
- private
-
- def copy_file(source, dest)
- SharedHelpers.filesystem_access(source, :read) do
- File.open(source, "r") do |s|
- SharedHelpers.filesystem_access(dest, :write) do
- File.open(dest, "wb", s.stat.mode) do |f|
- IO.copy_stream(s, f)
- end
- end
- end
- end
+ # Unwrap surrounding colons (byte sequence)
+ # The wrapping characters must be matched or we return nil.
+ # Also handles quotes because right now rubygems.org sends them.
+ def byte_sequence(value)
+ return if value.delete_prefix!(":") && !value.delete_suffix!(":")
+ return if value.delete_prefix!('"') && !value.delete_suffix!('"')
+ value
end
end
end
diff --git a/lib/bundler/fetcher/compact_index.rb b/lib/bundler/fetcher/compact_index.rb
index dc30443e27..f0ba30c7ca 100644
--- a/lib/bundler/fetcher/compact_index.rb
+++ b/lib/bundler/fetcher/compact_index.rb
@@ -13,7 +13,7 @@ module Bundler
undef_method(method_name)
define_method(method_name) do |*args, &blk|
method.bind(self).call(*args, &blk)
- rescue NetworkDownError, CompactIndexClient::Updater::MisMatchedChecksumError => e
+ rescue NetworkDownError, CompactIndexClient::Updater::MismatchedChecksumError => e
raise HTTPError, e.message
rescue AuthenticationRequiredError, BadAuthenticationError
# Fail since we got a 401 from the server.
@@ -62,7 +62,7 @@ module Bundler
end
# Read info file checksums out of /versions, so we can know if gems are up to date
compact_index_client.update_and_parse_checksums!
- rescue CompactIndexClient::Updater::MisMatchedChecksumError => e
+ rescue CompactIndexClient::Updater::MismatchedChecksumError => e
Bundler.ui.debug(e.message)
nil
end
diff --git a/lib/bundler/shared_helpers.rb b/lib/bundler/shared_helpers.rb
index cccc2b63d9..fa7db1c09d 100644
--- a/lib/bundler/shared_helpers.rb
+++ b/lib/bundler/shared_helpers.rb
@@ -193,6 +193,21 @@ module Bundler
Digest(name)
end
+ def checksum_for_file(path, digest)
+ return unless path.file?
+ # This must use File.read instead of Digest.file().hexdigest
+ # because we need to preserve \n line endings on windows when calculating
+ # the checksum
+ SharedHelpers.filesystem_access(path, :read) do
+ File.open(path, "rb") do |f|
+ digest = SharedHelpers.digest(digest).new
+ buf = String.new(:capacity => 16_384, :encoding => Encoding::BINARY)
+ digest << buf while f.read(16_384, buf)
+ digest.hexdigest
+ end
+ end
+ end
+
def write_to_gemfile(gemfile_path, contents)
filesystem_access(gemfile_path) {|g| File.open(g, "w") {|file| file.puts contents } }
end