From 5f48306dfcc7bf52c7a20fb9c1f7878bc3ddb6be Mon Sep 17 00:00:00 2001 From: Juanito Fatas Date: Wed, 25 Mar 2020 11:12:00 +0900 Subject: [PATCH 1/5] Avoid clone same repo for Git gems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Gemfile installs rails master, it clones rails for 12 times. By storing Railsā€˜s subgems in the pending queue under the same name "rails". We can avoid clone rails gems multiple times. --- lib/gel/git_gems_manager.rb | 21 +++++++++++++++++++++ lib/gel/installer.rb | 14 ++++++++++---- 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 lib/gel/git_gems_manager.rb diff --git a/lib/gel/git_gems_manager.rb b/lib/gel/git_gems_manager.rb new file mode 100644 index 00000000..ff850cbb --- /dev/null +++ b/lib/gel/git_gems_manager.rb @@ -0,0 +1,21 @@ +class Gel::GitGemsManager + # These gems all live in rails/rails repository. + META_GEMS = { + "activesupport" => "rails", + "actionpack" => "rails", + "actionview" => "rails", + "activemodel" => "rails", + "activerecord" => "rails", + "actionmailer" => "rails", + "activejob" => "rails", + "actioncable" => "rails", + "activestorage" => "rails", + "actionmailbox" => "rails", + "actiontext" => "rails", + "railties" => "rails", + } + + def self.lookup(name) + META_GEMS.fetch(name) { name } + end +end diff --git a/lib/gel/installer.rb b/lib/gel/installer.rb index 41fe4da4..1decd432 100644 --- a/lib/gel/installer.rb +++ b/lib/gel/installer.rb @@ -7,6 +7,7 @@ require_relative "git_depot" require_relative "package" require_relative "package/installer" +require_relative "git_gems_manager" class Gel::Installer DOWNLOAD_CONCURRENCY = 6 @@ -74,10 +75,14 @@ def install_gem(catalogs, name, version) end def load_git_gem(remote, revision, name) + revised_name = Gel::GitGemsManager.lookup(name) + synchronize do - @pending[name] += 1 - @download_pool.queue(name) do - work_git(remote, revision, name) + if !@pending.key?(revised_name) + @pending[revised_name] += 1 + @download_pool.queue(revised_name) do + work_git(remote, revision, revised_name) + end end end end @@ -86,7 +91,8 @@ def work_git(remote, revision, name) @git_depot.checkout(remote, revision) @messages << "Using #{name} (git)\n" - @pending[name] -= 1 + revised_name = Gel::GitGemsManager.lookup(name) + @pending[revised_name] -= 1 end def download_gem(catalogs, name, version) From cfa48570fd5de21e8a3b3b80a0e72e3a88462c84 Mon Sep 17 00:00:00 2001 From: Juanito Fatas Date: Wed, 1 Apr 2020 14:36:26 +0900 Subject: [PATCH 2/5] Better way to avoid cloning multi-gem repo Use a hash to keep track of whether we've already checked out a repo by remote+revision. --- lib/gel/git_gems_manager.rb | 21 --------------------- lib/gel/installer.rb | 24 +++++++++++++++--------- 2 files changed, 15 insertions(+), 30 deletions(-) delete mode 100644 lib/gel/git_gems_manager.rb diff --git a/lib/gel/git_gems_manager.rb b/lib/gel/git_gems_manager.rb deleted file mode 100644 index ff850cbb..00000000 --- a/lib/gel/git_gems_manager.rb +++ /dev/null @@ -1,21 +0,0 @@ -class Gel::GitGemsManager - # These gems all live in rails/rails repository. - META_GEMS = { - "activesupport" => "rails", - "actionpack" => "rails", - "actionview" => "rails", - "activemodel" => "rails", - "activerecord" => "rails", - "actionmailer" => "rails", - "activejob" => "rails", - "actioncable" => "rails", - "activestorage" => "rails", - "actionmailbox" => "rails", - "actiontext" => "rails", - "railties" => "rails", - } - - def self.lookup(name) - META_GEMS.fetch(name) { name } - end -end diff --git a/lib/gel/installer.rb b/lib/gel/installer.rb index 1decd432..cd5b6c6c 100644 --- a/lib/gel/installer.rb +++ b/lib/gel/installer.rb @@ -7,7 +7,6 @@ require_relative "git_depot" require_relative "package" require_relative "package/installer" -require_relative "git_gems_manager" class Gel::Installer DOWNLOAD_CONCURRENCY = 6 @@ -28,6 +27,7 @@ def initialize(store) @dependencies = Hash.new { |h, k| h[k] = [] } @weights = Hash.new(1) @pending = Hash.new(0) + @git_remotes = Hash.new @download_pool = Gel::WorkPool.new(DOWNLOAD_CONCURRENCY, monitor: self, name: "gel-download", collect_errors: true) @compile_pool = Gel::WorkPool.new(COMPILE_CONCURRENCY, monitor: self, name: "gel-compile", collect_errors: true) @@ -75,24 +75,30 @@ def install_gem(catalogs, name, version) end def load_git_gem(remote, revision, name) - revised_name = Gel::GitGemsManager.lookup(name) - synchronize do - if !@pending.key?(revised_name) - @pending[revised_name] += 1 - @download_pool.queue(revised_name) do - work_git(remote, revision, revised_name) + if !@pending.key?(name) + @pending[name] += 1 + @download_pool.queue(name) do + if not_checked_out?(remote, revision) + work_git(remote, revision, name) + end end end end end + def not_checked_out?(remote, revision) + key = "#{remote}@#{revision}" + return false if @git_remotes.key?(key) + + @git_remotes[key] = 1 + end + def work_git(remote, revision, name) @git_depot.checkout(remote, revision) @messages << "Using #{name} (git)\n" - revised_name = Gel::GitGemsManager.lookup(name) - @pending[revised_name] -= 1 + @pending[name] -= 1 end def download_gem(catalogs, name, version) From f8b9e897fd4e16052c3c27e31b4e8be9826b45b6 Mon Sep 17 00:00:00 2001 From: Matthew Draper Date: Thu, 2 Apr 2020 01:35:21 +1030 Subject: [PATCH 3/5] Track all gems as pending for a git repo .. and then show them all in the "Using" message, too. --- lib/gel/installer.rb | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/lib/gel/installer.rb b/lib/gel/installer.rb index cd5b6c6c..e00fc7aa 100644 --- a/lib/gel/installer.rb +++ b/lib/gel/installer.rb @@ -76,27 +76,45 @@ def install_gem(catalogs, name, version) def load_git_gem(remote, revision, name) synchronize do - if !@pending.key?(name) - @pending[name] += 1 + @pending[name] += 1 + + checkout_key = "#{remote}@#{revision}" + + # Has another gem already been pulled from this git repo? + if @git_remotes.key?(checkout_key) + if waiting_gems = @git_remotes[checkout_key] + # The checkout is currently queued or in progress; we'll wait + # for it too + waiting_gems << name + else + # The checkout has already finished, so this gem is already + # available + git_ready(name) + end + else + @git_remotes[checkout_key] = [name] + @download_pool.queue(name) do - if not_checked_out?(remote, revision) - work_git(remote, revision, name) - end + work_git(remote, revision, checkout_key) end end end end - def not_checked_out?(remote, revision) - key = "#{remote}@#{revision}" - return false if @git_remotes.key?(key) + def work_git(remote, revision, checkout_key) + @git_depot.checkout(remote, revision) - @git_remotes[key] = 1 - end + synchronize do + gem_names = @git_remotes[checkout_key] + @git_remotes[checkout_key] = nil - def work_git(remote, revision, name) - @git_depot.checkout(remote, revision) + gem_names.each do |name| + git_ready(name) + end + end + end + def git_ready(name) @messages << "Using #{name} (git)\n" @pending[name] -= 1 end From 7e260271581d52a552c471a3b5c35743895f8645 Mon Sep 17 00:00:00 2001 From: Juanito Fatas Date: Thu, 2 Apr 2020 11:45:11 +0900 Subject: [PATCH 4/5] Add a test for install git gems will only clone once --- test/installer_test.rb | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 test/installer_test.rb diff --git a/test/installer_test.rb b/test/installer_test.rb new file mode 100644 index 00000000..0a101c6f --- /dev/null +++ b/test/installer_test.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +require "test_helper" + +require "gel/installer" + +class InstallerTest < Minitest::Test + def test_install_git_gems_only_checkout_once + Dir.mktmpdir do |dir| + remote = "https://github.com/rails/rails.git" + revision = "d56d2e740612717334840b0d0ea979e1ca7cf5b1" + names = ["actioncable", "actionmailbox"] + store = Gel::Store.new(dir) + installer = Gel::Installer.new(store) + fake_download_pool = Minitest::Mock.new + installer.instance_variable_set( + :@download_pool, + fake_download_pool + ) + fake_download_pool.expect(:queue, true) do |argument| + argument == "actioncable" + end + + names.each do |name| + installer.load_git_gem(remote, revision, name) + end + end + end +end From 00ca7aa3f1bdc606cf8edcac7e245f7f78fe3eac Mon Sep 17 00:00:00 2001 From: Juanito Fatas Date: Fri, 3 Apr 2020 10:16:23 +0900 Subject: [PATCH 5/5] Ensure git gem messages always appear When there is no lockfile, we've already cloned the repository earlier to resolve the lockfile. So the check here will return false and some git gems message will not appear. --- lib/gel/lock_loader.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gel/lock_loader.rb b/lib/gel/lock_loader.rb index db70865c..14f6c323 100644 --- a/lib/gel/lock_loader.rb +++ b/lib/gel/lock_loader.rb @@ -98,7 +98,7 @@ def activate(env, base_store, install: false, output: nil) revision = resolved_gem.body["revision"].first dir = git_depot.git_path(remote, revision) - if installer && !Dir.exist?(dir) + if installer installer.load_git_gem(remote, revision, name) locks[name] = -> { Gel::DirectGem.new(dir, name, resolved_gem.version) }