Skip to content

Commit

Permalink
Fetch optimizations (#142)
Browse files Browse the repository at this point in the history
- When we know the commit hash, fetch only that commit (and its
ancestors).
- When we only have a revision/branch, fetch only the relevant refs (and
their ancestors).

This makes fetches significantly faster. For example, for
googleapis/googleapis, it decreases the time from 1m20s to about 30s.

Even bigger improvements would be possible with:
1. Shallow fetch. This is supported by libgit2, but I couldn't make it
work.
2. Sparse checkout. This is not supported by libgit2 at all.

#137
  • Loading branch information
rtimush authored Jun 28, 2024
1 parent e7e2885 commit 165a88f
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 53 deletions.
75 changes: 32 additions & 43 deletions src/git/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use std::{
};

use git2::{
build::RepoBuilder, cert::Cert, CertificateCheckStatus, Config, Cred, CredentialType,
FetchOptions, RemoteCallbacks, Repository,
cert::Cert, AutotagOption, CertificateCheckStatus, Config, Cred, CredentialType, FetchOptions,
RemoteCallbacks, Repository,
};
use gix_lock::Marker;
use log::{debug, info, trace};
Expand Down Expand Up @@ -81,12 +81,18 @@ impl ProtofetchGitCache {
}

pub fn repository(&self, entry: &Coordinate) -> Result<ProtoGitRepository, CacheError> {
let repo = match self.get_entry(entry) {
None => self.clone_repo(entry)?,
Some(path) => self.open_entry(&path, entry)?,
let mut path = self.location.clone();
path.push(entry.to_path());

let url = entry.to_git_url(self.default_protocol);

let repo = if path.exists() {
self.open_entry(&path, &url)?
} else {
self.create_repo(&path, &url)?
};

Ok(ProtoGitRepository::new(self, repo))
Ok(ProtoGitRepository::new(self, repo, url))
}

pub fn worktrees_path(&self) -> &Path {
Expand Down Expand Up @@ -118,51 +124,34 @@ impl ProtofetchGitCache {
}
}

fn get_entry(&self, entry: &Coordinate) -> Option<PathBuf> {
let mut full_path = self.location.clone();
full_path.push(entry.to_path());
fn open_entry(&self, path: &Path, url: &str) -> Result<Repository, CacheError> {
trace!("Opening existing repository at {}", path.display());

if full_path.exists() {
Some(full_path)
} else {
None
}
}

fn open_entry(&self, path: &Path, entry: &Coordinate) -> Result<Repository, CacheError> {
let repo = Repository::open(path).map_err(CacheError::from)?;
let repo = Repository::open(path)?;

{
let remote = repo.find_remote("origin").map_err(CacheError::from)?;

if let (Some(url), Some(protocol)) = (remote.url(), entry.protocol) {
let new_url = entry.to_git_url(protocol);

if url != new_url {
// If true then the protocol was updated before updating the cache.
trace!(
"Updating remote existing url {} to new url {}",
url,
new_url
);
repo.remote_set_url("origin", &new_url)?;
}
let remote = repo.find_remote("origin")?;
if remote.url() != Some(url) {
// If true then the protocol was updated before updating the cache.
trace!(
"Updating remote existing url {:?} to new url {}",
remote.url(),
url
);
repo.remote_set_url("origin", url)?;
}
} // `remote` reference is dropped here so that we can return `repo`
}

Ok(repo)
}

fn clone_repo(&self, entry: &Coordinate) -> Result<Repository, CacheError> {
let mut repo_builder = RepoBuilder::new();
let options = self.fetch_options()?;
repo_builder.bare(true).fetch_options(options);
fn create_repo(&self, path: &Path, url: &str) -> Result<Repository, CacheError> {
trace!("Creating a new repository at {}", path.display());

let url = entry.to_git_url(self.default_protocol);
trace!("Cloning repo {}", url);
repo_builder
.clone(&url, self.location.join(entry.to_path()).as_path())
.map_err(|e| e.into())
let repo = Repository::init_bare(path)?;
repo.remote_with_fetch("origin", url, "")?;

Ok(repo)
}

pub(super) fn fetch_options(&self) -> Result<FetchOptions<'_>, CacheError> {
Expand Down Expand Up @@ -196,7 +185,7 @@ impl ProtofetchGitCache {
let mut fetch_options = FetchOptions::new();
fetch_options
.remote_callbacks(callbacks)
.download_tags(git2::AutotagOption::All);
.download_tags(AutotagOption::None);

Ok(fetch_options)
}
Expand Down
53 changes: 43 additions & 10 deletions src/git/repository.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{path::PathBuf, str::Utf8Error};

use crate::model::protofetch::{Descriptor, ModuleName, Revision, RevisionSpecification};
use git2::{Oid, Repository, ResetType};
use git2::{Oid, Repository, ResetType, WorktreeAddOptions};
use log::{debug, warn};
use thiserror::Error;

Expand Down Expand Up @@ -40,20 +40,42 @@ pub enum ProtoRepoError {
pub struct ProtoGitRepository<'a> {
cache: &'a ProtofetchGitCache,
git_repo: Repository,
origin: String,
}

impl<'a> ProtoGitRepository<'a> {
pub fn new(cache: &'a ProtofetchGitCache, git_repo: Repository) -> ProtoGitRepository {
ProtoGitRepository { cache, git_repo }
pub fn new(
cache: &'a ProtofetchGitCache,
git_repo: Repository,
origin: String,
) -> ProtoGitRepository {
ProtoGitRepository {
cache,
git_repo,
origin,
}
}

pub fn fetch(&self, _specification: &RevisionSpecification) -> anyhow::Result<()> {
pub fn fetch(&self, specification: &RevisionSpecification) -> anyhow::Result<()> {
let mut remote = self.git_repo.find_remote("origin")?;
// TODO: we only need to fetch refspecs from RevisionSpecification
let refspecs: Vec<String> = remote
.refspecs()
.filter_map(|refspec| refspec.str().map(|s| s.to_string()))
.collect();
let mut refspecs = Vec::with_capacity(3);
if let Revision::Pinned { revision } = &specification.revision {
refspecs.push(format!("+refs/tags/{}:refs/tags/{}", revision, revision));
// Some protofetch.toml files specify branch in the revision field, so we
// need to fetch branches as well to maintain compatibility.
refspecs.push(format!(
"+refs/heads/{}:refs/remotes/origin/{}",
revision, revision
));
}
if let Some(branch) = &specification.branch {
refspecs.push(format!(
"+refs/heads/{}:refs/remotes/origin/{}",
branch, branch
));
}

debug!("Fetching {:?} from {}", refspecs, self.origin);
remote.fetch(&refspecs, Some(&mut self.cache.fetch_options()?), None)?;
Ok(())
}
Expand All @@ -69,6 +91,7 @@ impl<'a> ProtoGitRepository<'a> {
}
let mut remote = self.git_repo.find_remote("origin")?;

debug!("Fetching {} from {}", commit_hash, self.origin);
if let Err(error) =
remote.fetch(&[commit_hash], Some(&mut self.cache.fetch_options()?), None)
{
Expand Down Expand Up @@ -212,8 +235,18 @@ impl<'a> ProtoGitRepository<'a> {
worktree_path.to_string_lossy()
);

// We need to create a branch-like reference to be able to create a worktree
let reference = self.git_repo.reference(
&format!("refs/heads/{}", commit_hash),
self.git_repo.revparse_single(commit_hash)?.id(),
true,
"",
)?;

let mut options = WorktreeAddOptions::new();
options.reference(Some(&reference));
self.git_repo
.worktree(worktree_name, &worktree_path, None)?;
.worktree(worktree_name, &worktree_path, Some(&options))?;
}
};

Expand Down

0 comments on commit 165a88f

Please sign in to comment.