Skip to content

Commit

Permalink
feat(refactor): initial RFD5 impl, slight refspec handling improvement
Browse files Browse the repository at this point in the history
Signed-off-by: jlanson <[email protected]>
  • Loading branch information
j-lanson authored and mchernicoff committed Dec 24, 2024
1 parent 15b0a0f commit a3c0c91
Show file tree
Hide file tree
Showing 6 changed files with 312 additions and 208 deletions.
18 changes: 16 additions & 2 deletions hipcheck/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -500,10 +500,11 @@ impl CheckArgs {
}
impl ToTargetSeed for CheckArgs {
fn to_target_seed(&self) -> Result<TargetSeed> {
let kind = self.command()?.to_target_seed_kind()?;
let command = self.command()?;
let target = TargetSeed {
kind,
kind: command.to_target_seed_kind()?,
refspec: self.refspec.clone(),
specifier: command.get_specifier().to_owned(),
};
// Validate
if let Some(refspec) = &target.refspec {
Expand Down Expand Up @@ -538,6 +539,19 @@ pub enum CheckCommand {
Sbom(CheckSbomArgs),
}

impl CheckCommand {
fn get_specifier(&self) -> &str {
use CheckCommand::*;
match self {
Maven(args) => &args.package,
Npm(args) => &args.package,
Pypi(args) => &args.package,
Repo(args) => &args.source,
Sbom(args) => &args.path,
}
}
}

impl ToTargetSeedKind for CheckCommand {
fn to_target_seed_kind(&self) -> Result<TargetSeedKind> {
match self {
Expand Down
93 changes: 10 additions & 83 deletions hipcheck/src/session/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,12 @@ use crate::{
policy::{config_to_policy, PolicyFile},
report::{ReportParams, ReportParamsStorage},
score::ScoringProviderStorage,
session::{
cyclone_dx::extract_cyclonedx_download_url,
pm::{detect_and_extract, extract_repo_for_maven},
spdx::extract_spdx_download_url,
},
shell::{spinner_phase::SpinnerPhase, Shell},
source,
source::{SourceQuery, SourceQueryStorage},
target::{SbomStandard, Target, TargetSeed, TargetSeedKind},
target::{
resolve::{TargetResolver, TargetResolverConfig},
Target, TargetSeed, TargetSeedKind,
},
util::command::DependentProgram,
util::{git::get_git_version, npm::get_npm_version},
version::{VersionQuery, VersionQueryStorage},
Expand All @@ -39,7 +36,6 @@ use std::{
sync::Arc,
time::Duration,
};
use url::Url;

/// Immutable configuration and base data for a run of Hipcheck.
#[salsa::database(
Expand Down Expand Up @@ -294,81 +290,12 @@ fn load_target(seed: &TargetSeed, home: &Path) -> Result<Target> {

/// Resolves the target specifier into an actual target.
fn resolve_target(seed: &TargetSeed, phase: &SpinnerPhase, home: &Path) -> Result<Target> {
use TargetSeedKind::*;
#[cfg(feature = "print-timings")]
let _0 = crate::benchmarking::print_scope_time!("resolve_source");

match &seed.kind {
RemoteRepo(remote) => {
source::resolve_remote_repo(phase, home, remote.to_owned(), seed.refspec.clone())
}
LocalRepo(source) => {
// Because other TargetSeedKind variants need to transfer refspec info from the CLI,
// there's overlap with LocalGitRepo.git_ref. Copy CLI refspec here.
let mut source = source.to_owned();
source.git_ref = seed.refspec.clone().unwrap_or("HEAD".to_owned());
source::resolve_local_repo(phase, home, source)
}
Package(package) => {
// Attempt to get the git repo URL for the package
let package_git_repo_url =
detect_and_extract(package).context("Could not get git repo URL for package")?;

// Create Target for a remote git repo originating with a package
let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?;
// TargetSeed validation step should have already ensured both refspec and package
// version are not provided, so we can do this
let refspec = if let Some(refspec) = &seed.refspec {
Some(refspec.to_owned())
} else if package.has_version() {
Some(package.version.to_owned())
} else {
None
};
source::resolve_remote_package_repo(
phase,
home,
package_git_repo,
format!("{}@{}", package.name, package.version),
refspec,
)
}
MavenPackage(package) => {
// Attempt to get the git repo URL for the Maven package
let package_git_repo_url = extract_repo_for_maven(package.url.as_ref())
.context("Could not get git repo URL for Maven package")?;

// Create Target for a remote git repo originating with a Maven package
let package_git_repo = source::get_remote_repo_from_url(package_git_repo_url)?;
// We do not currently harvest version info from the maven url
source::resolve_remote_package_repo(
phase,
home,
package_git_repo,
package.url.to_string(),
seed.refspec.clone(),
)
}
Sbom(sbom) => {
let source = sbom.path.to_str().ok_or(hc_error!(
"SBOM path contained one or more invalid characters"
))?;
// Attempt to get the download location for the local SBOM package, using the function
// appropriate to the SBOM standard
let download_url = match sbom.standard {
SbomStandard::Spdx => Url::parse(&extract_spdx_download_url(source)?)?,
SbomStandard::CycloneDX => extract_cyclonedx_download_url(source)?,
};
let _0 = crate::benchmarking::print_scope_time!("resolve_target");

// Create a Target for a remote git repo originating with an SBOM
let sbom_git_repo = source::get_remote_repo_from_url(download_url)?;
source::resolve_remote_package_repo(
phase,
home,
sbom_git_repo,
source.to_string(),
seed.refspec.clone(),
)
}
}
let conf = TargetResolverConfig {
phase: Some(phase.clone()),
cache: PathBuf::from(home),
};
TargetResolver::resolve(conf, seed.clone())
}
127 changes: 4 additions & 123 deletions hipcheck/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,69 +7,13 @@ pub use crate::source::query::*;
use crate::{
error::{Context, Error, Result},
hc_error,
shell::spinner_phase::SpinnerPhase,
target::{KnownRemote, LocalGitRepo, RemoteGitRepo, Target},
target::{KnownRemote, RemoteGitRepo},
util::git::GitCommand,
};
use pathbuf::pathbuf;
use std::path::{Path, PathBuf};
use url::{Host, Url};

// Resolving is how we ensure we have a valid, ready-to-go source of Git data
// for the rest of Hipcheck's analysis. The below functions handle the resolution
// of local or remote repos.
//
// If the repo is local, the resolve function will work with the local repository
// without cloning (all operations are read-only, so this won't harm the repo at
// all).
//
// If it's a remote source, Hipcheck will clone the source so it can work with a
// local copy, putting the clone in '<root>/clones'. It also notes whether a
// remote repo is from a known or unknown host, because some forms of analysis
// rely on accessing the API's of certain known hosts (currently just GitHub).
//
// In either case, it also gets the commit hash of the HEAD commit, so we can
// make sure future operations are all done relative to the HEAD, and that any
// cached data records what the HEAD was at the time of caching, to enable
// cache invalidation.

/// Resolves a specified local git repo into a Target for analysis by Hipcheck.
///
/// The repository at `local_repo.path` is handed to `clone_local_repo_to_cache`
/// together with `root`, `local_repo.git_ref` is checked out in the resulting
/// path, and a best-effort attempt is made to find a remote for it.
///
/// # Errors
///
/// Returns an error if the path cannot be represented as a `&str`, if
/// `clone_local_repo_to_cache` fails, or if the checkout fails. Failing to
/// resolve a remote is only logged at debug level; the returned Target then
/// has `remote: None`.
pub fn resolve_local_repo(
    phase: &SpinnerPhase,
    root: &Path,
    local_repo: LocalGitRepo,
) -> Result<Target> {
    // Borrow the path rather than cloning it; `local_repo` is owned here.
    let src = local_repo.path.as_path();

    // Build the error lazily so nothing is constructed on the success path.
    let specifier = src
        .to_str()
        .ok_or_else(|| hc_error!("Path to local repo contained one or more invalid characters"))?
        .to_string();

    phase.update_status("copying");
    let path = clone_local_repo_to_cache(src, root)?;
    // The requested ref is no longer needed afterwards, so move it instead of cloning.
    let git_ref = git::checkout(&path, Some(local_repo.git_ref))?;
    phase.update_status("trying to get remote");
    let remote = match try_resolve_remote_for_local(&path) {
        Ok(remote) => Some(remote),
        Err(err) => {
            // A missing remote is not fatal.
            log::debug!("failed to get remote [err='{}']", err);
            None
        }
    };

    let local = LocalGitRepo { path, git_ref };

    Ok(Target {
        specifier,
        local,
        remote,
        package: None,
    })
}

/// Creates a RemoteGitRepo struct from a given git URL by identifying whether it is from a known host (currently only GitHub) or not
pub fn get_remote_repo_from_url(url: Url) -> Result<RemoteGitRepo> {
match url.host() {
Expand All @@ -88,54 +32,7 @@ pub fn get_remote_repo_from_url(url: Url) -> Result<RemoteGitRepo> {
}
}

/// Builds a Target for a remote git repo that was specified directly by its remote location.
///
/// The clone location under `root` is `clones/github/<owner>/<repo>` when the
/// repo's known remote is GitHub, and `clones/unknown/<dir>` otherwise, where
/// `<dir>` is produced by `build_unknown_remote_clone_dir`. The repo is then
/// passed to `clone_or_update_remote` with `refspec`, and the ref it returns is
/// recorded on the Target's local repo.
pub fn resolve_remote_repo(
    phase: &SpinnerPhase,
    root: &Path,
    remote_repo: RemoteGitRepo,
    refspec: Option<String>,
) -> Result<Target> {
    let clone_path = if let Some(KnownRemote::GitHub { owner, repo }) = &remote_repo.known_remote {
        pathbuf![root, "clones", "github", owner, repo]
    } else {
        let dir_name = build_unknown_remote_clone_dir(&remote_repo.url)
            .context("failed to prepare local clone directory")?;
        pathbuf![root, "clones", "unknown", &dir_name]
    };

    let checked_out_ref = clone_or_update_remote(phase, &remote_repo.url, &clone_path, refspec)?;

    Ok(Target {
        // For remote repos originally specified by their URL, the specifier is just that URL
        specifier: remote_repo.url.to_string(),
        local: LocalGitRepo {
            path: clone_path,
            git_ref: checked_out_ref,
        },
        remote: Some(remote_repo),
        package: None,
    })
}

/// Builds a Target for a remote git repo that was found indirectly (e.g. via a package or SPDX file).
///
/// Resolution is delegated to `resolve_remote_repo`; the only difference is
/// that the resulting Target's `specifier` is replaced with the caller-supplied
/// `specifier`.
pub fn resolve_remote_package_repo(
    phase: &SpinnerPhase,
    root: &Path,
    remote_repo: RemoteGitRepo,
    specifier: String,
    refspec: Option<String>,
) -> Result<Target> {
    resolve_remote_repo(phase, root, remote_repo, refspec).map(|mut resolved| {
        resolved.specifier = specifier;
        resolved
    })
}

fn try_resolve_remote_for_local(local: &Path) -> Result<RemoteGitRepo> {
pub fn try_resolve_remote_for_local(local: &Path) -> Result<RemoteGitRepo> {
let url = {
let symbolic_ref = get_symbolic_ref(local)?;

Expand Down Expand Up @@ -215,7 +112,7 @@ pub fn get_github_owner_and_repo(url: &Url) -> Result<(String, String)> {
Ok((owner, repo))
}

fn build_unknown_remote_clone_dir(url: &Url) -> Result<String> {
pub fn build_unknown_remote_clone_dir(url: &Url) -> Result<String> {
let mut dir = String::new();

// Add the host to the destination.
Expand All @@ -238,7 +135,7 @@ fn build_unknown_remote_clone_dir(url: &Url) -> Result<String> {
Ok(dir)
}

fn clone_local_repo_to_cache(src: &Path, root: &Path) -> Result<PathBuf> {
pub fn clone_local_repo_to_cache(src: &Path, root: &Path) -> Result<PathBuf> {
let src = src.canonicalize()?;
let hc_data_root = pathbuf![root, "clones"];
// If src dir is already in HC_CACHE/clones, leave it be. else clone from local fs
Expand All @@ -259,22 +156,6 @@ fn clone_local_repo_to_cache(src: &Path, root: &Path) -> Result<PathBuf> {
Ok(dest)
}

/// Makes sure `dest` holds the repo at `url`, then checks out `refspec` in it.
///
/// If `dest` already exists it is fetched; otherwise `url` is cloned into
/// `dest`. The spinner status is updated to "pulling" or "cloning"
/// accordingly before the git operation runs. The return value is whatever
/// `git::checkout` returns for `dest` and `refspec`.
pub fn clone_or_update_remote(
    phase: &SpinnerPhase,
    url: &Url,
    dest: &Path,
    refspec: Option<String>,
) -> Result<String> {
    let already_cloned = dest.exists();
    phase.update_status(if already_cloned { "pulling" } else { "cloning" });

    if already_cloned {
        git::fetch(dest).context("failed to update remote repository")?;
    } else {
        git::clone(url, dest).context("failed to clone remote repository")?;
    }

    git::checkout(dest, refspec)
}

fn get_symbolic_ref(dest: &Path) -> Result<String> {
let output = GitCommand::for_repo(dest, ["symbolic-ref", "-q", "HEAD"])?
.output()
Expand Down
5 changes: 5 additions & 0 deletions hipcheck/src/target/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: Apache-2.0

pub mod resolve;
pub mod types;
pub use types::*;

Expand All @@ -8,6 +9,7 @@ use crate::error::Error;
use clap::ValueEnum;
use packageurl::PackageUrl;
use serde::Serialize;
use std::path::PathBuf;
use std::str::FromStr;
use url::Url;

Expand Down Expand Up @@ -148,6 +150,9 @@ impl TargetType {
|| tgt.ends_with(".cdx.xml")
{
Some((Sbom, tgt.to_string()))
// If it is a path to a file/dir that exists, treat it as a local Repo
} else if PathBuf::from(tgt).exists() {
Some((Repo, tgt.to_string()))
} else {
None
}
Expand Down
Loading

0 comments on commit a3c0c91

Please sign in to comment.