From 5e860b0359a1638d89fa82b7459b3b76ca691fab Mon Sep 17 00:00:00 2001 From: Rohit kumar Date: Fri, 22 Nov 2024 15:55:51 +0530 Subject: [PATCH] PR Clone fix --- .github/workflows/build.yml | 2 +- src/scans/tools/license_tool.rs | 59 +++++-- src/scans/tools/sast_tool.rs | 4 +- src/scans/tools/sca_tool.rs | 4 +- src/scans/tools/secret_tool.rs | 2 +- src/utils/common.rs | 301 ++++++++++++++++++-------------- 6 files changed, 222 insertions(+), 150 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 79344e4..f3c7b6a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,4 +28,4 @@ jobs: context: . file: ./Dockerfile push: true - tags: ${{ secrets.DOCKERHUB_USERNAME }}/hela:v7 + tags: ${{ secrets.DOCKERHUB_USERNAME }}/hela:v8 diff --git a/src/scans/tools/license_tool.rs b/src/scans/tools/license_tool.rs index 692d61e..9ebb2b3 100644 --- a/src/scans/tools/license_tool.rs +++ b/src/scans/tools/license_tool.rs @@ -1,11 +1,12 @@ - use std::{collections::HashMap, time::Instant}; use mongodb::bson::uuid; use serde_json::json; -use crate::{utils::{common::{execute_command, post_json_data}, file_utils::find_files_recursively}, scans::tools::sca_tool::SUPPORTED_MANIFESTS}; - +use crate::{ + scans::tools::sca_tool::SUPPORTED_MANIFESTS, + utils::{common::execute_command, file_utils::find_files_recursively}, +}; pub struct LicenseTool; @@ -13,8 +14,14 @@ impl LicenseTool { pub fn new() -> Self { LicenseTool } - - pub async fn run_scan(&self, _path: &str, _commit_id: Option<&str>, _branch: Option<&str>, verbose: bool) { + + pub async fn run_scan( + &self, + _path: &str, + _commit_id: Option<&str>, + _branch: Option<&str>, + verbose: bool, + ) { let start_time = Instant::now(); if verbose { println!("[+] Running License compliance scan on path: {}", _path); @@ -33,11 +40,11 @@ impl LicenseTool { if let Some(_branch) = _branch { let clone_command = format!("git clone -b {} {} /tmp/app", _branch, _path); execute_command(&clone_command, false).await; - }else{ + } else { let clone_command = format!("git clone {} /tmp/app", _path); execute_command(&clone_command, false).await; } - }else{ + } else { if verbose { println!("[+] Copying project to /tmp/app..."); } @@ -58,20 +65,27 @@ impl LicenseTool { // now run secret scan on /tmp/code folder _path = format!("/tmp/code"); } - let manifests = find_files_recursively(&_path, unsafe { SUPPORTED_MANIFESTS.to_vec() }, ignore_dirs).await; + let manifests = + find_files_recursively(&_path, unsafe { SUPPORTED_MANIFESTS.to_vec() }, ignore_dirs) + .await; let mut manifest_license = HashMap::new(); for manifest in manifests.iter() { let file_name = manifest.split("/").last().unwrap(); let folder_path = manifest.replace(file_name, ""); let random_file_name = format!("{}.json", uuid::Uuid::new().to_string()); // if manifest ends with pom.xml then pass -t java otherwise nothing - let mut license_command = format!("cd {} && cdxgen -o {}", folder_path, random_file_name); + let mut license_command = + format!("cd {} && cdxgen -o {}", folder_path, random_file_name); if file_name.ends_with("pom.xml") { - license_command = format!("cd {} && cdxgen -o {} -t java", folder_path, random_file_name); + license_command = format!( + "cd {} && cdxgen -o {} -t java", + folder_path, random_file_name + ); } execute_command(&license_command, false).await; // Read JSON file and parse data - let license_json = std::fs::read_to_string(format!("{}/{}", folder_path, random_file_name)).unwrap(); + let license_json = + std::fs::read_to_string(format!("{}/{}", folder_path, random_file_name)).unwrap(); let json_data = serde_json::from_str::(&license_json).unwrap(); // extract license data from "components" key there will be list of components so grab licenses from there let components = json_data["components"].as_array().unwrap(); @@ -87,8 +101,14 @@ impl LicenseTool { license_names.push(license["id"].as_str().unwrap().to_string()); } } - component_licenses.insert(format!("{}@{}", component_name, component_version), license_names); - manifest_license.insert(format!("{}/{}", folder_path, file_name), component_licenses.clone()); + component_licenses.insert( + format!("{}@{}", component_name, component_version), + license_names, + ); + manifest_license.insert( + format!("{}/{}", folder_path, file_name), + component_licenses.clone(), + ); } } // save data in output.json and before that get json data from output.json file if it exists and then append new data to it @@ -99,10 +119,17 @@ impl LicenseTool { output_json = serde_json::from_str::(&output_json_data).unwrap(); } output_json["license"] = json!(manifest_license); - std::fs::write("/tmp/output.json", serde_json::to_string_pretty(&output_json).unwrap()).unwrap(); + std::fs::write( + "/tmp/output.json", + serde_json::to_string_pretty(&output_json).unwrap(), + ) + .unwrap(); let end_time = Instant::now(); let elapsed_time = end_time - start_time; let elapsed_seconds = elapsed_time.as_secs_f64().round(); - println!("Execution time for License Compliance scan: {:?} seconds", elapsed_seconds); + println!( + "Execution time for License Compliance scan: {:?} seconds", + elapsed_seconds + ); } -} \ No newline at end of file +} diff --git a/src/scans/tools/sast_tool.rs b/src/scans/tools/sast_tool.rs index dc10f3d..351e8a2 100644 --- a/src/scans/tools/sast_tool.rs +++ b/src/scans/tools/sast_tool.rs @@ -21,7 +21,7 @@ impl SastTool { ) { let start_time = Instant::now(); if verbose { - println!("[+] Running SAST scan on path: {}", _path.clone()); + println!("[+] Running SAST scan on path: {}", _path); } println!("Commit ID: {:?}", _commit_id); println!("Branch: {:?}", _branch); @@ -54,7 +54,7 @@ impl SastTool { if verbose { println!("[+] Copying project to /tmp/app..."); } - let copy_command = format!("cp -r {} /tmp/app", _path.clone()); + let copy_command = format!("cp -r {} /tmp/app", _path); execute_command(©_command, true).await; } } diff --git a/src/scans/tools/sca_tool.rs b/src/scans/tools/sca_tool.rs index 513b799..15cad20 100644 --- a/src/scans/tools/sca_tool.rs +++ b/src/scans/tools/sca_tool.rs @@ -3,7 +3,7 @@ use std::{collections::HashMap, fs, time::Instant}; use serde_json::{json, Value}; use crate::utils::{ - common::{checkout, execute_command, post_json_data}, + common::{checkout, execute_command}, file_utils::find_files_recursively, }; @@ -251,7 +251,7 @@ impl ScaTool { if verbose { println!("[+] Copying project to /tmp/app..."); } - let copy_command = format!("cp -r {} /tmp/app", _path.clone()); + let copy_command = format!("cp -r {} /tmp/app", _path); execute_command(©_command, true).await; } } diff --git a/src/scans/tools/secret_tool.rs b/src/scans/tools/secret_tool.rs index 17f15ae..fb6f8fc 100644 --- a/src/scans/tools/secret_tool.rs +++ b/src/scans/tools/secret_tool.rs @@ -32,7 +32,7 @@ impl SecretTool { if verbose { println!("[+] Copying project to /tmp/app..."); } - let copy_command = format!("cp -r {} /tmp/app", _path.clone()); + let copy_command = format!("cp -r {} /tmp/app", _path); execute_command(©_command, true).await; } } diff --git a/src/utils/common.rs b/src/utils/common.rs index 01c9a6e..174e3d8 100644 --- a/src/utils/common.rs +++ b/src/utils/common.rs @@ -4,22 +4,23 @@ use futures::StreamExt; use mongodb::{ bson::{doc, Bson, Document}, error::Error, - options::{ClientOptions, FindOptions}, + options::ClientOptions, Client, Collection, }; use chrono::Utc; use regex::Regex; -use reqwest::header::{HeaderMap, HeaderValue}; use serde_json::Value; use sha2::{Digest, Sha256}; -use std::fs::{self, File}; -use std::io::Read; -use std::path::{Path, PathBuf}; -use std::process::Stdio; +use std::io::{Read, Write}; +use std::path::Path; use std::time::Duration; use std::{collections::HashMap, process::Command}; use std::{collections::HashSet, env}; +use std::{ + fs::{self, File}, + path::PathBuf, +}; // define static exit codes and message pub const EXIT_CODE_LICENSE_FAILED: i32 = 101; @@ -47,7 +48,7 @@ pub async fn upload_to_defect_dojo( engagement_name: &str, filename: &str, ) -> Result<(), reqwest::Error> { - let mut file = File::open(filename.clone()).unwrap(); + let mut file = File::open(filename).unwrap(); let mut buffer = Vec::new(); file.read_to_end(&mut buffer).unwrap(); @@ -142,7 +143,7 @@ pub fn redact_github_token(input: &str) -> String { async fn connect_to_mongodb( mongo_uri: &str, - db_name: &str, + _db_name: &str, ) -> Result { let client_options = ClientOptions::parse(mongo_uri).await?; let client = Client::with_options(client_options)?; @@ -266,159 +267,203 @@ pub async fn execute_command(command: &str, suppress_error: bool) -> String { stdout.to_string() } -pub fn checkout( - clone_url: &str, - clone_path: &str, - branch: Option<&str>, - pr_branch: Option<&str>, -) -> Result<(), Box> { - // Clone the repository; use the specified branch or default branch if `branch` is None - let mut clone_cmd = Command::new("git"); - clone_cmd.arg("clone").arg(clone_url).arg(clone_path); - if let Some(branch_name) = branch { - clone_cmd.arg("--branch").arg(branch_name); - } - let output = clone_cmd.output()?; - if !output.status.success() { - let error_msg = String::from_utf8_lossy(&output.stderr); - return Err(format!("Failed to clone repository: {}", error_msg).into()); - } +fn delete_except(files: &[String], base_dir: &Path) -> Result<(), Box> { + println!("Deleting all files except the following:"); + println!("__________________________________________ {:?}", files); + let files_to_keep: Vec = files + .iter() + .map(|file| base_dir.join(file.trim())) + .collect(); - // Set the working directory to the cloned path - let cloned_path = Path::new(clone_path).canonicalize()?; - let repo_path = cloned_path.to_str().unwrap(); - env::set_current_dir(&cloned_path)?; + traverse_and_delete(base_dir, &files_to_keep)?; - // Configure Git user for commits in this repository - Command::new("git") - .args(&["config", "user.email", "ci@example.com"]) - .output()?; - Command::new("git") - .args(&["config", "user.name", "CI Bot"]) - .output()?; + Ok(()) +} - // Store the set of changed files - let mut changed_files = HashSet::new(); +fn traverse_and_delete(base_dir: &Path, files_to_keep: &[PathBuf]) -> Result<(), std::io::Error> { + for entry in fs::read_dir(base_dir)? { + let entry = entry?; + let path = entry.path(); - // If a pr_branch is provided, fetch it as a local branch and compare with the base branch - if let Some(pr_branch_name) = pr_branch { - // Fetch the PR branch and create a local branch - let fetch_output = Command::new("git") - .args(&[ - "fetch", - "origin", - &format!("{}:{}", pr_branch_name, pr_branch_name), - ]) - .output()?; - if !fetch_output.status.success() { - let error_msg = String::from_utf8_lossy(&fetch_output.stderr); - return Err(format!( - "Failed to fetch PR branch '{}': {}", - pr_branch_name, error_msg - ) - .into()); + // Skip the .git directory + if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") { + continue; } - // Perform a diff between `branch` (or the default branch) and `pr_branch` - let base_branch = branch.unwrap_or("HEAD"); - let diff_output = Command::new("git") - .args(&["diff", "--name-only", base_branch, pr_branch_name]) - .output()?; - - if !diff_output.status.success() { - let error_msg = String::from_utf8_lossy(&diff_output.stderr); - return Err(format!("Failed to diff branches: {}", error_msg).into()); + if path.is_dir() { + traverse_and_delete(&path, files_to_keep)?; } - // Parse the diff output into a set of changed files - let diff_output_str = String::from_utf8_lossy(&diff_output.stdout); - for line in diff_output_str.lines() { - changed_files.insert(line.trim().to_string()); + // Check if the path should be deleted (only delete files) + if path.is_file() && !files_to_keep.contains(&path.canonicalize()?) { + fs::remove_file(&path)?; } - } else { - // If no PR branch, list all files in the base branch - let list_output = Command::new("git") - .args(&["ls-tree", "-r", "--name-only", "HEAD"]) - .output()?; + } + + Ok(()) +} + +fn delete_empty_directories(start_dir: &Path) -> Result<(), std::io::Error> { + for entry in fs::read_dir(start_dir)? { + let entry = entry?; + let path = entry.path(); - if !list_output.status.success() { - let error_msg = String::from_utf8_lossy(&list_output.stderr); - return Err(format!("Failed to list files in base branch: {}", error_msg).into()); + // Skip the .git directory + if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") { + continue; } - // Parse the list output into a set of files - let list_output_str = String::from_utf8_lossy(&list_output.stdout); - for line in list_output_str.lines() { - changed_files.insert(line.trim().to_string()); + if path.is_dir() { + delete_empty_directories(&path)?; + if fs::read_dir(&path)?.next().is_none() { + fs::remove_dir(&path)?; + } } } - // Print the changed files for debugging purposes - println!("Changed files:\n{:#?}", changed_files); + Ok(()) +} - // Ensure the working directory is up-to-date before checking out files - Command::new("git") - .args(&["checkout", pr_branch.unwrap_or("HEAD")]) - .output()?; +fn get_cumulative_pr_files( + base_branch: Option<&str>, + pr_branch: Option<&str>, +) -> Result, Box> { + if let Some(pr) = pr_branch { + // If base branch is provided, merge it into a temp branch + if let Some(base) = base_branch { + // Step 1: Checkout the base branch + Command::new("git").args(&["checkout", base]).output()?; + + // Step 2: Create a temporary merge branch + Command::new("git") + .args(&["checkout", "-b", "temp_pr_merge_branch", base]) + .output()?; + + // Step 3: Merge the PR branch into the temporary branch + let merge_output = Command::new("git") + .args(&["merge", "--no-ff", &format!("origin/{}", pr)]) + .output()?; + if !merge_output.status.success() { + let error_msg = String::from_utf8_lossy(&merge_output.stderr); + return Err(format!("Failed to merge PR branch: {}", error_msg).into()); + } - // Ensure each changed file is checked out from the PR branch - for file in &changed_files { - let checkout_output = Command::new("git") - .args(&["checkout", pr_branch.unwrap_or("HEAD"), "--", file]) - .output()?; + // Step 4: Get the list of changed files between base and temp PR branch + let diff_output = Command::new("git") + .args(&["diff", "--name-only", base, "temp_pr_merge_branch"]) + .output()?; + if !diff_output.status.success() { + let error_msg = String::from_utf8_lossy(&diff_output.stderr); + return Err(format!("Failed to get changed files: {}", error_msg).into()); + } + + let changed_files: Vec = String::from_utf8_lossy(&diff_output.stdout) + .lines() + .map(String::from) + .collect(); + + // No cleanup: Stay on the temporary branch to get the PR branch content + + Ok(changed_files) + } else { + // If only PR branch is provided, just get the list of files in that branch + let diff_output = Command::new("git") + .args(&["ls-tree", "-r", "--name-only", &format!("origin/{}", pr)]) + .output()?; + if !diff_output.status.success() { + let error_msg = String::from_utf8_lossy(&diff_output.stderr); + return Err(format!("Failed to list files in PR branch: {}", error_msg).into()); + } - if !checkout_output.status.success() { - let error_msg = String::from_utf8_lossy(&checkout_output.stderr); - println!("Failed to checkout file '{}': {}", file, error_msg); + let changed_files: Vec = String::from_utf8_lossy(&diff_output.stdout) + .lines() + .map(String::from) + .collect(); + Ok(changed_files) } + } else { + Err("PR branch is required to fetch changes.".into()) } +} + +fn save_pr_branch_files( + changed_files: &[String], + pr_branch: &str, +) -> Result<(), Box> { + for file in changed_files { + let file_content = Command::new("git") + .args(&["show", &format!("origin/{}:{}", pr_branch, file)]) + .output()?; + if !file_content.status.success() { + let error_msg = String::from_utf8_lossy(&file_content.stderr); + return Err(format!("Failed to get content of file {}: {}", file, error_msg).into()); + } - // Remove all files not in the `changed_files` set - remove_unwanted_files(repo_path, &changed_files)?; + let file_path = Path::new(file); + if let Some(parent) = file_path.parent() { + fs::create_dir_all(parent)?; + } - println!("Only the changed files have been kept locally."); + let mut file_handle = File::create(file_path)?; + file_handle.write_all(&file_content.stdout)?; + } Ok(()) } -/// Removes all files that are not in the `files_to_keep` set, but preserves directories. -/// -/// # Arguments -/// -/// * `repo_path` - The path of the repository. -/// * `files_to_keep` - A set of file paths to keep relative to the `repo_path`. -fn remove_unwanted_files( - repo_path: &str, - files_to_keep: &HashSet, +pub fn checkout( + clone_url: &str, + clone_path: &str, + base_branch: Option<&str>, + pr_branch: Option<&str>, ) -> Result<(), Box> { - // Recursively remove unwanted files - for entry in fs::read_dir(repo_path)? { - let entry = entry?; - let path = entry.path(); + // Step 1: Clone the repository + let mut clone_cmd = Command::new("git"); + clone_cmd.arg("clone").arg(clone_url).arg(clone_path); + if let Some(branch) = base_branch { + clone_cmd.arg("--branch").arg(branch); + } - // Skip the .git directory to preserve repository integrity - if path.is_dir() && path.file_name().map_or(false, |name| name == ".git") { - continue; - } + let output = clone_cmd.output()?; + if !output.status.success() { + let error_msg = String::from_utf8_lossy(&output.stderr); + return Err(format!("Failed to clone repository: {}", error_msg).into()); + } - // Determine the relative path - let relative_path = path.strip_prefix(repo_path)?.to_str().unwrap().to_string(); + let cloned_path = Path::new(clone_path).canonicalize()?; + env::set_current_dir(&cloned_path)?; - // Check if the file should be kept or removed - if path.is_file() && !files_to_keep.contains(&relative_path) { - println!("Removing file: {}", relative_path); - fs::remove_file(&path)?; - } else if path.is_dir() { - // Recursively clean up subdirectories - remove_unwanted_files(path.to_str().unwrap(), files_to_keep)?; + // Fetch the PR branch + if let Some(pr) = pr_branch { + let fetch_output = Command::new("git") + .args(&["fetch", "origin", pr]) + .output()?; + if !fetch_output.status.success() { + let error_msg = String::from_utf8_lossy(&fetch_output.stderr); + return Err(format!("Failed to fetch PR branch: {}", error_msg).into()); + } + } - // Check if the directory is empty and remove it - if fs::read_dir(&path)?.next().is_none() { - println!("Removing empty directory: {}", relative_path); - fs::remove_dir(&path)?; - } + // Get the list of changed files + let changed_files = match (base_branch, pr_branch) { + (Some(base), Some(pr)) => get_cumulative_pr_files(Some(base), Some(pr))?, + (None, Some(pr)) => get_cumulative_pr_files(None, Some(pr))?, + _ => { + return Err("At least PR branch must be specified.".into()); } + }; + + println!("Changed files:\n{:?}", changed_files); + + // Save the content of the changed files from the PR branch + if let Some(pr) = pr_branch { + save_pr_branch_files(&changed_files, pr)?; } + + // Now proceed with deletion based on the changed files + delete_except(&changed_files, &cloned_path)?; + delete_empty_directories(&cloned_path)?; + Ok(()) }