diff --git a/.config/nextest.toml b/.config/nextest.toml new file mode 100644 index 0000000..96268e9 --- /dev/null +++ b/.config/nextest.toml @@ -0,0 +1,5 @@ +[profile.default] +default-filter = 'not test(docker)' + +[profile.docker-ci] +default-filter = 'test(docker)' diff --git a/.github/fixture/Dockerfile b/.github/fixture/Dockerfile new file mode 100644 index 0000000..2090a24 --- /dev/null +++ b/.github/fixture/Dockerfile @@ -0,0 +1,10 @@ +FROM rust:latest + +ENV RUST_BACKTRACE=1 +WORKDIR /app + +COPY --from=docker:latest /usr/local/bin/docker /usr/local/bin/ + +COPY . . + +ENTRYPOINT ["cargo", "test", "--package", "pre-commit-rs", "languages::docker::tests::test_get_docker_path", "--", "--ignored", "--exact", "--show-output"] diff --git a/.github/workflows/ci-docker.yml b/.github/workflows/ci-docker.yml new file mode 100644 index 0000000..01d6b10 --- /dev/null +++ b/.github/workflows/ci-docker.yml @@ -0,0 +1,61 @@ +name: CI docker + +on: + push: + branches: [ master ] + paths: + - "**/docker.rs" + pull_request: + paths: + - "**/docker.rs" + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + test-inner-docker: + timeout-minutes: 10 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: "Build docker image" + run: | + docker build --tag cs --file=./.github/fixture/Dockerfile . + - name: "Docker run" + run: > + docker run -v ${{ github.workspace }}/tests/files:/app/outside/test + -v /var/run/docker.sock:/var/run/docker.sock + --privileged + -e OUTSIDE_PATH=${{ github.workspace }}/tests/files/uv-pre-commit-config.yaml + --rm + cs + --nocapture + + cargo-test-docker: + timeout-minutes: 10 + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: rui314/setup-mold@v1 + + - uses: Swatinem/rust-cache@v2 + + - name: "Install Rust toolchain" + run: rustup show + + - name: "Install cargo nextest" + uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest + + - name: "Install uv" + uses: astral-sh/setup-uv@v3 + + - name: "Cargo test" + run: | + cargo nextest run \ + --workspace \ + --status-level skip --failure-output immediate --no-fail-fast -j 8 --final-status-level slow -P docker-ci diff --git a/.gitignore b/.gitignore index 781edd2..fb571e8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ # macOS **/.DS_Store +.idea # profiling flamegraphs *.flamegraph.svg diff --git a/Cargo.lock b/Cargo.lock index 7ce6d89..32c8a0e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1261,6 +1261,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.7.4" @@ -1526,6 +1532,8 @@ dependencies = [ "insta", "insta-cmd", "itertools", + "libc", + "md5", "owo-colors", "pprof", "predicates", @@ -1534,6 +1542,7 @@ dependencies = [ "regex", "rusqlite", "serde", + "serde_json", "serde_yaml", "shlex", "tempfile", @@ -1542,6 +1551,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber", + "tracing-test", "unicode-width 0.2.0", "url", "which", @@ -2346,6 +2356,27 @@ dependencies = [ "tracing-log", ] +[[package]] +name = "tracing-test" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "557b891436fe0d5e0e363427fc7f217abf9ccd510d5136549847bdcbcd011d68" +dependencies = [ + "tracing-core", + "tracing-subscriber", + "tracing-test-macro", +] + +[[package]] +name = "tracing-test-macro" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04659ddb06c87d233c566112c1c9c5b9e98256d9af50ec3bc9c8327f873a7568" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "try-lock" version = "0.2.5" diff --git a/Cargo.toml b/Cargo.toml index f35eae7..5749a71 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,11 +35,13 @@ http = "1.1.0" indicatif = "0.17.8" indoc = "2.0.5" itertools = "0.13.0" +md5 = "0.7.0" owo-colors = "4.1.0" rand = "0.8.5" rayon = "1.10.0" rusqlite = { version = "0.32.1", features = ["bundled"] } serde = { version = "1.0.210", features = ["derive"] } +serde_json = "1.0.132" serde_yaml = "0.9.34" shlex = "1.3.0" tempfile = "3.13.0" @@ -53,6 +55,7 @@ url = { version = "2.5.2", features = ["serde"] } which = "6.0.3" [target.'cfg(unix)'.dependencies] +libc = "0.2.164" pprof = { version = "0.14.0", optional = true } [dev-dependencies] @@ -62,6 +65,7 @@ insta = { version = "1.40.0", features = ["filters"] } insta-cmd = "0.6.0" predicates = "3.1.2" regex = "1.11.0" +tracing-test = { version = "0.2", features = ["no-env-filter"] } [lints.rust] dead_code = "allow" diff --git a/_typos.toml b/_typos.toml index 55967c9..5d1f330 100644 --- a/_typos.toml +++ b/_typos.toml @@ -1,3 +1,6 @@ [default.extend-words] edn = "edn" styl = "styl" + +[default] +extend-ignore-re = ["(?s)(#|//)\\s*spellchecker:off.*?\\n\\s*(#|//)\\s*spellchecker:on"] diff --git a/src/languages/docker.rs b/src/languages/docker.rs new file mode 100644 index 0000000..eed3fc0 --- /dev/null +++ b/src/languages/docker.rs @@ -0,0 +1,299 @@ +use std::borrow::Cow; +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fs; +use std::path::Path; +use std::sync::Arc; + +use anyhow::Result; +use assert_cmd::output::{OutputError, OutputOkExt}; +use fancy_regex::Regex; +use tokio::process::Command; +use tracing::debug; + +use crate::config::Language; +use crate::fs::CWD; +use crate::hook::Hook; +use crate::languages::{LanguageImpl, DEFAULT_VERSION}; +use crate::run::run_by_batch; + +const PRE_COMMIT_LABEL: &str = "PRE_COMMIT"; + +#[derive(Debug, Copy, Clone)] +pub struct Docker; + +impl Docker { + fn docker_tag(hook: &Hook) -> Option { + hook.path() + .file_name() + .and_then(OsStr::to_str) + .map(|s| format!("pre-commit-{:x}", md5::compute(s))) + } + + async fn build_docker_image(hook: &Hook, pull: bool) -> Result<()> { + let mut cmd = Command::new("docker"); + + let cmd = cmd.arg("build").args([ + "--tag", + &Self::docker_tag(hook).expect("Tag can't generate"), + "--label", + PRE_COMMIT_LABEL, + ]); + + if pull { + cmd.arg("--pull"); + } + + // This must come last for old versions of docker. + // see https://github.com/pre-commit/pre-commit/issues/477 + cmd.arg("."); + + debug!(cmd = ?cmd, "docker build_docker_image:"); + + cmd.current_dir(hook.path()) + .output() + .await + .map_err(OutputError::with_cause)? + .ok()?; + + Ok(()) + } + + /// see + fn is_in_docker() -> bool { + if fs::metadata("/.dockerenv").is_ok() || fs::metadata("/run/.containerenv").is_ok() { + return true; + } + false + } + + /// It should check [`Self::is_in_docker`] first, but like [Codespaces](https://github.com/features/codespaces) also run inner docker. + /// + /// There are no valid algorithm to get container id inner container, see + /// + fn get_container_id() -> Option { + // copy from https://github.com/open-telemetry/opentelemetry-java-instrumentation/pull/7167/files + if let Ok(regex) = Regex::new(r".*/docker/containers/([0-9a-f]{64})/.*") { + if let Ok(v2_group_path) = fs_err::read_to_string("/proc/self/mountinfo") { + if let Ok(Some(captures)) = regex.captures(&v2_group_path) { + return captures.get(1).map(|m| m.as_str().to_string()); + } + } + } + + None + } + + async fn get_docker_path(path: &Path) -> Result> { + if !Self::is_in_docker() { + return Ok(path.to_string_lossy()); + }; + + let Some(container_id) = Self::get_container_id() else { + return Ok(path.to_string_lossy()); + }; + + debug!(%container_id, "Docker get_docker_path:"); + + if let Ok(output) = Command::new("docker") + .args(["inspect", "--format", "'{{json .Mounts}}'", &container_id]) + .output() + .await + { + #[derive(serde::Deserialize, Debug)] + struct Mount { + #[serde(rename = "Source")] + source: String, + #[serde(rename = "Destination")] + destination: String, + } + debug!(?output, "Docker get_docker_path:"); + + // using test env Dockerfile return around `'` and end with `\n` + let stdout = String::from_utf8_lossy(&output.stdout); + let stdout = stdout.trim().trim_matches('\''); + let mounts: Vec = serde_json::from_str(stdout)?; + + debug!(?mounts, ?path, "Docker get_docker_path:"); + + for mount in mounts { + if path.starts_with(&mount.destination) { + let mut res = path + .to_string_lossy() + .replace(&mount.destination, &mount.source); + if res.contains('\\') { + // that means runner on the win + res = res.replace('/', "\\"); + } + return Ok(Cow::Owned(res)); + } + } + } + + Ok(path.to_string_lossy()) + } + + /// This aim to run as non-root user + /// + /// ## Windows: + /// + /// no way, see + /// + /// ## Other Unix Platform + /// + /// see + #[cfg(unix)] + fn get_docker_user() -> [String; 2] { + unsafe { + [ + "-u".to_owned(), + format!("{}:{}", libc::geteuid(), libc::geteuid()), + ] + } + } + + #[cfg(not(unix))] + fn get_docker_user() -> [String; 0] { + [] + } + + fn get_docker_tty(color: bool) -> Option { + if color { + Some("--tty".to_owned()) + } else { + None + } + } + + pub(crate) async fn docker_cmd(color: bool) -> Result { + let mut command = Command::new("docker"); + command.args(["run", "--rm"]); + if let Some(tty) = Self::get_docker_tty(color) { + command.arg(&tty); + } + + command.args(Self::get_docker_user()).args([ + "-v", + // https://docs.docker.com/engine/reference/commandline/run/#mount-volumes-from-container-volumes-from + &format!("{}:/src:rw,Z", Self::get_docker_path(&CWD).await?), + "--workdir", + "/src", + ]); + + Ok(command) + } +} + +impl LanguageImpl for Docker { + fn name(&self) -> Language { + Language::Docker + } + + fn default_version(&self) -> &str { + DEFAULT_VERSION + } + + fn environment_dir(&self) -> Option<&str> { + None + } + + async fn install(&self, hook: &Hook) -> Result<()> { + let env = hook.environment_dir().expect("No environment dir found"); + debug!(path = ?hook.path(), env=?env, "docker install:"); + Docker::build_docker_image(hook, true).await?; + fs_err::create_dir_all(env)?; + Ok(()) + } + + async fn check_health(&self) -> Result<()> { + todo!() + } + + async fn run( + &self, + hook: &Hook, + filenames: &[&String], + env_vars: Arc>, + ) -> Result<(i32, Vec)> { + Docker::build_docker_image(hook, false).await?; + + let docker_tag = Docker::docker_tag(hook).unwrap(); + + let cmds = shlex::split(&hook.entry).ok_or(anyhow::anyhow!("Failed to parse entry"))?; + + let cmds = Arc::new(cmds); + let hook_args = Arc::new(hook.args.clone()); + + let run = move |batch: Vec| { + let cmds = cmds.clone(); + let docker_tag = docker_tag.clone(); + let hook_args = hook_args.clone(); + let env_vars = env_vars.clone(); + + async move { + // docker run [OPTIONS] IMAGE [COMMAND] [ARG...] + let mut cmd = Docker::docker_cmd(true).await?; + let cmd = cmd + .args(["--entrypoint", &cmds[0], &docker_tag]) + .args(&cmds[1..]) + .args(hook_args.as_ref()) + .args(batch) + .stderr(std::process::Stdio::inherit()) + .envs(env_vars.as_ref()); + + debug!(cmd = ?cmd, "Docker run batch:"); + + let mut output = cmd.output().await?; + output.stdout.extend(output.stderr); + let code = output.status.code().unwrap_or(1); + anyhow::Ok((code, output.stdout)) + } + }; + + let results = run_by_batch(hook, filenames, run).await?; + + // Collect results + let mut combined_status = 0; + let mut combined_output = Vec::new(); + + for (code, output) in results { + combined_status |= code; + combined_output.extend(output); + } + + Ok((combined_status, combined_output)) + } +} + +#[cfg(test)] +mod tests { + use super::Docker; + use std::env; + use std::path::Path; + use tracing::debug; + use tracing_test::traced_test; + + // This test should run by docker build by [Dockerfile](../../.github/fixture/Dockerfile) + #[test] + #[ignore] + #[traced_test] + fn test_get_docker_path() { + assert!(Docker::is_in_docker()); + let env_path = env::var("OUTSIDE_PATH").unwrap(); + + debug!(%env_path, "test_get_docker_path"); + + let runtime = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .expect("Failed to create tokio runtime"); + + let path = Path::new("./outside/test/uv-pre-commit-config.yaml") + .canonicalize() + .unwrap(); + + let result = runtime.block_on(Docker::get_docker_path(&path)).unwrap(); + + assert_eq!(result, env_path); + } +} diff --git a/src/languages/docker_image.rs b/src/languages/docker_image.rs new file mode 100644 index 0000000..46ec210 --- /dev/null +++ b/src/languages/docker_image.rs @@ -0,0 +1,80 @@ +use crate::config::Language; +use crate::hook::Hook; +use crate::languages::docker::Docker; +use crate::languages::{LanguageImpl, DEFAULT_VERSION}; +use crate::run::run_by_batch; +use std::collections::HashMap; +use std::sync::Arc; +use tracing::debug; + +#[derive(Debug, Copy, Clone)] +pub struct DockerImage; + +impl LanguageImpl for DockerImage { + fn name(&self) -> Language { + Language::DockerImage + } + + fn default_version(&self) -> &str { + DEFAULT_VERSION + } + + fn environment_dir(&self) -> Option<&str> { + None + } + + async fn install(&self, _: &Hook) -> anyhow::Result<()> { + Ok(()) + } + + async fn check_health(&self) -> anyhow::Result<()> { + todo!() + } + + async fn run( + &self, + hook: &Hook, + filenames: &[&String], + env_vars: Arc>, + ) -> anyhow::Result<(i32, Vec)> { + let cmds = shlex::split(&hook.entry).ok_or(anyhow::anyhow!("Failed to parse entry"))?; + + let cmds = Arc::new(cmds); + let hook_args = Arc::new(hook.args.clone()); + let cwd = crate::fs::CWD.as_path(); + debug!(?cwd); + let run = move |batch: Vec| { + let cmds = cmds.clone(); + let hook_args = hook_args.clone(); + let env_vars = env_vars.clone(); + + async move { + let mut cmd = Docker::docker_cmd(true).await?; + let cmd = cmd + .args(&cmds[..]) + .args(hook_args.as_ref()) + .args(batch) + .stderr(std::process::Stdio::inherit()) + .envs(env_vars.as_ref()); + + let mut output = cmd.output().await?; + output.stdout.extend(output.stderr); + let code = output.status.code().unwrap_or(1); + anyhow::Ok((code, output.stdout)) + } + }; + + let results = run_by_batch(hook, filenames, run).await?; + + // Collect results + let mut combined_status = 0; + let mut combined_output = Vec::new(); + + for (code, output) in results { + combined_status |= code; + combined_output.extend(output); + } + + Ok((combined_status, combined_output)) + } +} diff --git a/src/languages/mod.rs b/src/languages/mod.rs index 1258151..8841e95 100644 --- a/src/languages/mod.rs +++ b/src/languages/mod.rs @@ -7,6 +7,8 @@ use anyhow::Result; use crate::config; use crate::hook::Hook; +mod docker; +mod docker_image; mod fail; mod node; mod python; @@ -34,6 +36,8 @@ pub enum Language { Node(node::Node), System(system::System), Fail(fail::Fail), + Docker(docker::Docker), + DockerImage(docker_image::DockerImage), } impl From for Language { @@ -42,8 +46,8 @@ impl From for Language { // config::Language::Conda => Language::Conda, // config::Language::Coursier => Language::Coursier, // config::Language::Dart => Language::Dart, - // config::Language::Docker => Language::Docker, - // config::Language::DockerImage => Language::DockerImage, + config::Language::Docker => Language::Docker(docker::Docker), + config::Language::DockerImage => Language::DockerImage(docker_image::DockerImage), // config::Language::Dotnet => Language::Dotnet, config::Language::Fail => Language::Fail(fail::Fail), // config::Language::Golang => Language::Golang, @@ -71,6 +75,8 @@ impl Display for Language { Self::Node(node) => node.fmt(f), Self::System(system) => system.fmt(f), Self::Fail(fail) => fail.fmt(f), + Self::Docker(docker) => docker.fmt(f), + Self::DockerImage(docker_image) => docker_image.fmt(f), } } } @@ -82,6 +88,8 @@ impl Language { Self::Node(node) => node.name(), Self::System(system) => system.name(), Self::Fail(fail) => fail.name(), + Self::Docker(docker) => docker.name(), + Self::DockerImage(docker_image) => docker_image.name(), } } @@ -91,6 +99,8 @@ impl Language { Self::Node(node) => node.default_version(), Self::System(system) => system.default_version(), Self::Fail(fail) => fail.default_version(), + Self::Docker(docker) => docker.default_version(), + Self::DockerImage(docker_image) => docker_image.default_version(), } } @@ -100,6 +110,8 @@ impl Language { Self::Node(node) => node.environment_dir(), Self::System(system) => system.environment_dir(), Self::Fail(fail) => fail.environment_dir(), + Self::Docker(docker) => docker.environment_dir(), + Self::DockerImage(docker_image) => docker_image.environment_dir(), } } @@ -109,6 +121,8 @@ impl Language { Self::Node(node) => node.install(hook).await, Self::System(system) => system.install(hook).await, Self::Fail(fail) => fail.install(hook).await, + Self::Docker(docker) => docker.install(hook).await, + Self::DockerImage(docker_image) => docker_image.install(hook).await, } } @@ -118,6 +132,8 @@ impl Language { Self::Node(node) => node.check_health().await, Self::System(system) => system.check_health().await, Self::Fail(fail) => fail.check_health().await, + Self::Docker(docker) => docker.check_health().await, + Self::DockerImage(docker_image) => docker_image.check_health().await, } } @@ -132,6 +148,8 @@ impl Language { Self::Node(node) => node.run(hook, filenames, env_vars).await, Self::System(system) => system.run(hook, filenames, env_vars).await, Self::Fail(fail) => fail.run(hook, filenames, env_vars).await, + Self::Docker(docker) => docker.run(hook, filenames, env_vars).await, + Self::DockerImage(docker_image) => docker_image.run(hook, filenames, env_vars).await, } } } diff --git a/tests/common/mod.rs b/tests/common/mod.rs index d4be8b4..493e9bd 100644 --- a/tests/common/mod.rs +++ b/tests/common/mod.rs @@ -241,6 +241,8 @@ pub const INSTA_FILTERS: &[(&str, &str)] = &[ r"Caused by: .* \(os error 2\)", "Caused by: No such file or directory (os error 2)", ), + // Time + (r"(\d+[:\.]\d+)(AM|PM|ms)", "[TIME]$2"), ]; #[allow(unused_macros)] diff --git a/tests/language.rs b/tests/language.rs index dadbc3e..0368e86 100644 --- a/tests/language.rs +++ b/tests/language.rs @@ -50,3 +50,142 @@ fn fail() -> Result<()> { Ok(()) } + +#[test] +fn docker() -> Result<()> { + // spellchecker:off + let context = TestContext::new(); + + context.init_project(); + + let cwd = context.workdir(); + // test suit from https://github.com/crate-ci/typos/blob/master/crates/typos-cli/tests/cmd/extend-words-case.in/file.txt + cwd.child("file.txt").write_str( + "public function noErrorOnTraillingSemicolonAndWhitespace(Connection $connection)", + )?; + cwd.child("_typos.toml") + // language=toml + .write_str( + r#" + [default.extend-words] + "trailling" = "trailing" + "#, + )?; + + cwd.child(".pre-commit-config.yaml") + .write_str(indoc::indoc! + // language=yaml + {r" + repos: + - repo: https://github.com/crate-ci/typos + rev: v1.26.0 + hooks: + - id: typos-docker + args: [] + "})?; + + Command::new("git") + .current_dir(cwd) + .arg("add") + .arg(".") + .assert() + .success(); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: false + exit_code: 1 + ----- stdout ----- + Cloning https://github.com/crate-ci/typos@v1.26.0 + typos....................................................................Failed + - hook id: typos-docker + - exit code: 2 + error: `Trailling` should be `Trailing` + --> file.txt:1:26 + | + 1 | public function noErrorOnTraillingSemicolonAndWhitespace(Connection $connection) + | ^^^^^^^^^ + | + + ----- stderr ----- + "); + // spellchecker:on + Ok(()) +} + +#[test] +fn docker_image() -> Result<()> { + let context = TestContext::new(); + + context.init_project(); + + let cwd = context.workdir(); + // test suit from https://github.com/super-linter/super-linter/tree/main/test/linters/gitleaks/bad + cwd.child("gitleaks_bad_01.txt").write_str( + r"aws_access_key_id = AROA47DSWDEZA3RQASWB +aws_secret_access_key = wQwdsZDiWg4UA5ngO0OSI2TkM4kkYxF6d2S1aYWM", + )?; + + Command::new("docker") + .args(["pull", "zricethezav/gitleaks:latest"]) + .assert() + .success(); + + cwd.child(".pre-commit-config.yaml") + .write_str(indoc::indoc! + // language=yaml + {r" + repos: + - repo: local + hooks: + - id: gitleaks-docker + name: Detect hardcoded secrets + language: docker_image + entry: zricethezav/gitleaks:latest git --pre-commit --redact --staged --verbose + pass_filenames: false + "})?; + + Command::new("git") + .current_dir(cwd) + .arg("add") + .arg(".") + .assert() + .success(); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: false + exit_code: 1 + ----- stdout ----- + Detect hardcoded secrets.................................................Failed + - hook id: gitleaks-docker + - exit code: 1 + ○ + │╲ + │ ○ + ○ ░ + ░ gitleaks + + Finding: aws_access_key_id = REDACTED + Secret: REDACTED + RuleID: generic-api-key + Entropy: 3.521928 + File: gitleaks_bad_01.txt + Line: 1 + Fingerprint: gitleaks_bad_01.txt:generic-api-key:1 + + Finding: ...ROA47DSWDEZA3RQASWB + aws_secret_access_key = REDACTED + Secret: REDACTED + RuleID: generic-api-key + Entropy: 4.703056 + File: gitleaks_bad_01.txt + Line: 2 + Fingerprint: gitleaks_bad_01.txt:generic-api-key:2 + + [TIME]PM INF 1 commits scanned. + [TIME]PM INF scan completed in [TIME]ms + [TIME]PM WRN leaks found: 2 + + ----- stderr ----- + "); + Ok(()) +}