From 1df629b2fa126f6fad984e3a42a076346fe1ea25 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Sat, 28 Dec 2024 18:52:22 -0500 Subject: [PATCH 1/4] refactor(source): wrap `PathBuf` with `PathEntry` This gives us more room to store file metadata. For example, * knowing a source file is a symlink and resolving it when packaging, * providing a rich JSON output for `cargo package --list` * enriching the `.cargo-vcs-info.json` with copied/symlinked file info --- src/cargo/ops/cargo_package/mod.rs | 5 +-- src/cargo/ops/cargo_package/vcs.rs | 6 ++-- src/cargo/ops/vendor.rs | 4 ++- src/cargo/sources/mod.rs | 1 + src/cargo/sources/path.rs | 52 +++++++++++++++++++++++------- 5 files changed, 52 insertions(+), 16 deletions(-) diff --git a/src/cargo/ops/cargo_package/mod.rs b/src/cargo/ops/cargo_package/mod.rs index 07ba919c98a..6c920eec4e3 100644 --- a/src/cargo/ops/cargo_package/mod.rs +++ b/src/cargo/ops/cargo_package/mod.rs @@ -16,6 +16,7 @@ use crate::core::Workspace; use crate::core::{Package, PackageId, PackageSet, Resolve, SourceId}; use crate::ops::lockfile::LOCKFILE_NAME; use crate::ops::registry::{infer_registry, RegistryOrIndex}; +use crate::sources::path::PathEntry; use crate::sources::registry::index::{IndexPackage, RegistryDependency}; use crate::sources::{PathSource, CRATES_IO_REGISTRY}; use crate::util::cache_lock::CacheLockMode; @@ -396,7 +397,7 @@ fn prepare_archive( fn build_ar_list( ws: &Workspace<'_>, pkg: &Package, - src_files: Vec, + src_files: Vec, vcs_info: Option, ) -> CargoResult> { let mut result = HashMap::new(); @@ -420,7 +421,7 @@ fn build_ar_list( .push(ArchiveFile { rel_path: rel_path.to_owned(), rel_str: rel_str.to_owned(), - contents: FileContents::OnDisk(src_file.clone()), + contents: FileContents::OnDisk(src_file.to_path_buf()), }); } } diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index 44adfd85a1a..ce81235fddc 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -9,6 +9,7 @@ use serde::Serialize; use tracing::debug; use crate::core::Package; +use crate::sources::PathEntry; use crate::CargoResult; use crate::GlobalContext; @@ -41,7 +42,7 @@ pub struct GitVcsInfo { #[tracing::instrument(skip_all)] pub fn check_repo_state( p: &Package, - src_files: &[PathBuf], + src_files: &[PathEntry], gctx: &GlobalContext, opts: &PackageOpts<'_>, ) -> CargoResult> { @@ -114,7 +115,7 @@ pub fn check_repo_state( fn git( pkg: &Package, gctx: &GlobalContext, - src_files: &[PathBuf], + src_files: &[PathEntry], repo: &git2::Repository, opts: &PackageOpts<'_>, ) -> CargoResult> { @@ -136,6 +137,7 @@ fn git( let mut dirty_src_files: Vec<_> = src_files .iter() .filter(|src_file| dirty_files.iter().any(|path| src_file.starts_with(path))) + .map(|p| p.as_ref()) .chain(dirty_metadata_paths(pkg, repo)?.iter()) .map(|path| { pathdiff::diff_paths(path, cwd) diff --git a/src/cargo/ops/vendor.rs b/src/cargo/ops/vendor.rs index 35d0e0c9417..c6c76f624da 100644 --- a/src/cargo/ops/vendor.rs +++ b/src/cargo/ops/vendor.rs @@ -2,6 +2,7 @@ use crate::core::shell::Verbosity; use crate::core::{GitReference, Package, Workspace}; use crate::ops; use crate::sources::path::PathSource; +use crate::sources::PathEntry; use crate::sources::CRATES_IO_REGISTRY; use crate::util::cache_lock::CacheLockMode; use crate::util::{try_canonicalize, CargoResult, GlobalContext}; @@ -315,13 +316,14 @@ fn sync( fn cp_sources( pkg: &Package, src: &Path, - paths: &[PathBuf], + paths: &[PathEntry], dst: &Path, cksums: &mut BTreeMap, tmp_buf: &mut [u8], gctx: &GlobalContext, ) -> CargoResult<()> { for p in paths { + let p = p.as_ref(); let relative = p.strip_prefix(&src).unwrap(); match relative.to_str() { diff --git a/src/cargo/sources/mod.rs b/src/cargo/sources/mod.rs index 9c98cc49eaa..dfbf79c76bc 100644 --- a/src/cargo/sources/mod.rs +++ b/src/cargo/sources/mod.rs @@ -29,6 +29,7 @@ pub use self::config::SourceConfigMap; pub use self::directory::DirectorySource; pub use self::git::GitSource; +pub use self::path::PathEntry; pub use self::path::PathSource; pub use self::path::RecursivePathSource; pub use self::registry::{ diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index ee2e6fea47f..a2e8c22fe06 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -94,7 +94,7 @@ impl<'gctx> PathSource<'gctx> { /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. #[tracing::instrument(skip_all)] - pub fn list_files(&self, pkg: &Package) -> CargoResult> { + pub fn list_files(&self, pkg: &Package) -> CargoResult> { list_files(pkg, self.gctx) } @@ -278,7 +278,7 @@ impl<'gctx> RecursivePathSource<'gctx> { /// are relevant for building this package, but it also contains logic to /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. - pub fn list_files(&self, pkg: &Package) -> CargoResult> { + pub fn list_files(&self, pkg: &Package) -> CargoResult> { list_files(pkg, self.gctx) } @@ -404,6 +404,32 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { } } +/// [`PathBuf`] with extra metadata. +#[derive(Clone, Debug)] +pub struct PathEntry { + path: PathBuf, +} + +impl PathEntry { + pub fn into_path_buf(self) -> PathBuf { + self.path + } +} + +impl std::ops::Deref for PathEntry { + type Target = Path; + + fn deref(&self) -> &Self::Target { + self.path.as_path() + } +} + +impl AsRef for PathEntry { + fn as_ref(&self) -> &PathBuf { + &self.path + } +} + fn first_package<'p>( pkg_id: PackageId, pkgs: &'p Vec, @@ -446,7 +472,7 @@ fn first_package<'p>( /// are relevant for building this package, but it also contains logic to /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. -pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { +pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { _list_files(pkg, gctx).with_context(|| { format!( "failed to determine list of files in {}", @@ -456,7 +482,7 @@ pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult CargoResult> { +fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult> { let root = pkg.root(); let no_include_option = pkg.manifest().include().is_empty(); let git_repo = if no_include_option { @@ -580,7 +606,7 @@ fn list_files_gix( repo: &gix::Repository, filter: &dyn Fn(&Path, bool) -> bool, gctx: &GlobalContext, -) -> CargoResult> { +) -> CargoResult> { debug!("list_files_gix {}", pkg.package_id()); let options = repo .dirwalk_options()? @@ -619,7 +645,7 @@ fn list_files_gix( vec![include, exclude] }; - let mut files = Vec::::new(); + let mut files = Vec::::new(); let mut subpackages_found = Vec::new(); for item in repo .dirwalk_iter(index.clone(), pathspec, Default::default(), options)? @@ -701,7 +727,7 @@ fn list_files_gix( } else if (filter)(&file_path, is_dir) { assert!(!is_dir); trace!(" found {}", file_path.display()); - files.push(file_path); + files.push(PathEntry { path: file_path }); } } @@ -715,7 +741,7 @@ fn list_files_gix( /// is not tracked under a Git repository. fn list_files_walk( path: &Path, - ret: &mut Vec, + ret: &mut Vec, is_root: bool, filter: &dyn Fn(&Path, bool) -> bool, gctx: &GlobalContext, @@ -756,7 +782,9 @@ fn list_files_walk( Ok(entry) => { let file_type = entry.file_type(); if file_type.is_file() || file_type.is_symlink() { - ret.push(entry.into_path()); + ret.push(PathEntry { + path: entry.into_path(), + }); } } Err(err) if err.loop_ancestor().is_some() => { @@ -770,7 +798,9 @@ fn list_files_walk( // Otherwise, simply recover from it. // Don't worry about error skipping here, the callers would // still hit the IO error if they do access it thereafter. - Some(path) => ret.push(path.to_path_buf()), + Some(path) => ret.push(PathEntry { + path: path.to_path_buf(), + }), None => return Err(err.into()), }, } @@ -801,7 +831,7 @@ fn last_modified_file( let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero()); if mtime > max { max = mtime; - max_path = file; + max_path = file.into_path_buf(); } } trace!("last modified file {}: {}", path.display(), max); From 8adbe0eb064a0899937014053c7a4b1557e10800 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Sat, 28 Dec 2024 21:47:01 -0500 Subject: [PATCH 2/4] refactor(package): preserve file type information in PathEntry So that we can tell whether a path is a symlink and need to traverse to the actual file to check dirtiness or copy real content. --- src/cargo/sources/path.rs | 67 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 66 insertions(+), 1 deletion(-) diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index a2e8c22fe06..5ced38da6b5 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -404,16 +404,70 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { } } +/// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`]. +#[derive(Debug, Clone, Copy)] +enum FileType { + File, + Dir, + Symlink, + Other, +} + +impl From for FileType { + fn from(value: fs::FileType) -> Self { + if value.is_file() { + FileType::File + } else if value.is_dir() { + FileType::Dir + } else if value.is_symlink() { + FileType::Symlink + } else { + FileType::Other + } + } +} + +impl From for FileType { + fn from(value: gix::dir::entry::Kind) -> Self { + use gix::dir::entry::Kind; + match value { + Kind::Untrackable => FileType::Other, + Kind::File => FileType::File, + Kind::Symlink => FileType::Symlink, + Kind::Directory | Kind::Repository => FileType::Dir, + } + } +} + /// [`PathBuf`] with extra metadata. #[derive(Clone, Debug)] pub struct PathEntry { path: PathBuf, + ty: FileType, } impl PathEntry { pub fn into_path_buf(self) -> PathBuf { self.path } + + /// Similar to [`std::path::Path::is_file`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_file(&self) -> bool { + matches!(self.ty, FileType::File) + } + + /// Similar to [`std::path::Path::is_dir`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_dir(&self) -> bool { + matches!(self.ty, FileType::Dir) + } + + /// Similar to [`std::path::Path::is_symlink`] + /// but doesn't follow the symbolic link nor make any system call + pub fn is_symlink(&self) -> bool { + matches!(self.ty, FileType::Symlink) + } } impl std::ops::Deref for PathEntry { @@ -727,7 +781,10 @@ fn list_files_gix( } else if (filter)(&file_path, is_dir) { assert!(!is_dir); trace!(" found {}", file_path.display()); - files.push(PathEntry { path: file_path }); + files.push(PathEntry { + path: file_path, + ty: kind.map(Into::into).unwrap_or(FileType::Other), + }); } } @@ -782,8 +839,15 @@ fn list_files_walk( Ok(entry) => { let file_type = entry.file_type(); if file_type.is_file() || file_type.is_symlink() { + // We follow_links(true) here so check if entry was created from a symlink + let ty = if entry.path_is_symlink() { + FileType::Symlink + } else { + file_type.into() + }; ret.push(PathEntry { path: entry.into_path(), + ty, }); } } @@ -800,6 +864,7 @@ fn list_files_walk( // still hit the IO error if they do access it thereafter. Some(path) => ret.push(PathEntry { path: path.to_path_buf(), + ty: FileType::Other, }), None => return Err(err.into()), }, From 871b17f59af1743bf0d475ec69daee31f940ebef Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 31 Dec 2024 11:51:10 -0500 Subject: [PATCH 3/4] test(package): show behavior with `core.symlinks=false` --- tests/testsuite/package.rs | 82 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index b418513eace..093cf89eb27 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -7025,3 +7025,85 @@ src/main.rs "#]]) .run(); } + +#[cargo_test] +fn git_core_symlinks_false() { + if !symlink_supported() { + return; + } + + let git_project = git::new("bar", |p| { + p.file( + "Cargo.toml", + r#" + [package] + name = "bar" + description = "bar" + license = "MIT" + edition = "2021" + documentation = "foo" + "#, + ) + .file("src/lib.rs", "//! This is a module") + .symlink("src/lib.rs", "symlink-lib.rs") + .symlink_dir("src", "symlink-dir") + }); + + let url = git_project.root().to_url().to_string(); + + let p = project().build(); + let root = p.root(); + // Remove the default project layout, + // so we can git-fetch from git_project under the same directory + fs::remove_dir_all(&root).unwrap(); + fs::create_dir_all(&root).unwrap(); + let repo = git::init(&root); + + let mut cfg = repo.config().unwrap(); + cfg.set_bool("core.symlinks", false).unwrap(); + + // let's fetch from git_project so it respects our core.symlinks=false config. + repo.remote_anonymous(&url) + .unwrap() + .fetch(&["HEAD"], None, None) + .unwrap(); + let rev = repo + .find_reference("FETCH_HEAD") + .unwrap() + .peel_to_commit() + .unwrap(); + repo.reset(rev.as_object(), git2::ResetType::Hard, None) + .unwrap(); + + p.cargo("package --allow-dirty") + .with_stderr_data(str![[r#" +[PACKAGING] bar v0.0.0 ([ROOT]/foo) +[PACKAGED] 7 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) +[VERIFYING] bar v0.0.0 ([ROOT]/foo) +[COMPILING] bar v0.0.0 ([ROOT]/foo/target/package/bar-0.0.0) +[FINISHED] `dev` profile [unoptimized + debuginfo] target(s) in [ELAPSED]s + +"#]]) + .run(); + + let f = File::open(&p.root().join("target/package/bar-0.0.0.crate")).unwrap(); + validate_crate_contents( + f, + "bar-0.0.0.crate", + &[ + "Cargo.lock", + "Cargo.toml", + "Cargo.toml.orig", + "src/lib.rs", + // We're missing symlink-dir/lib.rs in the `.crate` file. + "symlink-dir", + "symlink-lib.rs", + ".cargo_vcs_info.json", + ], + [ + // And their contents are incorrect. + ("symlink-dir", str!["[ROOT]/bar/src"]), + ("symlink-lib.rs", str!["[ROOT]/bar/src/lib.rs"]), + ], + ); +} From 059fe1608590afba6edebf7e13bb927817e98ed3 Mon Sep 17 00:00:00 2001 From: Weihang Lo Date: Tue, 31 Dec 2024 11:54:12 -0500 Subject: [PATCH 4/4] fix(package): warn if symlinks checked out as plain text files `cargo package` will warn users when git `core.symlinks` is `false` and some symlinks were checked out as plain files during packaging. Git config [`core.symlinks`] defaults to true when unset. In git-for-windows (and git as well), the config should be set to false explicitly when the repo was created, if symlink support wasn't detected [^1]. We assume the config was always set at creation time and never changed. So, if it is true, we don't bother users with any warning. [^1]: [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks --- src/cargo/ops/cargo_package/vcs.rs | 48 +++++++++++++++++++++++++ src/cargo/sources/path.rs | 56 ++++++++++++++++++++++++++---- tests/testsuite/package.rs | 4 +++ 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/src/cargo/ops/cargo_package/vcs.rs b/src/cargo/ops/cargo_package/vcs.rs index ce81235fddc..3c447f32b72 100644 --- a/src/cargo/ops/cargo_package/vcs.rs +++ b/src/cargo/ops/cargo_package/vcs.rs @@ -92,6 +92,8 @@ pub fn check_repo_state( return Ok(None); } + warn_symlink_checked_out_as_plain_text_file(gctx, src_files, &repo)?; + debug!( "found (git) Cargo.toml at `{}` in workdir `{}`", path.display(), @@ -111,6 +113,52 @@ pub fn check_repo_state( return Ok(Some(VcsInfo { git, path_in_vcs })); } +/// Warns if any symlinks were checked out as plain text files. +/// +/// Git config [`core.symlinks`] defaults to true when unset. +/// In git-for-windows (and git as well), +/// the config should be set to false explicitly when the repo was created, +/// if symlink support wasn't detected [^1]. +/// +/// We assume the config was always set at creation time and never changed. +/// So, if it is true, we don't bother users with any warning. +/// +/// [^1]: +/// +/// [`core.symlinks`]: https://git-scm.com/docs/git-config#Documentation/git-config.txt-coresymlinks +fn warn_symlink_checked_out_as_plain_text_file( + gctx: &GlobalContext, + src_files: &[PathEntry], + repo: &git2::Repository, +) -> CargoResult<()> { + if repo + .config() + .and_then(|c| c.get_bool("core.symlinks")) + .unwrap_or(true) + { + return Ok(()); + } + + if src_files.iter().any(|f| f.maybe_plain_text_symlink()) { + let mut shell = gctx.shell(); + shell.warn(format_args!( + "found symbolic links that may be checked out as regular files for git repo at `{}`\n\ + This might cause the `.crate` file to include incorrect or incomplete files", + repo.workdir().unwrap().display(), + ))?; + let extra_note = if cfg!(windows) { + "\nAnd on Windows, enable the Developer Mode to support symlinks" + } else { + "" + }; + shell.note(format_args!( + "to avoid this, set the Git config `core.symlinks` to `true`{extra_note}", + ))?; + } + + Ok(()) +} + /// The real git status check starts from here. fn git( pkg: &Package, diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 5ced38da6b5..8bafcf18099 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -407,7 +407,7 @@ impl<'gctx> Source for RecursivePathSource<'gctx> { /// Type that abstracts over [`gix::dir::entry::Kind`] and [`fs::FileType`]. #[derive(Debug, Clone, Copy)] enum FileType { - File, + File { maybe_symlink: bool }, Dir, Symlink, Other, @@ -416,7 +416,9 @@ enum FileType { impl From for FileType { fn from(value: fs::FileType) -> Self { if value.is_file() { - FileType::File + FileType::File { + maybe_symlink: false, + } } else if value.is_dir() { FileType::Dir } else if value.is_symlink() { @@ -432,7 +434,9 @@ impl From for FileType { use gix::dir::entry::Kind; match value { Kind::Untrackable => FileType::Other, - Kind::File => FileType::File, + Kind::File => FileType::File { + maybe_symlink: false, + }, Kind::Symlink => FileType::Symlink, Kind::Directory | Kind::Repository => FileType::Dir, } @@ -454,7 +458,7 @@ impl PathEntry { /// Similar to [`std::path::Path::is_file`] /// but doesn't follow the symbolic link nor make any system call pub fn is_file(&self) -> bool { - matches!(self.ty, FileType::File) + matches!(self.ty, FileType::File { .. }) } /// Similar to [`std::path::Path::is_dir`] @@ -468,6 +472,19 @@ impl PathEntry { pub fn is_symlink(&self) -> bool { matches!(self.ty, FileType::Symlink) } + + /// Whether this path might be a plain text symlink. + /// + /// Git may check out symlinks as plain text files that contain the link texts, + /// when either `core.symlinks` is `false`, or on Windows. + pub fn maybe_plain_text_symlink(&self) -> bool { + matches!( + self.ty, + FileType::File { + maybe_symlink: true + } + ) + } } impl std::ops::Deref for PathEntry { @@ -713,7 +730,24 @@ fn list_files_gix( && item.entry.rela_path == "Cargo.lock") }) }) - .map(|res| res.map(|item| (item.entry.rela_path, item.entry.disk_kind))) + .map(|res| { + res.map(|item| { + // Assumption: if a file tracked as a symlink in Git index, and + // the actual file type on disk is file, then it might be a + // plain text file symlink. + // There are exceptions like the file has changed from a symlink + // to a real text file, but hasn't been committed to Git index. + // Exceptions may be rare so we're okay with this now. + let maybe_plain_text_symlink = item.entry.index_kind + == Some(gix::dir::entry::Kind::Symlink) + && item.entry.disk_kind == Some(gix::dir::entry::Kind::File); + ( + item.entry.rela_path, + item.entry.disk_kind, + maybe_plain_text_symlink, + ) + }) + }) .chain( // Append entries that might be tracked in `/target/`. index @@ -731,12 +765,13 @@ fn list_files_gix( // This traversal is not part of a `status()`, and tracking things in `target/` // is rare. None, + false, ) }) .map(Ok), ) { - let (rela_path, kind) = item?; + let (rela_path, kind, maybe_plain_text_symlink) = item?; let file_path = root.join(gix::path::from_bstr(rela_path)); if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") { // Keep track of all sub-packages found and also strip out all @@ -781,9 +816,16 @@ fn list_files_gix( } else if (filter)(&file_path, is_dir) { assert!(!is_dir); trace!(" found {}", file_path.display()); + let ty = match kind.map(Into::into) { + Some(FileType::File { .. }) => FileType::File { + maybe_symlink: maybe_plain_text_symlink, + }, + Some(ty) => ty, + None => FileType::Other, + }; files.push(PathEntry { path: file_path, - ty: kind.map(Into::into).unwrap_or(FileType::Other), + ty, }); } } diff --git a/tests/testsuite/package.rs b/tests/testsuite/package.rs index 093cf89eb27..9c6d0493f83 100644 --- a/tests/testsuite/package.rs +++ b/tests/testsuite/package.rs @@ -7077,6 +7077,10 @@ fn git_core_symlinks_false() { p.cargo("package --allow-dirty") .with_stderr_data(str![[r#" +[WARNING] found symbolic links that may be checked out as regular files for git repo at `[ROOT]/foo/` +This might cause the `.crate` file to include incorrect or incomplete files +[NOTE] to avoid this, set the Git config `core.symlinks` to `true` +... [PACKAGING] bar v0.0.0 ([ROOT]/foo) [PACKAGED] 7 files, [FILE_SIZE]B ([FILE_SIZE]B compressed) [VERIFYING] bar v0.0.0 ([ROOT]/foo)