diff --git a/src/cargo/core/workspace.rs b/src/cargo/core/workspace.rs index 015bee2341f..1dc75c881e4 100644 --- a/src/cargo/core/workspace.rs +++ b/src/cargo/core/workspace.rs @@ -1142,8 +1142,7 @@ impl<'gctx> Workspace<'gctx> { MaybePackage::Package(ref p) => p.clone(), MaybePackage::Virtual(_) => continue, }; - let mut src = PathSource::new(pkg.root(), pkg.package_id().source_id(), self.gctx); - src.preload_with(pkg); + let src = PathSource::preload_with(pkg, self.gctx); registry.add_preloaded(Box::new(src)); } } diff --git a/src/cargo/ops/resolve.rs b/src/cargo/ops/resolve.rs index 1b54bbc2745..a1075bdb7fa 100644 --- a/src/cargo/ops/resolve.rs +++ b/src/cargo/ops/resolve.rs @@ -73,7 +73,7 @@ use crate::core::PackageSet; use crate::core::SourceId; use crate::core::Workspace; use crate::ops; -use crate::sources::PathSource; +use crate::sources::RecursivePathSource; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::CargoResult; use crate::util::CanonicalUrl; @@ -453,7 +453,7 @@ pub fn add_overrides<'a>( for (path, definition) in paths { let id = SourceId::for_path(&path)?; - let mut source = PathSource::new_recursive(&path, id, ws.gctx()); + let mut source = RecursivePathSource::new(&path, id, ws.gctx()); source.update().with_context(|| { format!( "failed to update path override `{}` \ diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 8beacdd456d..f38c880f695 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -10,7 +10,7 @@ use crate::sources::source::MaybePackage; use crate::sources::source::QueryKind; use crate::sources::source::Source; use crate::sources::IndexSummary; -use crate::sources::PathSource; +use crate::sources::RecursivePathSource; use crate::util::cache_lock::CacheLockMode; use crate::util::errors::CargoResult; use crate::util::hex::short_hash; @@ -24,7 +24,7 @@ use tracing::trace; use url::Url; /// `GitSource` contains one or more packages gathering from a Git 
repository. -/// Under the hood it uses [`PathSource`] to discover packages inside the +/// Under the hood it uses [`RecursivePathSource`] to discover packages inside the /// repository. /// /// ## Filesystem layout @@ -79,7 +79,7 @@ pub struct GitSource<'gctx> { /// /// This gets set to `Some` after the git repo has been checked out /// (automatically handled via [`GitSource::block_until_ready`]). - path_source: Option>, + path_source: Option>, /// A short string that uniquely identifies the version of the checkout. /// /// This is typically a 7-character string of the OID hash, automatically @@ -356,7 +356,7 @@ impl<'gctx> Source for GitSource<'gctx> { let source_id = self .source_id .with_git_precise(Some(actual_rev.to_string())); - let path_source = PathSource::new_recursive(&checkout_path, source_id, self.gctx); + let path_source = RecursivePathSource::new(&checkout_path, source_id, self.gctx); self.path_source = Some(path_source); self.short_id = Some(short_id.as_str().into()); diff --git a/src/cargo/sources/mod.rs b/src/cargo/sources/mod.rs index 1d2f51a3718..c487aada522 100644 --- a/src/cargo/sources/mod.rs +++ b/src/cargo/sources/mod.rs @@ -30,6 +30,7 @@ pub use self::config::SourceConfigMap; pub use self::directory::DirectorySource; pub use self::git::GitSource; pub use self::path::PathSource; +pub use self::path::RecursivePathSource; pub use self::registry::{ IndexSummary, RegistrySource, CRATES_IO_DOMAIN, CRATES_IO_INDEX, CRATES_IO_REGISTRY, }; diff --git a/src/cargo/sources/path.rs b/src/cargo/sources/path.rs index 77854ae0dd5..99d9f878888 100644 --- a/src/cargo/sources/path.rs +++ b/src/cargo/sources/path.rs @@ -20,13 +20,9 @@ use ignore::gitignore::GitignoreBuilder; use tracing::{debug, trace, warn}; use walkdir::WalkDir; -/// A source represents one or multiple packages gathering from a given root +/// A source that represents a package gathered at the root /// path on the filesystem. 
/// -/// It's the cornerstone of every other source --- other implementations -/// eventually need to call `PathSource` to read local packages somewhere on -/// the filesystem. -/// /// It also provides convenient methods like [`PathSource::list_files`] to /// list all files in a package, given its ability to walk the filesystem. pub struct PathSource<'gctx> { @@ -37,10 +33,7 @@ pub struct PathSource<'gctx> { /// Whether this source has updated all package information it may contain. updated: bool, /// Packages that this sources has discovered. - packages: Vec, - /// Whether this source should discover nested packages recursively. - /// See [`PathSource::new_recursive`] for more. - recursive: bool, + package: Option, gctx: &'gctx GlobalContext, } @@ -49,44 +42,28 @@ impl<'gctx> PathSource<'gctx> { /// /// This source will only return the package at precisely the `path` /// specified, and it will be an error if there's not a package at `path`. - pub fn new(path: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> PathSource<'gctx> { - PathSource { + pub fn new(path: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self { + Self { source_id, path: path.to_path_buf(), updated: false, - packages: Vec::new(), + package: None, gctx, - recursive: false, - } - } - - /// Creates a new source which is walked recursively to discover packages. - /// - /// This is similar to the [`PathSource::new`] method except that instead - /// of requiring a valid package to be present at `root` the folder is - /// walked entirely to crawl for packages. - /// - /// Note that this should be used with care and likely shouldn't be chosen - /// by default! - pub fn new_recursive( - root: &Path, - id: SourceId, - gctx: &'gctx GlobalContext, - ) -> PathSource<'gctx> { - PathSource { - recursive: true, - ..PathSource::new(root, id, gctx) } } /// Preloads a package for this source. The source is assumed that it has /// yet loaded any other packages. 
- pub fn preload_with(&mut self, pkg: Package) { - assert!(!self.updated); - assert!(!self.recursive); - assert!(self.packages.is_empty()); - self.updated = true; - self.packages.push(pkg); + pub fn preload_with(pkg: Package, gctx: &'gctx GlobalContext) -> Self { + let source_id = pkg.package_id().source_id(); + let path = pkg.root().to_owned(); + Self { + source_id, + path, + updated: true, + package: Some(pkg), + gctx, + } } /// Gets the package on the root path. @@ -95,7 +72,7 @@ impl<'gctx> PathSource<'gctx> { self.update()?; - match self.packages.iter().find(|p| p.root() == &*self.path) { + match &self.package { Some(pkg) => Ok(pkg.clone()), None => Err(internal(format!( "no package found in source {:?}", @@ -108,16 +85,19 @@ impl<'gctx> PathSource<'gctx> { /// filesystem if package information haven't yet updated. pub fn read_packages(&self) -> CargoResult> { if self.updated { - Ok(self.packages.clone()) - } else if self.recursive { - ops::read_packages(&self.path, self.source_id, self.gctx) + Ok(self.package.clone().into_iter().collect()) } else { - let path = self.path.join("Cargo.toml"); - let pkg = ops::read_package(&path, self.source_id, self.gctx)?; + let pkg = self.read_package()?; Ok(vec![pkg]) } } + fn read_package(&self) -> CargoResult { + let path = self.path.join("Cargo.toml"); + let pkg = ops::read_package(&path, self.source_id, self.gctx)?; + Ok(pkg) + } + /// List all files relevant to building this package inside this source. /// /// This function will use the appropriate methods to determine the @@ -129,556 +109,178 @@ impl<'gctx> PathSource<'gctx> { /// use other methods like `.gitignore`, `package.include`, or /// `package.exclude` to filter the list of files. pub fn list_files(&self, pkg: &Package) -> CargoResult> { - self._list_files(pkg).with_context(|| { - format!( - "failed to determine list of files in {}", - pkg.root().display() - ) - }) + list_files(pkg, self.gctx) } - /// See [`PathSource::list_files`]. 
- fn _list_files(&self, pkg: &Package) -> CargoResult> { - let root = pkg.root(); - let no_include_option = pkg.manifest().include().is_empty(); - let git_repo = if no_include_option { - if self - .gctx - .get_env("__CARGO_GITOXIDE_DISABLE_LIST_FILES") - .ok() - .as_deref() - == Some("1") - { - self.discover_git_repo(root)?.map(Git2OrGixRepository::Git2) - } else { - self.discover_gix_repo(root)?.map(Git2OrGixRepository::Gix) - } - } else { - None - }; - - let mut exclude_builder = GitignoreBuilder::new(root); - if no_include_option && git_repo.is_none() { - // no include option and not git repo discovered (see rust-lang/cargo#7183). - exclude_builder.add_line(None, ".*")?; - } - for rule in pkg.manifest().exclude() { - exclude_builder.add_line(None, rule)?; - } - let ignore_exclude = exclude_builder.build()?; - - let mut include_builder = GitignoreBuilder::new(root); - for rule in pkg.manifest().include() { - include_builder.add_line(None, rule)?; + /// Gets the last modified file in a package. + pub fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> { + if !self.updated { + return Err(internal(format!( + "BUG: source `{:?}` was not updated", + self.path + ))); } - let ignore_include = include_builder.build()?; + last_modified_file(&self.path, pkg, self.gctx) + } - let ignore_should_package = |relative_path: &Path, is_dir: bool| { - // "Include" and "exclude" options are mutually exclusive. - if no_include_option { - !ignore_exclude - .matched_path_or_any_parents(relative_path, is_dir) - .is_ignore() - } else { - if is_dir { - // Generally, include directives don't list every - // directory (nor should they!). Just skip all directory - // checks, and only check files. - return true; - } - ignore_include - .matched_path_or_any_parents(relative_path, /* is_dir */ false) - .is_ignore() - } - }; + /// Returns the root path of this source. 
+ pub fn path(&self) -> &Path { + &self.path + } - let filter = |path: &Path, is_dir: bool| { - let Ok(relative_path) = path.strip_prefix(root) else { - return false; - }; + /// Discovers packages inside this source if it hasn't yet done. + pub fn update(&mut self) -> CargoResult<()> { + if !self.updated { + self.package = Some(self.read_package()?); + self.updated = true; + } - let rel = relative_path.as_os_str(); - if rel == "Cargo.lock" { - return pkg.include_lockfile(); - } else if rel == "Cargo.toml" { - return true; - } + Ok(()) + } +} - ignore_should_package(relative_path, is_dir) - }; +impl<'gctx> Debug for PathSource<'gctx> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "the paths source") + } +} - // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135). - if no_include_option { - if let Some(repo) = git_repo { - return match repo { - Git2OrGixRepository::Git2(repo) => self.list_files_git(pkg, &repo, &filter), - Git2OrGixRepository::Gix(repo) => self.list_files_gix(pkg, &repo, &filter), - }; - } - } - self.list_files_walk(pkg, &filter) - } - - /// Returns `Some(git2::Repository)` if found sibling `Cargo.toml` and `.git` - /// directory; otherwise, caller should fall back on full file list. 
- fn discover_git_repo(&self, root: &Path) -> CargoResult> { - let repo = match git2::Repository::discover(root) { - Ok(repo) => repo, - Err(e) => { - tracing::debug!( - "could not discover git repo at or above {}: {}", - root.display(), - e - ); - return Ok(None); - } - }; - let index = repo - .index() - .with_context(|| format!("failed to open git index at {}", repo.path().display()))?; - let repo_root = repo.workdir().ok_or_else(|| { - anyhow::format_err!( - "did not expect repo at {} to be bare", - repo.path().display() - ) - })?; - let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) { - Ok(p) => p, - Err(e) => { - warn!( - "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}", - root, repo_root, e - ); - return Ok(None); - } - }; - let manifest_path = repo_relative_path.join("Cargo.toml"); - if index.get_path(&manifest_path, 0).is_some() { - return Ok(Some(repo)); - } - // Package Cargo.toml is not in git, don't use git to guide our selection. - Ok(None) - } - - /// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository - /// (searched upwards from `root`) contains a tracked `/Cargo.toml`. - /// Otherwise, the caller should fall back on full file list. 
- fn discover_gix_repo(&self, root: &Path) -> CargoResult> { - let repo = match gix::ThreadSafeRepository::discover(root) { - Ok(repo) => repo.to_thread_local(), - Err(e) => { - tracing::debug!( - "could not discover git repo at or above {}: {}", - root.display(), - e - ); - return Ok(None); - } - }; - let index = repo - .index_or_empty() - .with_context(|| format!("failed to open git index at {}", repo.path().display()))?; - let repo_root = repo.work_dir().ok_or_else(|| { - anyhow::format_err!( - "did not expect repo at {} to be bare", - repo.path().display() - ) - })?; - let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) { - Ok(p) => p, - Err(e) => { - warn!( - "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}", - root, repo_root, e - ); - return Ok(None); +impl<'gctx> Source for PathSource<'gctx> { + fn query( + &mut self, + dep: &Dependency, + kind: QueryKind, + f: &mut dyn FnMut(IndexSummary), + ) -> Poll> { + self.update()?; + if let Some(s) = self.package.as_ref().map(|p| p.summary()) { + let matched = match kind { + QueryKind::Exact => dep.matches(s), + QueryKind::Alternatives => true, + QueryKind::Normalized => dep.matches(s), + }; + if matched { + f(IndexSummary::Candidate(s.clone())) } - }; - let manifest_path = gix::path::join_bstr_unix_pathsep( - gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_path)), - "Cargo.toml", - ); - if index.entry_index_by_path(&manifest_path).is_ok() { - return Ok(Some(repo)); } - // Package Cargo.toml is not in git, don't use git to guide our selection. - Ok(None) + Poll::Ready(Ok(())) } - /// Lists files relevant to building this package inside this source by - /// consulting both Git index (tracked) or status (untracked) under - /// a given Git repository. - /// - /// This looks into Git submodules as well. 
- fn list_files_git( - &self, - pkg: &Package, - repo: &git2::Repository, - filter: &dyn Fn(&Path, bool) -> bool, - ) -> CargoResult> { - debug!("list_files_git {}", pkg.package_id()); - let index = repo.index()?; - let root = repo - .workdir() - .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?; - let pkg_path = pkg.root(); - - let mut ret = Vec::::new(); - - // We use information from the Git repository to guide us in traversing - // its tree. The primary purpose of this is to take advantage of the - // `.gitignore` and auto-ignore files that don't matter. - // - // Here we're also careful to look at both tracked and untracked files as - // the untracked files are often part of a build and may become relevant - // as part of a future commit. - let index_files = index.iter().map(|entry| { - use libgit2_sys::{GIT_FILEMODE_COMMIT, GIT_FILEMODE_LINK}; - // ``is_dir`` is an optimization to avoid calling - // ``fs::metadata`` on every file. - let is_dir = if entry.mode == GIT_FILEMODE_LINK as u32 { - // Let the code below figure out if this symbolic link points - // to a directory or not. - None - } else { - Some(entry.mode == GIT_FILEMODE_COMMIT as u32) - }; - (join(root, &entry.path), is_dir) - }); - let mut opts = git2::StatusOptions::new(); - opts.include_untracked(true); - if let Ok(suffix) = pkg_path.strip_prefix(root) { - opts.pathspec(suffix); - } - let statuses = repo.statuses(Some(&mut opts))?; - let mut skip_paths = HashSet::new(); - let untracked: Vec<_> = statuses - .iter() - .filter_map(|entry| { - match entry.status() { - // Don't include Cargo.lock if it is untracked. Packaging will - // generate a new one as needed. 
- git2::Status::WT_NEW if entry.path() != Some("Cargo.lock") => { - Some(Ok((join(root, entry.path_bytes()), None))) - } - git2::Status::WT_DELETED => { - let path = match join(root, entry.path_bytes()) { - Ok(p) => p, - Err(e) => return Some(Err(e)), - }; - skip_paths.insert(path); - None - } - _ => None, - } - }) - .collect::>()?; - - let mut subpackages_found = Vec::new(); - - for (file_path, is_dir) in index_files.chain(untracked) { - let file_path = file_path?; - if skip_paths.contains(&file_path) { - continue; - } + fn supports_checksums(&self) -> bool { + false + } - // Filter out files blatantly outside this package. This is helped a - // bit above via the `pathspec` function call, but we need to filter - // the entries in the index as well. - if !file_path.starts_with(pkg_path) { - continue; - } + fn requires_precise(&self) -> bool { + false + } - match file_path.file_name().and_then(|s| s.to_str()) { - // The `target` directory is never included. - Some("target") => { - // Only filter out target if its in the package root. - if file_path.parent().unwrap() == pkg_path { - continue; - } - } + fn source_id(&self) -> SourceId { + self.source_id + } - // Keep track of all sub-packages found and also strip out all - // matches we've found so far. Note, though, that if we find - // our own `Cargo.toml`, we keep going. 
- Some("Cargo.toml") => { - let path = file_path.parent().unwrap(); - if path != pkg_path { - debug!("subpackage found: {}", path.display()); - ret.retain(|p| !p.starts_with(path)); - subpackages_found.push(path.to_path_buf()); - continue; - } - } + fn download(&mut self, id: PackageId) -> CargoResult { + trace!("getting packages; id={}", id); + self.update()?; + let pkg = self.package.iter().find(|pkg| pkg.package_id() == id); + pkg.cloned() + .map(MaybePackage::Ready) + .ok_or_else(|| internal(format!("failed to find {} in path source", id))) + } - _ => {} - } + fn finish_download(&mut self, _id: PackageId, _data: Vec) -> CargoResult { + panic!("no download should have started") + } - // If this file is part of any other sub-package we've found so far, - // skip it. - if subpackages_found.iter().any(|p| file_path.starts_with(p)) { - continue; - } + fn fingerprint(&self, pkg: &Package) -> CargoResult { + let (max, max_path) = self.last_modified_file(pkg)?; + // Note that we try to strip the prefix of this package to get a + // relative path to ensure that the fingerprint remains consistent + // across entire project directory renames. + let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path); + Ok(format!("{} ({})", max, max_path.display())) + } - // `is_dir` is None for symlinks. The `unwrap` checks if the - // symlink points to a directory. - let is_dir = is_dir.unwrap_or_else(|| file_path.is_dir()); - if is_dir { - trace!(" found directory {}", file_path.display()); - match git2::Repository::open(&file_path) { - Ok(repo) => { - let files = self.list_files_git(pkg, &repo, filter)?; - ret.extend(files.into_iter()); - } - Err(..) => { - self.walk(&file_path, &mut ret, false, filter)?; - } - } - } else if filter(&file_path, is_dir) { - assert!(!is_dir); - // We found a file! 
- trace!(" found {}", file_path.display()); - ret.push(file_path); - } - } - return Ok(ret); - - #[cfg(unix)] - fn join(path: &Path, data: &[u8]) -> CargoResult { - use std::ffi::OsStr; - use std::os::unix::prelude::*; - Ok(path.join(::from_bytes(data))) - } - #[cfg(windows)] - fn join(path: &Path, data: &[u8]) -> CargoResult { - use std::str; - match str::from_utf8(data) { - Ok(s) => Ok(path.join(s)), - Err(e) => Err(anyhow::format_err!( - "cannot process path in git with a non utf8 filename: {}\n{:?}", - e, - data - )), - } + fn describe(&self) -> String { + match self.source_id.url().to_file_path() { + Ok(path) => path.display().to_string(), + Err(_) => self.source_id.to_string(), } } - /// Lists files relevant to building this package inside this source by - /// traversing the git working tree, while avoiding ignored files. - /// - /// This looks into Git sub-repositories as well, resolving them to individual files. - /// Symlinks to directories will also be resolved, but walked as repositories if they - /// point to one to avoid picking up `.git` directories. - fn list_files_gix( - &self, - pkg: &Package, - repo: &gix::Repository, - filter: &dyn Fn(&Path, bool) -> bool, - ) -> CargoResult> { - debug!("list_files_gix {}", pkg.package_id()); - let options = repo - .dirwalk_options()? 
- .emit_untracked(gix::dir::walk::EmissionMode::Matching) - .emit_ignored(None) - .emit_tracked(true) - .recurse_repositories(false) - .symlinks_to_directories_are_ignored_like_directories(true) - .emit_empty_directories(false); - let index = repo.index_or_empty()?; - let root = repo - .work_dir() - .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?; - assert!( - root.is_absolute(), - "BUG: paths used internally are absolute, and the repo inherits that" - ); - - let pkg_path = pkg.root(); - let repo_relative_pkg_path = pkg_path.strip_prefix(root).unwrap_or(Path::new("")); - let target_prefix = gix::path::to_unix_separators_on_windows(gix::path::into_bstr( - repo_relative_pkg_path.join("target/"), - )); - let package_prefix = - gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_pkg_path)); - - let pathspec = { - // Include the package root. - let mut include = BString::from(":/"); - include.push_str(package_prefix.as_ref()); - - // Exclude the target directory. - let mut exclude = BString::from(":!/"); - exclude.push_str(target_prefix.as_ref()); - - vec![include, exclude] - }; - - let mut files = Vec::::new(); - let mut subpackages_found = Vec::new(); - for item in repo - .dirwalk_iter(index.clone(), pathspec, Default::default(), options)? - .filter(|res| { - // Don't include Cargo.lock if it is untracked. Packaging will - // generate a new one as needed. - res.as_ref().map_or(true, |item| { - !(item.entry.status == Status::Untracked - && item.entry.rela_path == "Cargo.lock") - }) - }) - .map(|res| res.map(|item| (item.entry.rela_path, item.entry.disk_kind))) - .chain( - // Append entries that might be tracked in `/target/`. - index - .prefixed_entries(target_prefix.as_ref()) - .unwrap_or_default() - .iter() - .filter(|entry| { - // probably not needed as conflicts prevent this to run, but let's be explicit. 
- entry.stage() == Stage::Unconflicted - }) - .map(|entry| { - ( - entry.path(&index).to_owned(), - // Do not trust what's recorded in the index, enforce checking the disk. - // This traversal is not part of a `status()`, and tracking things in `target/` - // is rare. - None, - ) - }) - .map(Ok), - ) - { - let (rela_path, kind) = item?; - let file_path = root.join(gix::path::from_bstr(rela_path)); - if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") { - // Keep track of all sub-packages found and also strip out all - // matches we've found so far. Note, though, that if we find - // our own `Cargo.toml`, we keep going. - let path = file_path.parent().unwrap(); - if path != pkg_path { - debug!("subpackage found: {}", path.display()); - files.retain(|p| !p.starts_with(path)); - subpackages_found.push(path.to_path_buf()); - continue; - } - } - - // If this file is part of any other sub-package we've found so far, - // skip it. - if subpackages_found.iter().any(|p| file_path.starts_with(p)) { - continue; - } + fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {} - let is_dir = kind.map_or(false, |kind| { - if kind == gix::dir::entry::Kind::Symlink { - // Symlinks must be checked to see if they point to a directory - // we should traverse. - file_path.is_dir() - } else { - kind.is_dir() - } - }); - if is_dir { - // This could be a submodule, or a sub-repository. In any case, we prefer to walk - // it with git-support to leverage ignored files and to avoid pulling in entire - // .git repositories. 
- match gix::open(&file_path) { - Ok(sub_repo) => { - files.extend(self.list_files_gix(pkg, &sub_repo, filter)?); - } - Err(_) => { - self.walk(&file_path, &mut files, false, filter)?; - } - } - } else if (filter)(&file_path, is_dir) { - assert!(!is_dir); - trace!(" found {}", file_path.display()); - files.push(file_path); - } - } + fn is_yanked(&mut self, _pkg: PackageId) -> Poll> { + Poll::Ready(Ok(false)) + } - return Ok(files); + fn block_until_ready(&mut self) -> CargoResult<()> { + self.update() } - /// Lists files relevant to building this package inside this source by - /// walking the filesystem from the package root path. - /// - /// This is a fallback for [`PathSource::list_files_git`] when the package - /// is not tracked under a Git repository. - fn list_files_walk( - &self, - pkg: &Package, - filter: &dyn Fn(&Path, bool) -> bool, - ) -> CargoResult> { - let mut ret = Vec::new(); - self.walk(pkg.root(), &mut ret, true, filter)?; - Ok(ret) - } - - /// Helper recursive function for [`PathSource::list_files_walk`]. - fn walk( - &self, - path: &Path, - ret: &mut Vec, - is_root: bool, - filter: &dyn Fn(&Path, bool) -> bool, - ) -> CargoResult<()> { - let walkdir = WalkDir::new(path) - .follow_links(true) - .into_iter() - .filter_entry(|entry| { - let path = entry.path(); - let at_root = is_root && entry.depth() == 0; - let is_dir = entry.file_type().is_dir(); - - if !at_root && !filter(path, is_dir) { - return false; - } + fn invalidate_cache(&mut self) { + // Path source has no local cache. + } - if !is_dir { - return true; - } + fn set_quiet(&mut self, _quiet: bool) { + // Path source does not display status + } +} - // Don't recurse into any sub-packages that we have. - if !at_root && path.join("Cargo.toml").exists() { - return false; - } +/// A source that represents one or multiple packages gathered from a given root +/// path on the filesystem. +pub struct RecursivePathSource<'gctx> { + /// The unique identifier of this source. 
+ source_id: SourceId, + /// The root path of this source. + path: PathBuf, + /// Whether this source has updated all package information it may contain. + updated: bool, + /// Packages that this sources has discovered. + packages: Vec, + gctx: &'gctx GlobalContext, +} - // Skip root Cargo artifacts. - if is_root - && entry.depth() == 1 - && path.file_name().and_then(|s| s.to_str()) == Some("target") - { - return false; - } +impl<'gctx> RecursivePathSource<'gctx> { + /// Creates a new source which is walked recursively to discover packages. + /// + /// This is similar to the [`PathSource::new`] method except that instead + /// of requiring a valid package to be present at `root` the folder is + /// walked entirely to crawl for packages. + /// + /// Note that this should be used with care and likely shouldn't be chosen + /// by default! + pub fn new(root: &Path, source_id: SourceId, gctx: &'gctx GlobalContext) -> Self { + Self { + source_id, + path: root.to_path_buf(), + updated: false, + packages: Vec::new(), + gctx, + } + } - true - }); - for entry in walkdir { - match entry { - Ok(entry) => { - if !entry.file_type().is_dir() { - ret.push(entry.into_path()); - } - } - Err(err) if err.loop_ancestor().is_some() => { - self.gctx.shell().warn(err)?; - } - Err(err) => match err.path() { - // If an error occurs with a path, filter it again. - // If it is excluded, Just ignore it in this case. - // See issue rust-lang/cargo#10917 - Some(path) if !filter(path, path.is_dir()) => {} - // Otherwise, simply recover from it. - // Don't worry about error skipping here, the callers would - // still hit the IO error if they do access it thereafter. - Some(path) => ret.push(path.to_path_buf()), - None => return Err(err.into()), - }, - } + /// Returns the packages discovered by this source. It may walk the + /// filesystem if package information haven't yet updated. 
+ pub fn read_packages(&self) -> CargoResult> { + if self.updated { + Ok(self.packages.clone()) + } else { + ops::read_packages(&self.path, self.source_id, self.gctx) } + } - Ok(()) + /// List all files relevant to building this package inside this source. + /// + /// This function will use the appropriate methods to determine the + /// set of files underneath this source's directory which are relevant for + /// building `pkg`. + /// + /// The basic assumption of this method is that all files in the directory + /// are relevant for building this package, but it also contains logic to + /// use other methods like `.gitignore`, `package.include`, or + /// `package.exclude` to filter the list of files. + pub fn list_files(&self, pkg: &Package) -> CargoResult> { + list_files(pkg, self.gctx) } /// Gets the last modified file in a package. @@ -689,28 +291,7 @@ impl<'gctx> PathSource<'gctx> { self.path ))); } - - let mut max = FileTime::zero(); - let mut max_path = PathBuf::new(); - for file in self.list_files(pkg).with_context(|| { - format!( - "failed to determine the most recently modified file in {}", - pkg.root().display() - ) - })? { - // An `fs::stat` error here is either because path is a - // broken symlink, a permissions error, or a race - // condition where this path was `rm`-ed -- either way, - // we can ignore the error and treat the path's `mtime` - // as `0`. - let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero()); - if mtime > max { - max = mtime; - max_path = file; - } - } - trace!("last modified file {}: {}", self.path.display(), max); - Ok((max, max_path)) + last_modified_file(&self.path, pkg, self.gctx) } /// Returns the root path of this source. 
@@ -730,18 +311,13 @@ impl<'gctx> PathSource<'gctx> {
     }
 }
 
-enum Git2OrGixRepository {
-    Git2(git2::Repository),
-    Gix(gix::Repository),
-}
-
-impl<'gctx> Debug for PathSource<'gctx> {
+impl<'gctx> Debug for RecursivePathSource<'gctx> {
     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
         write!(f, "the paths source")
     }
 }
 
-impl<'gctx> Source for PathSource<'gctx> {
+impl<'gctx> Source for RecursivePathSource<'gctx> {
     fn query(
         &mut self,
         dep: &Dependency,
@@ -821,3 +397,598 @@ impl<'gctx> Source for PathSource<'gctx> {
         // Path source does not display status
     }
 }
+
+/// List all files relevant to building this package inside this source.
+///
+/// This function will use the appropriate methods to determine the
+/// set of files underneath this source's directory which are relevant for
+/// building `pkg`.
+///
+/// The basic assumption of this method is that all files in the directory
+/// are relevant for building this package, but it also contains logic to
+/// use other methods like `.gitignore`, `package.include`, or
+/// `package.exclude` to filter the list of files.
+pub fn list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathBuf>> {
+    _list_files(pkg, gctx).with_context(|| {
+        format!(
+            "failed to determine list of files in {}",
+            pkg.root().display()
+        )
+    })
+}
+
+/// See [`PathSource::list_files`].
+fn _list_files(pkg: &Package, gctx: &GlobalContext) -> CargoResult<Vec<PathBuf>> {
+    let root = pkg.root();
+    let no_include_option = pkg.manifest().include().is_empty();
+    let git_repo = if no_include_option {
+        if gctx
+            .get_env("__CARGO_GITOXIDE_DISABLE_LIST_FILES")
+            .ok()
+            .as_deref()
+            == Some("1")
+        {
+            discover_git_repo(root)?.map(Git2OrGixRepository::Git2)
+        } else {
+            discover_gix_repo(root)?.map(Git2OrGixRepository::Gix)
+        }
+    } else {
+        None
+    };
+
+    let mut exclude_builder = GitignoreBuilder::new(root);
+    if no_include_option && git_repo.is_none() {
+        // no include option and not git repo discovered (see rust-lang/cargo#7183).
+ exclude_builder.add_line(None, ".*")?; + } + for rule in pkg.manifest().exclude() { + exclude_builder.add_line(None, rule)?; + } + let ignore_exclude = exclude_builder.build()?; + + let mut include_builder = GitignoreBuilder::new(root); + for rule in pkg.manifest().include() { + include_builder.add_line(None, rule)?; + } + let ignore_include = include_builder.build()?; + + let ignore_should_package = |relative_path: &Path, is_dir: bool| { + // "Include" and "exclude" options are mutually exclusive. + if no_include_option { + !ignore_exclude + .matched_path_or_any_parents(relative_path, is_dir) + .is_ignore() + } else { + if is_dir { + // Generally, include directives don't list every + // directory (nor should they!). Just skip all directory + // checks, and only check files. + return true; + } + ignore_include + .matched_path_or_any_parents(relative_path, /* is_dir */ false) + .is_ignore() + } + }; + + let filter = |path: &Path, is_dir: bool| { + let Ok(relative_path) = path.strip_prefix(root) else { + return false; + }; + + let rel = relative_path.as_os_str(); + if rel == "Cargo.lock" { + return pkg.include_lockfile(); + } else if rel == "Cargo.toml" { + return true; + } + + ignore_should_package(relative_path, is_dir) + }; + + // Attempt Git-prepopulate only if no `include` (see rust-lang/cargo#4135). + if no_include_option { + if let Some(repo) = git_repo { + return match repo { + Git2OrGixRepository::Git2(repo) => list_files_git(pkg, &repo, &filter, gctx), + Git2OrGixRepository::Gix(repo) => list_files_gix(pkg, &repo, &filter, gctx), + }; + } + } + list_files_walk(pkg, &filter, gctx) +} + +enum Git2OrGixRepository { + Git2(git2::Repository), + Gix(gix::Repository), +} + +/// Returns `Some(git2::Repository)` if found sibling `Cargo.toml` and `.git` +/// directory; otherwise, caller should fall back on full file list. 
+fn discover_git_repo(root: &Path) -> CargoResult<Option<git2::Repository>> {
+    let repo = match git2::Repository::discover(root) {
+        Ok(repo) => repo,
+        Err(e) => {
+            tracing::debug!(
+                "could not discover git repo at or above {}: {}",
+                root.display(),
+                e
+            );
+            return Ok(None);
+        }
+    };
+    let index = repo
+        .index()
+        .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
+    let repo_root = repo.workdir().ok_or_else(|| {
+        anyhow::format_err!(
+            "did not expect repo at {} to be bare",
+            repo.path().display()
+        )
+    })?;
+    let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
+        Ok(p) => p,
+        Err(e) => {
+            warn!(
+                "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
+                root, repo_root, e
+            );
+            return Ok(None);
+        }
+    };
+    let manifest_path = repo_relative_path.join("Cargo.toml");
+    if index.get_path(&manifest_path, 0).is_some() {
+        return Ok(Some(repo));
+    }
+    // Package Cargo.toml is not in git, don't use git to guide our selection.
+    Ok(None)
+}
+
+/// Returns [`Some(gix::Repository)`](gix::Repository) if the discovered repository
+/// (searched upwards from `root`) contains a tracked `/Cargo.toml`.
+/// Otherwise, the caller should fall back on full file list.
+fn discover_gix_repo(root: &Path) -> CargoResult<Option<gix::Repository>> {
+    let repo = match gix::ThreadSafeRepository::discover(root) {
+        Ok(repo) => repo.to_thread_local(),
+        Err(e) => {
+            tracing::debug!(
+                "could not discover git repo at or above {}: {}",
+                root.display(),
+                e
+            );
+            return Ok(None);
+        }
+    };
+    let index = repo
+        .index_or_empty()
+        .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
+    let repo_root = repo.work_dir().ok_or_else(|| {
+        anyhow::format_err!(
+            "did not expect repo at {} to be bare",
+            repo.path().display()
+        )
+    })?;
+    let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
+        Ok(p) => p,
+        Err(e) => {
+            warn!(
+                "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
+                root, repo_root, e
+            );
+            return Ok(None);
+        }
+    };
+    let manifest_path = gix::path::join_bstr_unix_pathsep(
+        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_path)),
+        "Cargo.toml",
+    );
+    if index.entry_index_by_path(&manifest_path).is_ok() {
+        return Ok(Some(repo));
+    }
+    // Package Cargo.toml is not in git, don't use git to guide our selection.
+    Ok(None)
+}
+
+/// Lists files relevant to building this package inside this source by
+/// consulting both Git index (tracked) or status (untracked) under
+/// a given Git repository.
+///
+/// This looks into Git submodules as well.
+fn list_files_git(
+    pkg: &Package,
+    repo: &git2::Repository,
+    filter: &dyn Fn(&Path, bool) -> bool,
+    gctx: &GlobalContext,
+) -> CargoResult<Vec<PathBuf>> {
+    debug!("list_files_git {}", pkg.package_id());
+    let index = repo.index()?;
+    let root = repo
+        .workdir()
+        .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
+    let pkg_path = pkg.root();
+
+    let mut ret = Vec::<PathBuf>::new();
+
+    // We use information from the Git repository to guide us in traversing
+    // its tree. The primary purpose of this is to take advantage of the
+    // `.gitignore` and auto-ignore files that don't matter.
+    //
+    // Here we're also careful to look at both tracked and untracked files as
+    // the untracked files are often part of a build and may become relevant
+    // as part of a future commit.
+    let index_files = index.iter().map(|entry| {
+        use libgit2_sys::{GIT_FILEMODE_COMMIT, GIT_FILEMODE_LINK};
+        // ``is_dir`` is an optimization to avoid calling
+        // ``fs::metadata`` on every file.
+        let is_dir = if entry.mode == GIT_FILEMODE_LINK as u32 {
+            // Let the code below figure out if this symbolic link points
+            // to a directory or not.
+            None
+        } else {
+            Some(entry.mode == GIT_FILEMODE_COMMIT as u32)
+        };
+        (join(root, &entry.path), is_dir)
+    });
+    let mut opts = git2::StatusOptions::new();
+    opts.include_untracked(true);
+    if let Ok(suffix) = pkg_path.strip_prefix(root) {
+        opts.pathspec(suffix);
+    }
+    let statuses = repo.statuses(Some(&mut opts))?;
+    let mut skip_paths = HashSet::new();
+    let untracked: Vec<_> = statuses
+        .iter()
+        .filter_map(|entry| {
+            match entry.status() {
+                // Don't include Cargo.lock if it is untracked. Packaging will
+                // generate a new one as needed.
+                git2::Status::WT_NEW if entry.path() != Some("Cargo.lock") => {
+                    Some(Ok((join(root, entry.path_bytes()), None)))
+                }
+                git2::Status::WT_DELETED => {
+                    let path = match join(root, entry.path_bytes()) {
+                        Ok(p) => p,
+                        Err(e) => return Some(Err(e)),
+                    };
+                    skip_paths.insert(path);
+                    None
+                }
+                _ => None,
+            }
+        })
+        .collect::<CargoResult<Vec<_>>>()?;
+
+    let mut subpackages_found = Vec::new();
+
+    for (file_path, is_dir) in index_files.chain(untracked) {
+        let file_path = file_path?;
+        if skip_paths.contains(&file_path) {
+            continue;
+        }
+
+        // Filter out files blatantly outside this package. This is helped a
+        // bit above via the `pathspec` function call, but we need to filter
+        // the entries in the index as well.
+        if !file_path.starts_with(pkg_path) {
+            continue;
+        }
+
+        match file_path.file_name().and_then(|s| s.to_str()) {
+            // The `target` directory is never included.
+ Some("target") => { + // Only filter out target if its in the package root. + if file_path.parent().unwrap() == pkg_path { + continue; + } + } + + // Keep track of all sub-packages found and also strip out all + // matches we've found so far. Note, though, that if we find + // our own `Cargo.toml`, we keep going. + Some("Cargo.toml") => { + let path = file_path.parent().unwrap(); + if path != pkg_path { + debug!("subpackage found: {}", path.display()); + ret.retain(|p| !p.starts_with(path)); + subpackages_found.push(path.to_path_buf()); + continue; + } + } + + _ => {} + } + + // If this file is part of any other sub-package we've found so far, + // skip it. + if subpackages_found.iter().any(|p| file_path.starts_with(p)) { + continue; + } + + // `is_dir` is None for symlinks. The `unwrap` checks if the + // symlink points to a directory. + let is_dir = is_dir.unwrap_or_else(|| file_path.is_dir()); + if is_dir { + trace!(" found directory {}", file_path.display()); + match git2::Repository::open(&file_path) { + Ok(repo) => { + let files = list_files_git(pkg, &repo, filter, gctx)?; + ret.extend(files.into_iter()); + } + Err(..) => { + walk(&file_path, &mut ret, false, filter, gctx)?; + } + } + } else if filter(&file_path, is_dir) { + assert!(!is_dir); + // We found a file! + trace!(" found {}", file_path.display()); + ret.push(file_path); + } + } + return Ok(ret); + + #[cfg(unix)] + fn join(path: &Path, data: &[u8]) -> CargoResult { + use std::ffi::OsStr; + use std::os::unix::prelude::*; + Ok(path.join(::from_bytes(data))) + } + #[cfg(windows)] + fn join(path: &Path, data: &[u8]) -> CargoResult { + use std::str; + match str::from_utf8(data) { + Ok(s) => Ok(path.join(s)), + Err(e) => Err(anyhow::format_err!( + "cannot process path in git with a non utf8 filename: {}\n{:?}", + e, + data + )), + } + } +} + +/// Lists files relevant to building this package inside this source by +/// traversing the git working tree, while avoiding ignored files. 
+///
+/// This looks into Git sub-repositories as well, resolving them to individual files.
+/// Symlinks to directories will also be resolved, but walked as repositories if they
+/// point to one to avoid picking up `.git` directories.
+fn list_files_gix(
+    pkg: &Package,
+    repo: &gix::Repository,
+    filter: &dyn Fn(&Path, bool) -> bool,
+    gctx: &GlobalContext,
+) -> CargoResult<Vec<PathBuf>> {
+    debug!("list_files_gix {}", pkg.package_id());
+    let options = repo
+        .dirwalk_options()?
+        .emit_untracked(gix::dir::walk::EmissionMode::Matching)
+        .emit_ignored(None)
+        .emit_tracked(true)
+        .recurse_repositories(false)
+        .symlinks_to_directories_are_ignored_like_directories(true)
+        .emit_empty_directories(false);
+    let index = repo.index_or_empty()?;
+    let root = repo
+        .work_dir()
+        .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
+    assert!(
+        root.is_absolute(),
+        "BUG: paths used internally are absolute, and the repo inherits that"
+    );
+
+    let pkg_path = pkg.root();
+    let repo_relative_pkg_path = pkg_path.strip_prefix(root).unwrap_or(Path::new(""));
+    let target_prefix = gix::path::to_unix_separators_on_windows(gix::path::into_bstr(
+        repo_relative_pkg_path.join("target/"),
+    ));
+    let package_prefix =
+        gix::path::to_unix_separators_on_windows(gix::path::into_bstr(repo_relative_pkg_path));
+
+    let pathspec = {
+        // Include the package root.
+        let mut include = BString::from(":/");
+        include.push_str(package_prefix.as_ref());
+
+        // Exclude the target directory.
+        let mut exclude = BString::from(":!/");
+        exclude.push_str(target_prefix.as_ref());
+
+        vec![include, exclude]
+    };
+
+    let mut files = Vec::<PathBuf>::new();
+    let mut subpackages_found = Vec::new();
+    for item in repo
+        .dirwalk_iter(index.clone(), pathspec, Default::default(), options)?
+        .filter(|res| {
+            // Don't include Cargo.lock if it is untracked. Packaging will
+            // generate a new one as needed.
+ res.as_ref().map_or(true, |item| { + !(item.entry.status == Status::Untracked && item.entry.rela_path == "Cargo.lock") + }) + }) + .map(|res| res.map(|item| (item.entry.rela_path, item.entry.disk_kind))) + .chain( + // Append entries that might be tracked in `/target/`. + index + .prefixed_entries(target_prefix.as_ref()) + .unwrap_or_default() + .iter() + .filter(|entry| { + // probably not needed as conflicts prevent this to run, but let's be explicit. + entry.stage() == Stage::Unconflicted + }) + .map(|entry| { + ( + entry.path(&index).to_owned(), + // Do not trust what's recorded in the index, enforce checking the disk. + // This traversal is not part of a `status()`, and tracking things in `target/` + // is rare. + None, + ) + }) + .map(Ok), + ) + { + let (rela_path, kind) = item?; + let file_path = root.join(gix::path::from_bstr(rela_path)); + if file_path.file_name().and_then(|name| name.to_str()) == Some("Cargo.toml") { + // Keep track of all sub-packages found and also strip out all + // matches we've found so far. Note, though, that if we find + // our own `Cargo.toml`, we keep going. + let path = file_path.parent().unwrap(); + if path != pkg_path { + debug!("subpackage found: {}", path.display()); + files.retain(|p| !p.starts_with(path)); + subpackages_found.push(path.to_path_buf()); + continue; + } + } + + // If this file is part of any other sub-package we've found so far, + // skip it. + if subpackages_found.iter().any(|p| file_path.starts_with(p)) { + continue; + } + + let is_dir = kind.map_or(false, |kind| { + if kind == gix::dir::entry::Kind::Symlink { + // Symlinks must be checked to see if they point to a directory + // we should traverse. + file_path.is_dir() + } else { + kind.is_dir() + } + }); + if is_dir { + // This could be a submodule, or a sub-repository. In any case, we prefer to walk + // it with git-support to leverage ignored files and to avoid pulling in entire + // .git repositories. 
+            match gix::open(&file_path) {
+                Ok(sub_repo) => {
+                    files.extend(list_files_gix(pkg, &sub_repo, filter, gctx)?);
+                }
+                Err(_) => {
+                    walk(&file_path, &mut files, false, filter, gctx)?;
+                }
+            }
+        } else if (filter)(&file_path, is_dir) {
+            assert!(!is_dir);
+            trace!("  found {}", file_path.display());
+            files.push(file_path);
+        }
+    }
+
+    return Ok(files);
+}
+
+/// Lists files relevant to building this package inside this source by
+/// walking the filesystem from the package root path.
+///
+/// This is a fallback for [`list_files_git`] when the package
+/// is not tracked under a Git repository.
+fn list_files_walk(
+    pkg: &Package,
+    filter: &dyn Fn(&Path, bool) -> bool,
+    gctx: &GlobalContext,
+) -> CargoResult<Vec<PathBuf>> {
+    let mut ret = Vec::new();
+    walk(pkg.root(), &mut ret, true, filter, gctx)?;
+    Ok(ret)
+}
+
+/// Helper recursive function for [`list_files_walk`].
+fn walk(
+    path: &Path,
+    ret: &mut Vec<PathBuf>,
+    is_root: bool,
+    filter: &dyn Fn(&Path, bool) -> bool,
+    gctx: &GlobalContext,
+) -> CargoResult<()> {
+    let walkdir = WalkDir::new(path)
+        .follow_links(true)
+        .into_iter()
+        .filter_entry(|entry| {
+            let path = entry.path();
+            let at_root = is_root && entry.depth() == 0;
+            let is_dir = entry.file_type().is_dir();
+
+            if !at_root && !filter(path, is_dir) {
+                return false;
+            }
+
+            if !is_dir {
+                return true;
+            }
+
+            // Don't recurse into any sub-packages that we have.
+            if !at_root && path.join("Cargo.toml").exists() {
+                return false;
+            }
+
+            // Skip root Cargo artifacts.
+            if is_root
+                && entry.depth() == 1
+                && path.file_name().and_then(|s| s.to_str()) == Some("target")
+            {
+                return false;
+            }
+
+            true
+        });
+    for entry in walkdir {
+        match entry {
+            Ok(entry) => {
+                if !entry.file_type().is_dir() {
+                    ret.push(entry.into_path());
+                }
+            }
+            Err(err) if err.loop_ancestor().is_some() => {
+                gctx.shell().warn(err)?;
+            }
+            Err(err) => match err.path() {
+                // If an error occurs with a path, filter it again.
+                // If it is excluded, Just ignore it in this case.
+ // See issue rust-lang/cargo#10917 + Some(path) if !filter(path, path.is_dir()) => {} + // Otherwise, simply recover from it. + // Don't worry about error skipping here, the callers would + // still hit the IO error if they do access it thereafter. + Some(path) => ret.push(path.to_path_buf()), + None => return Err(err.into()), + }, + } + } + + Ok(()) +} + +/// Gets the last modified file in a package. +fn last_modified_file( + path: &Path, + pkg: &Package, + gctx: &GlobalContext, +) -> CargoResult<(FileTime, PathBuf)> { + let mut max = FileTime::zero(); + let mut max_path = PathBuf::new(); + for file in list_files(pkg, gctx).with_context(|| { + format!( + "failed to determine the most recently modified file in {}", + pkg.root().display() + ) + })? { + // An `fs::stat` error here is either because path is a + // broken symlink, a permissions error, or a race + // condition where this path was `rm`-ed -- either way, + // we can ignore the error and treat the path's `mtime` + // as `0`. + let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero()); + if mtime > max { + max = mtime; + max_path = file; + } + } + trace!("last modified file {}: {}", path.display(), max); + Ok((max, max_path)) +}