From cd74470c4e8fa8cb9dd66496fb7347325b7e4ba9 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 15 Jul 2024 10:38:28 -0500 Subject: [PATCH] perf(source): Don't `du` on every git source load When profiling Zed (#14238), a major factor in their no-op run times is git patches and git dependencies. The slowest operation for each git source is running `du`. This is extraneous for a couple of reasons - GC isn't stable, slowing people down for a feature they aren't using - Size tracking was expected to be lazy, only reading sizes when the GC is configured for size, while this was eager - Git checkouts are immutable but we check on every load - This optimized for "while filesystem caches are warm" from a checkout operation when checkout operations are rare compared to all of the other commands run on a working directory. This removes the `du`, relying on the lazy loading that happens in `update_null_sizes`. --- src/cargo/core/global_cache_tracker.rs | 2 +- src/cargo/sources/git/source.rs | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/cargo/core/global_cache_tracker.rs b/src/cargo/core/global_cache_tracker.rs index 89ee32c8466..df80aa336bc 100644 --- a/src/cargo/core/global_cache_tracker.rs +++ b/src/cargo/core/global_cache_tracker.rs @@ -1800,7 +1800,7 @@ pub fn is_silent_error(e: &anyhow::Error) -> bool { /// Returns the disk usage for a git checkout directory. #[tracing::instrument] -pub fn du_git_checkout(path: &Path) -> CargoResult { +fn du_git_checkout(path: &Path) -> CargoResult { // !.git is used because clones typically use hardlinks for the git // contents. TODO: Verify behavior on Windows. // TODO: Or even better, switch to worktrees, and remove this. diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 1209d43f005..aa22eb3e439 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -145,13 +145,13 @@ impl<'gctx> GitSource<'gctx> { self.path_source.as_mut().unwrap().read_packages() } - fn mark_used(&self, size: Option) -> CargoResult<()> { + fn mark_used(&self) -> CargoResult<()> { self.gctx .deferred_global_last_use()? .mark_git_checkout_used(global_cache_tracker::GitCheckout { encoded_git_name: self.ident, short_name: self.short_id.expect("update before download"), - size, + size: None, }); Ok(()) } @@ -268,7 +268,7 @@ impl<'gctx> Source for GitSource<'gctx> { fn block_until_ready(&mut self) -> CargoResult<()> { if self.path_source.is_some() { - self.mark_used(None)?; + self.mark_used()?; return Ok(()); } @@ -363,11 +363,7 @@ impl<'gctx> Source for GitSource<'gctx> { self.locked_rev = Revision::Locked(actual_rev); self.path_source.as_mut().unwrap().load()?; - // Hopefully this shouldn't incur too much of a performance hit since - // most of this should already be in cache since it was just - // extracted. - let size = global_cache_tracker::du_git_checkout(&checkout_path)?; - self.mark_used(Some(size))?; + self.mark_used()?; Ok(()) } @@ -377,7 +373,7 @@ impl<'gctx> Source for GitSource<'gctx> { id, self.remote ); - self.mark_used(None)?; + self.mark_used()?; self.path_source .as_mut() .expect("BUG: `update()` must be called before `get()`")