Skip to content

Commit 031e92c

Browse files
osiewiczdinocostaepage
committed
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now walk each directory at most once and add to the list of files to be cleaned, step by step. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com> Co-authored-by: Ed Page <eopage@gmail.com>
1 parent 2c283a9 commit 031e92c

File tree

2 files changed

+102
-97
lines changed

2 files changed

+102
-97
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ impl FileType {
147147
should_replace_hyphens: true,
148148
}
149149
}
150+
151+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
152+
(
153+
format!("{}{}-", self.prefix, target.crate_name()),
154+
self.suffix.clone(),
155+
)
156+
}
150157
}
151158

152159
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 95 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
1010
use anyhow::bail;
1111
use cargo_util::paths;
12-
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
12+
use indexmap::{IndexMap, IndexSet};
13+
14+
use std::ffi::OsString;
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -250,31 +251,40 @@ fn clean_specs(
250251
}
251252
}
252253
} else {
253-
// Try to reduce the amount of times we iterate over the same target directory by storing away
254-
// the directories we've iterated over (and cleaned for a given package).
255-
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
254+
let mut dirs_to_clean = DirectoriesToClean::default();
256255
for pkg in packages {
257-
let pkg_dir = format!("{}-*", pkg.name());
256+
let pkg_fingerprint_prefix = format!("{}-", pkg.name());
258257
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
259258

260259
// Clean fingerprints.
260+
let package = &*pkg.name();
261261
for (_, layout) in &layouts_with_host {
262-
let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?;
263-
clean_ctx.rm_rf_package_glob_containing_hash(
264-
&pkg.name(),
265-
&Path::new(&dir).join(&pkg_dir),
266-
)?;
262+
dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |path| {
263+
if !path.starts_with(&pkg_fingerprint_prefix) {
264+
return false;
265+
}
266+
let Some((pkg_name, _)) = path.rsplit_once('-') else {
267+
return false;
268+
};
269+
270+
pkg_name == package
271+
});
267272
}
268273

269274
for target in pkg.targets() {
270275
if target.is_custom_build() {
271276
// Get both the build_script_build and the output directory.
272277
for (_, layout) in &layouts_with_host {
273-
let dir = escape_glob_path(layout.build_dir().build())?;
274-
clean_ctx.rm_rf_package_glob_containing_hash(
275-
&pkg.name(),
276-
&Path::new(&dir).join(&pkg_dir),
277-
)?;
278+
dirs_to_clean.mark_utf(layout.build_dir().build(), |path| {
279+
if !path.starts_with(&pkg_fingerprint_prefix) {
280+
return false;
281+
}
282+
let Some((pkg_name, _)) = path.rsplit_once('-') else {
283+
return false;
284+
};
285+
286+
pkg_name == package
287+
});
278288
}
279289
continue;
280290
}
@@ -304,15 +314,15 @@ fn clean_specs(
304314
}
305315
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
306316
};
307-
let mut dir_glob_str = escape_glob_path(dir)?;
308-
let dir_glob = Path::new(&dir_glob_str);
317+
309318
for file_type in file_types {
310319
// Some files include a hash in the filename, some don't.
311-
let hashed_name = file_type.output_filename(target, Some("*"));
320+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
312321
let unhashed_name = file_type.output_filename(target, None);
313-
314-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
315-
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
322+
dirs_to_clean.mark_utf(&dir, |filename| {
323+
(filename.starts_with(&prefix) && filename.ends_with(&suffix))
324+
|| unhashed_name == filename
325+
});
316326

317327
// Remove the uplifted copy.
318328
if let Some(uplift_dir) = uplift_dir {
@@ -324,44 +334,82 @@ fn clean_specs(
324334
clean_ctx.rm_rf(&dep_info)?;
325335
}
326336
}
327-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
328-
clean_ctx.rm_rf(&unhashed_dep_info)?;
337+
let unhashed_dep_info = format!("{}.d", crate_name);
338+
dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info);
329339

330-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
331-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
332-
}
333-
dir_glob_str.push('*');
334-
let dir_glob_str: Rc<str> = dir_glob_str.into();
335-
if cleaned_packages
336-
.entry(dir_glob_str.clone())
337-
.or_default()
338-
.insert(crate_name.clone())
339-
{
340-
let paths = [
340+
dirs_to_clean.mark_utf(dir, |path| {
341+
if path.starts_with(&path_dash) {
341342
// Remove dep-info file generated by rustc. It is not tracked in
342343
// file_types. It does not have a prefix.
343-
(path_dash, ".d"),
344+
path.ends_with(".d")
345+
} else if path.starts_with(&path_dot) {
344346
// Remove split-debuginfo files generated by rustc.
345-
(path_dot, ".o"),
346-
(path_dot, ".dwo"),
347-
(path_dot, ".dwp"),
348-
];
349-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
350-
}
347+
[".o", ".dwo", ".dwp"]
348+
.iter()
349+
.any(|suffix| path.ends_with(suffix))
350+
} else {
351+
false
352+
}
353+
});
351354

352355
// TODO: what to do about build_script_build?
353-
let dir = escape_glob_path(layout.build_dir().incremental())?;
354-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
355-
clean_ctx.rm_rf_glob(&incremental)?;
356+
let incremental_prefix = format!("{}-", crate_name);
357+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |path| {
358+
path.starts_with(&incremental_prefix)
359+
});
356360
}
357361
}
358362
}
359363
}
364+
clean_ctx.rm_rf_all(dirs_to_clean)?;
360365
}
361366

362367
Ok(())
363368
}
364369

370+
#[derive(Default)]
371+
struct DirectoriesToClean {
372+
dir_contents: IndexMap<PathBuf, IndexSet<OsString>>,
373+
to_remove: IndexSet<PathBuf>,
374+
}
375+
376+
impl DirectoriesToClean {
377+
fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) {
378+
let entry = match self.dir_contents.entry(directory.to_owned()) {
379+
indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
380+
indexmap::map::Entry::Vacant(vacant_entry) => {
381+
let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else {
382+
return;
383+
};
384+
vacant_entry.insert(
385+
dir_entries
386+
.into_iter()
387+
.flatten()
388+
.map(|entry| entry.file_name())
389+
.collect::<IndexSet<_>>(),
390+
)
391+
}
392+
};
393+
394+
entry.retain(|filename| {
395+
let should_remove = should_remove_entry(filename);
396+
if should_remove {
397+
self.to_remove.insert(directory.join(filename));
398+
}
399+
!should_remove
400+
});
401+
}
402+
403+
fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) {
404+
self.mark(directory, move |path| {
405+
let Some(as_utf) = path.to_str() else {
406+
return false;
407+
};
408+
should_remove_entry(as_utf)
409+
});
410+
}
411+
}
412+
365413
fn escape_glob_path(pattern: &Path) -> CargoResult<String> {
366414
let pattern = pattern
367415
.to_str()
@@ -384,34 +432,8 @@ impl<'gctx> CleanContext<'gctx> {
384432
}
385433
}
386434

387-
/// Glob remove artifacts for the provided `package`
388-
///
389-
/// Make sure the artifact is for `package` and not another crate that is prefixed by
390-
/// `package` by getting the original name stripped of the trailing hash and possible
391-
/// extension
392-
fn rm_rf_package_glob_containing_hash(
393-
&mut self,
394-
package: &str,
395-
pattern: &Path,
396-
) -> CargoResult<()> {
397-
// TODO: Display utf8 warning to user? Or switch to globset?
398-
let pattern = pattern
399-
.to_str()
400-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
401-
for path in glob::glob(pattern)? {
402-
let path = path?;
403-
404-
let pkg_name = path
405-
.file_name()
406-
.and_then(std::ffi::OsStr::to_str)
407-
.and_then(|artifact| artifact.rsplit_once('-'))
408-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?
409-
.0;
410-
411-
if pkg_name != package {
412-
continue;
413-
}
414-
435+
fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> {
436+
for path in dirs.to_remove {
415437
self.rm_rf(&path)?;
416438
}
417439
Ok(())
@@ -428,30 +450,6 @@ impl<'gctx> CleanContext<'gctx> {
428450
Ok(())
429451
}
430452

431-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
432-
///
433-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
434-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
435-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
436-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
437-
fn rm_rf_prefix_list(
438-
&mut self,
439-
pattern: &str,
440-
path_matchers: &[(&str, &str)],
441-
) -> CargoResult<()> {
442-
for path in glob::glob(pattern)? {
443-
let path = path?;
444-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
445-
if path_matchers
446-
.iter()
447-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
448-
{
449-
self.rm_rf(&path)?;
450-
}
451-
}
452-
Ok(())
453-
}
454-
455453
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
456454
let meta = match fs::symlink_metadata(path) {
457455
Ok(meta) => meta,

0 commit comments

Comments
 (0)