Skip to content

Commit 75bdb72

Browse files
osiewiczdinocostaepage
committed
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now walk each directory at most once and add to the list of files to be cleaned, step by step. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in rust-lang#16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com> Co-authored-by: Ed Page <eopage@gmail.com>
1 parent 2c283a9 commit 75bdb72

File tree

2 files changed

+95
-97
lines changed

2 files changed

+95
-97
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ impl FileType {
147147
should_replace_hyphens: true,
148148
}
149149
}
150+
151+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
152+
(
153+
format!("{}{}-", self.prefix, target.crate_name()),
154+
self.suffix.clone(),
155+
)
156+
}
150157
}
151158

152159
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 88 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
1010
use anyhow::bail;
1111
use cargo_util::paths;
12-
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
12+
use indexmap::{IndexMap, IndexSet};
13+
14+
use std::ffi::OsString;
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -250,31 +251,34 @@ fn clean_specs(
250251
}
251252
}
252253
} else {
253-
// Try to reduce the amount of times we iterate over the same target directory by storing away
254-
// the directories we've iterated over (and cleaned for a given package).
255-
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
254+
let mut dirs_to_clean = DirectoriesToClean::default();
256255
for pkg in packages {
257-
let pkg_dir = format!("{}-*", pkg.name());
256+
let pkg_fingerprint_prefix = format!("{}-", pkg.name());
258257
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
259258

260259
// Clean fingerprints.
260+
let package = &*pkg.name();
261261
for (_, layout) in &layouts_with_host {
262-
let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?;
263-
clean_ctx.rm_rf_package_glob_containing_hash(
264-
&pkg.name(),
265-
&Path::new(&dir).join(&pkg_dir),
266-
)?;
262+
dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |path| {
263+
let Some((pkg_name, _)) = path.rsplit_once('-') else {
264+
return false;
265+
};
266+
267+
pkg_name == package
268+
});
267269
}
268270

269271
for target in pkg.targets() {
270272
if target.is_custom_build() {
271273
// Get both the build_script_build and the output directory.
272274
for (_, layout) in &layouts_with_host {
273-
let dir = escape_glob_path(layout.build_dir().build())?;
274-
clean_ctx.rm_rf_package_glob_containing_hash(
275-
&pkg.name(),
276-
&Path::new(&dir).join(&pkg_dir),
277-
)?;
275+
dirs_to_clean.mark_utf(layout.build_dir().build(), |path| {
276+
let Some((current_name, _)) = path.rsplit_once('-') else {
277+
return false;
278+
};
279+
280+
current_name == package
281+
});
278282
}
279283
continue;
280284
}
@@ -304,15 +308,15 @@ fn clean_specs(
304308
}
305309
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
306310
};
307-
let mut dir_glob_str = escape_glob_path(dir)?;
308-
let dir_glob = Path::new(&dir_glob_str);
311+
309312
for file_type in file_types {
310313
// Some files include a hash in the filename, some don't.
311-
let hashed_name = file_type.output_filename(target, Some("*"));
314+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
312315
let unhashed_name = file_type.output_filename(target, None);
313-
314-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
315-
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
316+
dirs_to_clean.mark_utf(&dir, |filename| {
317+
(filename.starts_with(&prefix) && filename.ends_with(&suffix))
318+
|| unhashed_name == filename
319+
});
316320

317321
// Remove the uplifted copy.
318322
if let Some(uplift_dir) = uplift_dir {
@@ -324,44 +328,81 @@ fn clean_specs(
324328
clean_ctx.rm_rf(&dep_info)?;
325329
}
326330
}
327-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
328-
clean_ctx.rm_rf(&unhashed_dep_info)?;
331+
let unhashed_dep_info = format!("{}.d", crate_name);
332+
dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info);
329333

330-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
331-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
332-
}
333-
dir_glob_str.push('*');
334-
let dir_glob_str: Rc<str> = dir_glob_str.into();
335-
if cleaned_packages
336-
.entry(dir_glob_str.clone())
337-
.or_default()
338-
.insert(crate_name.clone())
339-
{
340-
let paths = [
334+
dirs_to_clean.mark_utf(dir, |path| {
335+
if path.starts_with(&path_dash) {
341336
// Remove dep-info file generated by rustc. It is not tracked in
342337
// file_types. It does not have a prefix.
343-
(path_dash, ".d"),
338+
path.ends_with(".d")
339+
} else if path.starts_with(&path_dot) {
344340
// Remove split-debuginfo files generated by rustc.
345-
(path_dot, ".o"),
346-
(path_dot, ".dwo"),
347-
(path_dot, ".dwp"),
348-
];
349-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
350-
}
341+
[".o", ".dwo", ".dwp"]
342+
.iter()
343+
.any(|suffix| path.ends_with(suffix))
344+
} else {
345+
false
346+
}
347+
});
351348

352349
// TODO: what to do about build_script_build?
353-
let dir = escape_glob_path(layout.build_dir().incremental())?;
354-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
355-
clean_ctx.rm_rf_glob(&incremental)?;
350+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |path| {
351+
path.starts_with(path_dash)
352+
});
356353
}
357354
}
358355
}
359356
}
357+
clean_ctx.rm_rf_all(dirs_to_clean)?;
360358
}
361359

362360
Ok(())
363361
}
364362

363+
#[derive(Default)]
364+
struct DirectoriesToClean {
365+
dir_contents: IndexMap<PathBuf, IndexSet<OsString>>,
366+
to_remove: IndexSet<PathBuf>,
367+
}
368+
369+
impl DirectoriesToClean {
370+
fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) {
371+
let entry = match self.dir_contents.entry(directory.to_owned()) {
372+
indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
373+
indexmap::map::Entry::Vacant(vacant_entry) => {
374+
let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else {
375+
return;
376+
};
377+
vacant_entry.insert(
378+
dir_entries
379+
.into_iter()
380+
.flatten()
381+
.map(|entry| entry.file_name())
382+
.collect::<IndexSet<_>>(),
383+
)
384+
}
385+
};
386+
387+
entry.retain(|path| {
388+
let should_remove = should_remove_entry(path);
389+
if should_remove {
390+
self.to_remove.insert(directory.join(path));
391+
}
392+
!should_remove
393+
});
394+
}
395+
396+
fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) {
397+
self.mark(directory, move |path| {
398+
let Some(as_utf) = path.to_str() else {
399+
return false;
400+
};
401+
should_remove_entry(as_utf)
402+
});
403+
}
404+
}
405+
365406
fn escape_glob_path(pattern: &Path) -> CargoResult<String> {
366407
let pattern = pattern
367408
.to_str()
@@ -384,34 +425,8 @@ impl<'gctx> CleanContext<'gctx> {
384425
}
385426
}
386427

387-
/// Glob remove artifacts for the provided `package`
388-
///
389-
/// Make sure the artifact is for `package` and not another crate that is prefixed by
390-
/// `package` by getting the original name stripped of the trailing hash and possible
391-
/// extension
392-
fn rm_rf_package_glob_containing_hash(
393-
&mut self,
394-
package: &str,
395-
pattern: &Path,
396-
) -> CargoResult<()> {
397-
// TODO: Display utf8 warning to user? Or switch to globset?
398-
let pattern = pattern
399-
.to_str()
400-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
401-
for path in glob::glob(pattern)? {
402-
let path = path?;
403-
404-
let pkg_name = path
405-
.file_name()
406-
.and_then(std::ffi::OsStr::to_str)
407-
.and_then(|artifact| artifact.rsplit_once('-'))
408-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?
409-
.0;
410-
411-
if pkg_name != package {
412-
continue;
413-
}
414-
428+
fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> {
429+
for path in dirs.to_remove {
415430
self.rm_rf(&path)?;
416431
}
417432
Ok(())
@@ -428,30 +443,6 @@ impl<'gctx> CleanContext<'gctx> {
428443
Ok(())
429444
}
430445

431-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
432-
///
433-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
434-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
435-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
436-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
437-
fn rm_rf_prefix_list(
438-
&mut self,
439-
pattern: &str,
440-
path_matchers: &[(&str, &str)],
441-
) -> CargoResult<()> {
442-
for path in glob::glob(pattern)? {
443-
let path = path?;
444-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
445-
if path_matchers
446-
.iter()
447-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
448-
{
449-
self.rm_rf(&path)?;
450-
}
451-
}
452-
Ok(())
453-
}
454-
455446
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
456447
let meta = match fs::symlink_metadata(path) {
457448
Ok(meta) => meta,

0 commit comments

Comments
 (0)