Skip to content

Commit 43b04a2

Browse files
osiewiczdinocostaepage
committed
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now walk each directory at most once and add to the list of files to be cleaned, step by step. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com> Co-authored-by: Ed Page <eopage@gmail.com>
1 parent 2c283a9 commit 43b04a2

File tree

2 files changed

+105
-121
lines changed

2 files changed

+105
-121
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,13 @@ impl FileType {
147147
should_replace_hyphens: true,
148148
}
149149
}
150+
151+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
152+
(
153+
format!("{}{}-", self.prefix, target.crate_name()),
154+
self.suffix.clone(),
155+
)
156+
}
150157
}
151158

152159
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 98 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
1010
use anyhow::bail;
1111
use cargo_util::paths;
12-
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
12+
use indexmap::{IndexMap, IndexSet};
13+
14+
use std::ffi::OsString;
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -193,6 +194,7 @@ fn clean_specs(
193194
let packages = pkg_set.get_many(pkg_ids)?;
194195

195196
clean_ctx.progress = Box::new(CleaningPackagesBar::new(clean_ctx.gctx, packages.len()));
197+
let mut dirs_to_clean = DirectoriesToClean::default();
196198

197199
if clean_ctx.gctx.cli_unstable().build_dir_new_layout {
198200
for pkg in packages {
@@ -233,48 +235,54 @@ fn clean_specs(
233235
};
234236
if let Some(uplift_dir) = uplift_dir {
235237
for file_type in file_types {
236-
let uplifted_path =
237-
uplift_dir.join(file_type.uplift_filename(target));
238-
clean_ctx.rm_rf(&uplifted_path)?;
238+
let uplifted_filename = file_type.uplift_filename(target);
239+
239240
// Dep-info generated by Cargo itself.
240-
let dep_info = uplifted_path.with_extension("d");
241-
clean_ctx.rm_rf(&dep_info)?;
241+
let dep_info = Path::new(&uplifted_filename)
242+
.with_extension("d")
243+
.to_string_lossy()
244+
.into_owned();
245+
246+
dirs_to_clean.mark_utf(uplift_dir, |filename| {
247+
filename == uplifted_filename || filename == dep_info
248+
});
242249
}
243250
}
244251

245-
let dir = escape_glob_path(layout.build_dir().incremental())?;
246-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
247-
clean_ctx.rm_rf_glob(&incremental)?;
252+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| {
253+
filename.starts_with(crate_name.as_ref())
254+
});
248255
}
249256
}
250257
}
251258
}
252259
} else {
253-
// Try to reduce the amount of times we iterate over the same target directory by storing away
254-
// the directories we've iterated over (and cleaned for a given package).
255-
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
256260
for pkg in packages {
257-
let pkg_dir = format!("{}-*", pkg.name());
258261
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
259262

260263
// Clean fingerprints.
264+
let package = pkg.name().as_str();
261265
for (_, layout) in &layouts_with_host {
262-
let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?;
263-
clean_ctx.rm_rf_package_glob_containing_hash(
264-
&pkg.name(),
265-
&Path::new(&dir).join(&pkg_dir),
266-
)?;
266+
dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |filename| {
267+
let Some((pkg_name, _)) = filename.rsplit_once('-') else {
268+
return false;
269+
};
270+
271+
pkg_name == package
272+
});
267273
}
268274

269275
for target in pkg.targets() {
270276
if target.is_custom_build() {
271277
// Get both the build_script_build and the output directory.
272278
for (_, layout) in &layouts_with_host {
273-
let dir = escape_glob_path(layout.build_dir().build())?;
274-
clean_ctx.rm_rf_package_glob_containing_hash(
275-
&pkg.name(),
276-
&Path::new(&dir).join(&pkg_dir),
277-
)?;
279+
dirs_to_clean.mark_utf(layout.build_dir().build(), |filename| {
280+
let Some((current_name, _)) = filename.rsplit_once('-') else {
281+
return false;
282+
};
283+
284+
current_name == package
285+
});
278286
}
279287
continue;
280288
}
@@ -304,15 +312,15 @@ fn clean_specs(
304312
}
305313
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
306314
};
307-
let mut dir_glob_str = escape_glob_path(dir)?;
308-
let dir_glob = Path::new(&dir_glob_str);
315+
309316
for file_type in file_types {
310317
// Some files include a hash in the filename, some don't.
311-
let hashed_name = file_type.output_filename(target, Some("*"));
318+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
312319
let unhashed_name = file_type.output_filename(target, None);
313-
314-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
315-
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
320+
dirs_to_clean.mark_utf(&dir, |filename| {
321+
(filename.starts_with(&prefix) && filename.ends_with(&suffix))
322+
|| unhashed_name == filename
323+
});
316324

317325
// Remove the uplifted copy.
318326
if let Some(uplift_dir) = uplift_dir {
@@ -324,49 +332,79 @@ fn clean_specs(
324332
clean_ctx.rm_rf(&dep_info)?;
325333
}
326334
}
327-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
328-
clean_ctx.rm_rf(&unhashed_dep_info)?;
335+
let unhashed_dep_info = format!("{}.d", crate_name);
336+
dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info);
329337

330-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
331-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
332-
}
333-
dir_glob_str.push('*');
334-
let dir_glob_str: Rc<str> = dir_glob_str.into();
335-
if cleaned_packages
336-
.entry(dir_glob_str.clone())
337-
.or_default()
338-
.insert(crate_name.clone())
339-
{
340-
let paths = [
338+
dirs_to_clean.mark_utf(dir, |filename| {
339+
if filename.starts_with(&path_dash) {
341340
// Remove dep-info file generated by rustc. It is not tracked in
342341
// file_types. It does not have a prefix.
343-
(path_dash, ".d"),
342+
filename.ends_with(".d")
343+
} else if filename.starts_with(&path_dot) {
344344
// Remove split-debuginfo files generated by rustc.
345-
(path_dot, ".o"),
346-
(path_dot, ".dwo"),
347-
(path_dot, ".dwp"),
348-
];
349-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
350-
}
345+
[".o", ".dwo", ".dwp"]
346+
.iter()
347+
.any(|suffix| filename.ends_with(suffix))
348+
} else {
349+
false
350+
}
351+
});
351352

352353
// TODO: what to do about build_script_build?
353-
let dir = escape_glob_path(layout.build_dir().incremental())?;
354-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
355-
clean_ctx.rm_rf_glob(&incremental)?;
354+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |filename| {
355+
filename.starts_with(path_dash)
356+
});
356357
}
357358
}
358359
}
359360
}
360361
}
362+
clean_ctx.rm_rf_all(dirs_to_clean)?;
361363

362364
Ok(())
363365
}
364366

365-
fn escape_glob_path(pattern: &Path) -> CargoResult<String> {
366-
let pattern = pattern
367-
.to_str()
368-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
369-
Ok(glob::Pattern::escape(pattern))
367+
#[derive(Default)]
368+
struct DirectoriesToClean {
369+
dir_contents: IndexMap<PathBuf, IndexSet<OsString>>,
370+
to_remove: IndexSet<PathBuf>,
371+
}
372+
373+
impl DirectoriesToClean {
374+
fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) {
375+
let entry = match self.dir_contents.entry(directory.to_owned()) {
376+
indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
377+
indexmap::map::Entry::Vacant(vacant_entry) => {
378+
let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else {
379+
return;
380+
};
381+
vacant_entry.insert(
382+
dir_entries
383+
.into_iter()
384+
.flatten()
385+
.map(|entry| entry.file_name())
386+
.collect::<IndexSet<_>>(),
387+
)
388+
}
389+
};
390+
391+
entry.retain(|path| {
392+
let should_remove = should_remove_entry(path);
393+
if should_remove {
394+
self.to_remove.insert(directory.join(path));
395+
}
396+
!should_remove
397+
});
398+
}
399+
400+
fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) {
401+
self.mark(directory, move |filename| {
402+
let Some(as_utf) = filename.to_str() else {
403+
return false;
404+
};
405+
should_remove_entry(as_utf)
406+
});
407+
}
370408
}
371409

372410
impl<'gctx> CleanContext<'gctx> {
@@ -384,74 +422,13 @@ impl<'gctx> CleanContext<'gctx> {
384422
}
385423
}
386424

387-
/// Glob remove artifacts for the provided `package`
388-
///
389-
/// Make sure the artifact is for `package` and not another crate that is prefixed by
390-
/// `package` by getting the original name stripped of the trailing hash and possible
391-
/// extension
392-
fn rm_rf_package_glob_containing_hash(
393-
&mut self,
394-
package: &str,
395-
pattern: &Path,
396-
) -> CargoResult<()> {
397-
// TODO: Display utf8 warning to user? Or switch to globset?
398-
let pattern = pattern
399-
.to_str()
400-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
401-
for path in glob::glob(pattern)? {
402-
let path = path?;
403-
404-
let pkg_name = path
405-
.file_name()
406-
.and_then(std::ffi::OsStr::to_str)
407-
.and_then(|artifact| artifact.rsplit_once('-'))
408-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?
409-
.0;
410-
411-
if pkg_name != package {
412-
continue;
413-
}
414-
425+
fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> {
426+
for path in dirs.to_remove {
415427
self.rm_rf(&path)?;
416428
}
417429
Ok(())
418430
}
419431

420-
fn rm_rf_glob(&mut self, pattern: &Path) -> CargoResult<()> {
421-
// TODO: Display utf8 warning to user? Or switch to globset?
422-
let pattern = pattern
423-
.to_str()
424-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
425-
for path in glob::glob(pattern)? {
426-
self.rm_rf(&path?)?;
427-
}
428-
Ok(())
429-
}
430-
431-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
432-
///
433-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
434-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
435-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
436-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
437-
fn rm_rf_prefix_list(
438-
&mut self,
439-
pattern: &str,
440-
path_matchers: &[(&str, &str)],
441-
) -> CargoResult<()> {
442-
for path in glob::glob(pattern)? {
443-
let path = path?;
444-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
445-
if path_matchers
446-
.iter()
447-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
448-
{
449-
self.rm_rf(&path)?;
450-
}
451-
}
452-
Ok(())
453-
}
454-
455432
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
456433
let meta = match fs::symlink_metadata(path) {
457434
Ok(meta) => meta,

0 commit comments

Comments
 (0)