Skip to content

Commit 132008d

Browse files
osiewiczdinocostaepage
committed
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now batch calls to `rm_rf_prefix_list`, thus potentially avoiding multiple walks over a single subdirectory. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in rust-lang#16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com> Co-authored-by: Ed Page <eopage@gmail.com>
1 parent edba308 commit 132008d

File tree

2 files changed

+102
-97
lines changed

2 files changed

+102
-97
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ impl FileType {
145145
should_replace_hyphens: true,
146146
}
147147
}
148+
149+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
150+
(
151+
format!("{}{}-", self.prefix, target.crate_name()),
152+
self.suffix.clone(),
153+
)
154+
}
148155
}
149156

150157
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 95 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
1010
use anyhow::bail;
1111
use cargo_util::paths;
12-
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
12+
use indexmap::{IndexMap, IndexSet};
13+
14+
use std::ffi::OsString;
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -250,31 +251,40 @@ fn clean_specs(
250251
}
251252
}
252253
} else {
253-
// Try to reduce the amount of times we iterate over the same target directory by storing away
254-
// the directories we've iterated over (and cleaned for a given package).
255-
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
254+
let mut dirs_to_clean = DirectoriesToClean::default();
256255
for pkg in packages {
257-
let pkg_dir = format!("{}-*", pkg.name());
256+
let pkg_fingerprint_prefix = format!("{}-", pkg.name());
258257
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
259258

260259
// Clean fingerprints.
260+
let package = &*pkg.name();
261261
for (_, layout) in &layouts_with_host {
262-
let dir = escape_glob_path(layout.build_dir().legacy_fingerprint())?;
263-
clean_ctx.rm_rf_package_glob_containing_hash(
264-
&pkg.name(),
265-
&Path::new(&dir).join(&pkg_dir),
266-
)?;
262+
dirs_to_clean.mark_utf(layout.build_dir().legacy_fingerprint(), |path| {
263+
if !path.starts_with(&pkg_fingerprint_prefix) {
264+
return false;
265+
}
266+
let Some((pkg_name, _)) = path.rsplit_once('-') else {
267+
return false;
268+
};
269+
270+
pkg_name == package
271+
});
267272
}
268273

269274
for target in pkg.targets() {
270275
if target.is_custom_build() {
271276
// Get both the build_script_build and the output directory.
272277
for (_, layout) in &layouts_with_host {
273-
let dir = escape_glob_path(layout.build_dir().build())?;
274-
clean_ctx.rm_rf_package_glob_containing_hash(
275-
&pkg.name(),
276-
&Path::new(&dir).join(&pkg_dir),
277-
)?;
278+
dirs_to_clean.mark_utf(layout.build_dir().build(), |path| {
279+
if !path.starts_with(&pkg_fingerprint_prefix) {
280+
return false;
281+
}
282+
let Some((pkg_name, _)) = path.rsplit_once('-') else {
283+
return false;
284+
};
285+
286+
pkg_name == package
287+
});
278288
}
279289
continue;
280290
}
@@ -304,15 +314,15 @@ fn clean_specs(
304314
}
305315
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
306316
};
307-
let mut dir_glob_str = escape_glob_path(dir)?;
308-
let dir_glob = Path::new(&dir_glob_str);
317+
309318
for file_type in file_types {
310319
// Some files include a hash in the filename, some don't.
311-
let hashed_name = file_type.output_filename(target, Some("*"));
320+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
312321
let unhashed_name = file_type.output_filename(target, None);
313-
314-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
315-
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
322+
dirs_to_clean.mark_utf(&dir, |filename| {
323+
(filename.starts_with(&prefix) && filename.ends_with(&suffix))
324+
|| unhashed_name == filename
325+
});
316326

317327
// Remove the uplifted copy.
318328
if let Some(uplift_dir) = uplift_dir {
@@ -324,44 +334,82 @@ fn clean_specs(
324334
clean_ctx.rm_rf(&dep_info)?;
325335
}
326336
}
327-
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
328-
clean_ctx.rm_rf(&unhashed_dep_info)?;
337+
let unhashed_dep_info = format!("{}.d", crate_name);
338+
dirs_to_clean.mark_utf(dir, |filename| filename == unhashed_dep_info);
329339

330-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
331-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
332-
}
333-
dir_glob_str.push('*');
334-
let dir_glob_str: Rc<str> = dir_glob_str.into();
335-
if cleaned_packages
336-
.entry(dir_glob_str.clone())
337-
.or_default()
338-
.insert(crate_name.clone())
339-
{
340-
let paths = [
340+
dirs_to_clean.mark_utf(dir, |path| {
341+
if path.starts_with(&path_dash) {
341342
// Remove dep-info file generated by rustc. It is not tracked in
342343
// file_types. It does not have a prefix.
343-
(path_dash, ".d"),
344+
path.ends_with(".d")
345+
} else if path.starts_with(&path_dot) {
344346
// Remove split-debuginfo files generated by rustc.
345-
(path_dot, ".o"),
346-
(path_dot, ".dwo"),
347-
(path_dot, ".dwp"),
348-
];
349-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
350-
}
347+
[".o", ".dwo", ".dwp"]
348+
.iter()
349+
.any(|suffix| path.ends_with(suffix))
350+
} else {
351+
false
352+
}
353+
});
351354

352355
// TODO: what to do about build_script_build?
353-
let dir = escape_glob_path(layout.build_dir().incremental())?;
354-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
355-
clean_ctx.rm_rf_glob(&incremental)?;
356+
let incremental_prefix = format!("{}-", crate_name);
357+
dirs_to_clean.mark_utf(layout.build_dir().incremental(), |path| {
358+
path.starts_with(&incremental_prefix)
359+
});
356360
}
357361
}
358362
}
359363
}
364+
clean_ctx.rm_rf_all(dirs_to_clean)?;
360365
}
361366

362367
Ok(())
363368
}
364369

370+
#[derive(Default)]
371+
struct DirectoriesToClean {
372+
dir_contents: IndexMap<PathBuf, IndexSet<OsString>>,
373+
to_remove: IndexSet<PathBuf>,
374+
}
375+
376+
impl DirectoriesToClean {
377+
fn mark(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&OsString) -> bool) {
378+
let entry = match self.dir_contents.entry(directory.to_owned()) {
379+
indexmap::map::Entry::Occupied(occupied_entry) => occupied_entry.into_mut(),
380+
indexmap::map::Entry::Vacant(vacant_entry) => {
381+
let Ok(dir_entries) = std::fs::read_dir(directory.to_owned()) else {
382+
return;
383+
};
384+
vacant_entry.insert(
385+
dir_entries
386+
.into_iter()
387+
.flatten()
388+
.map(|entry| entry.file_name())
389+
.collect::<IndexSet<_>>(),
390+
)
391+
}
392+
};
393+
394+
entry.retain(|filename| {
395+
let should_remove = should_remove_entry(filename);
396+
if should_remove {
397+
self.to_remove.insert(directory.join(filename));
398+
}
399+
!should_remove
400+
});
401+
}
402+
403+
fn mark_utf(&mut self, directory: &Path, mut should_remove_entry: impl FnMut(&str) -> bool) {
404+
self.mark(directory, move |path| {
405+
let Some(as_utf) = path.to_str() else {
406+
return false;
407+
};
408+
should_remove_entry(as_utf)
409+
});
410+
}
411+
}
412+
365413
fn escape_glob_path(pattern: &Path) -> CargoResult<String> {
366414
let pattern = pattern
367415
.to_str()
@@ -384,34 +432,8 @@ impl<'gctx> CleanContext<'gctx> {
384432
}
385433
}
386434

387-
/// Glob remove artifacts for the provided `package`
388-
///
389-
/// Make sure the artifact is for `package` and not another crate that is prefixed by
390-
/// `package` by getting the original name stripped of the trailing hash and possible
391-
/// extension
392-
fn rm_rf_package_glob_containing_hash(
393-
&mut self,
394-
package: &str,
395-
pattern: &Path,
396-
) -> CargoResult<()> {
397-
// TODO: Display utf8 warning to user? Or switch to globset?
398-
let pattern = pattern
399-
.to_str()
400-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
401-
for path in glob::glob(pattern)? {
402-
let path = path?;
403-
404-
let pkg_name = path
405-
.file_name()
406-
.and_then(std::ffi::OsStr::to_str)
407-
.and_then(|artifact| artifact.rsplit_once('-'))
408-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?
409-
.0;
410-
411-
if pkg_name != package {
412-
continue;
413-
}
414-
435+
fn rm_rf_all(&mut self, dirs: DirectoriesToClean) -> CargoResult<()> {
436+
for path in dirs.to_remove {
415437
self.rm_rf(&path)?;
416438
}
417439
Ok(())
@@ -428,30 +450,6 @@ impl<'gctx> CleanContext<'gctx> {
428450
Ok(())
429451
}
430452

431-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
432-
///
433-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
434-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
435-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
436-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
437-
fn rm_rf_prefix_list(
438-
&mut self,
439-
pattern: &str,
440-
path_matchers: &[(&str, &str)],
441-
) -> CargoResult<()> {
442-
for path in glob::glob(pattern)? {
443-
let path = path?;
444-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
445-
if path_matchers
446-
.iter()
447-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
448-
{
449-
self.rm_rf(&path)?;
450-
}
451-
}
452-
Ok(())
453-
}
454-
455453
pub fn rm_rf(&mut self, path: &Path) -> CargoResult<()> {
456454
let meta = match fs::symlink_metadata(path) {
457455
Ok(meta) => meta,

0 commit comments

Comments
 (0)