Skip to content

Commit d4a8b8d

Browse files
osiewiczdinocosta
andcommitted
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now batch calls to `rm_rf_prefix_list`, thus potentially avoiding multiple walks over a single subdirectory. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com>
1 parent 3a4485d commit d4a8b8d

File tree

2 files changed

+67
-54
lines changed

2 files changed

+67
-54
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ impl FileType {
140140
should_replace_hyphens: true,
141141
}
142142
}
143+
144+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
145+
(
146+
format!("{}{}-", self.prefix, target.crate_name()),
147+
self.suffix.clone(),
148+
)
149+
}
143150
}
144151

145152
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 60 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@ use crate::util::edit_distance;
77
use crate::util::errors::CargoResult;
88
use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
10-
use anyhow::bail;
10+
use anyhow::{Context as _, bail};
1111
use cargo_util::paths;
1212
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
13+
use std::borrow::Cow;
14+
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -195,6 +196,8 @@ fn clean_specs(
195196
// Try to reduce the amount of times we iterate over the same target directory by storing away
196197
// the directories we've iterated over (and cleaned for a given package).
197198
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
199+
// Root directory -> (prefix, [suffixes])
200+
let mut packages_to_clean: BTreeMap<Rc<Path>, BTreeMap<_, BTreeSet<_>>> = Default::default();
198201
for pkg in packages {
199202
let pkg_dir = format!("{}-*", pkg.name());
200203
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
@@ -267,8 +270,8 @@ fn clean_specs(
267270
continue;
268271
}
269272
let crate_name: Rc<str> = target.crate_name().into();
270-
let path_dot: &str = &format!("{crate_name}.");
271-
let path_dash: &str = &format!("{crate_name}-");
273+
let path_dot = format!("{crate_name}.");
274+
let path_dash = format!("{crate_name}-");
272275
for &mode in &[
273276
CompileMode::Build,
274277
CompileMode::Test,
@@ -292,14 +295,18 @@ fn clean_specs(
292295
}
293296
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
294297
};
295-
let mut dir_glob_str = escape_glob_path(dir)?;
296-
let dir_glob = Path::new(&dir_glob_str);
298+
let dir = Rc::<Path>::from(dir);
297299
for file_type in file_types {
298300
// Some files include a hash in the filename, some don't.
299-
let hashed_name = file_type.output_filename(target, Some("*"));
301+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
300302
let unhashed_name = file_type.output_filename(target, None);
303+
packages_to_clean
304+
.entry(dir.clone())
305+
.or_default()
306+
.entry(prefix)
307+
.or_default()
308+
.extend([Cow::Owned(suffix)]);
301309

302-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
303310
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
304311

305312
// Remove the uplifted copy.
@@ -314,37 +321,44 @@ fn clean_specs(
314321
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
315322
clean_ctx.rm_rf(&unhashed_dep_info)?;
316323

317-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
318-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
319-
}
320-
dir_glob_str.push('*');
321-
let dir_glob_str: Rc<str> = dir_glob_str.into();
322324
if cleaned_packages
323-
.entry(dir_glob_str.clone())
325+
.entry(dir.clone())
324326
.or_default()
325327
.insert(crate_name.clone())
326328
{
327-
let paths = [
328-
// Remove dep-info file generated by rustc. It is not tracked in
329-
// file_types. It does not have a prefix.
330-
(path_dash, ".d"),
331-
// Remove split-debuginfo files generated by rustc.
332-
(path_dot, ".o"),
333-
(path_dot, ".dwo"),
334-
(path_dot, ".dwp"),
335-
];
336-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
329+
let to_clean = packages_to_clean.entry(dir).or_default();
330+
// Remove dep-info file generated by rustc. It is not tracked in
331+
// file_types. It does not have a prefix.
332+
to_clean
333+
.entry(path_dash.clone())
334+
.or_default()
335+
.extend([Cow::Borrowed(".d")]);
336+
// Remove split-debuginfo files generated by rustc.
337+
to_clean.entry(path_dot.clone()).or_default().extend([
338+
Cow::Borrowed(".o"),
339+
Cow::Borrowed(".dwo"),
340+
Cow::Borrowed(".dwp"),
341+
]);
337342
}
338343

339344
// TODO: what to do about build_script_build?
340-
let dir = escape_glob_path(layout.build_dir().incremental())?;
341-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
342-
clean_ctx.rm_rf_glob(&incremental)?;
345+
let dir = Rc::from(layout.build_dir().incremental());
346+
347+
packages_to_clean
348+
.entry(dir)
349+
.or_default()
350+
.entry(path_dash.clone())
351+
.or_default()
352+
.extend([Cow::Borrowed("")]);
343353
}
344354
}
345355
}
346356
}
347357

358+
for (base_dir, entries_to_clean) in packages_to_clean {
359+
clean_ctx.rm_rf_prefix_list(&base_dir, &entries_to_clean)?;
360+
}
361+
348362
Ok(())
349363
}
350364

@@ -403,36 +417,28 @@ impl<'gctx> CleanContext<'gctx> {
403417
Ok(())
404418
}
405419

406-
fn rm_rf_glob(&mut self, pattern: &Path) -> CargoResult<()> {
407-
// TODO: Display utf8 warning to user? Or switch to globset?
408-
let pattern = pattern
409-
.to_str()
410-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
411-
for path in glob::glob(pattern)? {
412-
self.rm_rf(&path?)?;
413-
}
414-
Ok(())
415-
}
416-
417-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
418-
///
419-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
420-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
421-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
422-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
420+
/// Removes children of `root_directory` that start with any of the provided prefix/suffix pairs.
423421
fn rm_rf_prefix_list(
424422
&mut self,
425-
pattern: &str,
426-
path_matchers: &[(&str, &str)],
423+
root_directory: &Path,
424+
path_matchers: &BTreeMap<String, BTreeSet<Cow<'static, str>>>,
427425
) -> CargoResult<()> {
428-
for path in glob::glob(pattern)? {
429-
let path = path?;
430-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
431-
if path_matchers
432-
.iter()
433-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
434-
{
435-
self.rm_rf(&path)?;
426+
let Ok(dir) = std::fs::read_dir(root_directory) else {
427+
return Ok(());
428+
};
429+
for entry in dir {
430+
let entry = entry?;
431+
let filename = entry.file_name();
432+
let filename = filename
433+
.to_str()
434+
.context("Expected file name to be a valid UTF-8 string")?;
435+
if path_matchers.iter().any(|(prefix, suffixes)| {
436+
filename.starts_with(prefix)
437+
&& suffixes
438+
.iter()
439+
.any(|suffix| filename.ends_with(suffix.as_ref()))
440+
}) {
441+
self.rm_rf(&entry.path())?;
436442
}
437443
}
438444
Ok(())

0 commit comments

Comments
 (0)