Skip to content

Commit 752d064

Browse files
osiewiczdinocosta
andcommitted
clean: Optimize clean with multiple -p specifiers
This commit optimizes implementation of `cargo clean -p` by reducing the amount of directory walks that take place. We now batch calls to `rm_rf_prefix_list`, thus potentially avoiding multiple walks over a single subdirectory. In practice this helps us significantly reduce the runtime for clearing large workspaces (as implemented in #16263); for Zed, `cargo clean --workspace` went down from 73 seconds to 3 seconds. We have 216 workspace members. Co-authored-by: dino <dinojoaocosta@gmail.com>
1 parent bd97934 commit 752d064

File tree

2 files changed

+68
-51
lines changed

2 files changed

+68
-51
lines changed

src/cargo/core/compiler/build_context/target_info.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,13 @@ impl FileType {
140140
should_replace_hyphens: true,
141141
}
142142
}
143+
144+
pub fn output_prefix_suffix(&self, target: &Target) -> (String, String) {
145+
(
146+
format!("{}{}-", self.prefix, target.crate_name()),
147+
self.suffix.clone(),
148+
)
149+
}
143150
}
144151

145152
impl TargetInfo {

src/cargo/ops/cargo_clean.rs

Lines changed: 61 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,11 @@ use crate::util::edit_distance;
77
use crate::util::errors::CargoResult;
88
use crate::util::interning::InternedString;
99
use crate::util::{GlobalContext, Progress, ProgressStyle};
10-
use anyhow::bail;
10+
use anyhow::{Context as _, bail};
1111
use cargo_util::paths;
1212
use indexmap::IndexSet;
13-
use std::collections::{HashMap, HashSet};
13+
use std::borrow::Cow;
14+
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
1415
use std::fs;
1516
use std::path::{Path, PathBuf};
1617
use std::rc::Rc;
@@ -248,6 +249,8 @@ fn clean_specs(
248249
// Try to reduce the amount of times we iterate over the same target directory by storing away
249250
// the directories we've iterated over (and cleaned for a given package).
250251
let mut cleaned_packages: HashMap<_, HashSet<_>> = HashMap::default();
252+
let mut directories_to_clean: BTreeMap<Rc<Path>, BTreeMap<_, BTreeSet<_>>> =
253+
Default::default();
251254
for pkg in packages {
252255
let pkg_dir = format!("{}-*", pkg.name());
253256
clean_ctx.progress.on_cleaning_package(&pkg.name())?;
@@ -274,8 +277,8 @@ fn clean_specs(
274277
continue;
275278
}
276279
let crate_name: Rc<str> = target.crate_name().into();
277-
let path_dot: &str = &format!("{crate_name}.");
278-
let path_dash: &str = &format!("{crate_name}-");
280+
let path_dot = format!("{crate_name}.");
281+
let path_dash = format!("{crate_name}-");
279282
for &mode in &[
280283
CompileMode::Build,
281284
CompileMode::Test,
@@ -299,14 +302,18 @@ fn clean_specs(
299302
}
300303
_ => (layout.build_dir().legacy_deps(), Some(artifact_dir.dest())),
301304
};
302-
let mut dir_glob_str = escape_glob_path(dir)?;
303-
let dir_glob = Path::new(&dir_glob_str);
305+
let dir = Rc::<Path>::from(dir);
304306
for file_type in file_types {
305307
// Some files include a hash in the filename, some don't.
306-
let hashed_name = file_type.output_filename(target, Some("*"));
308+
let (prefix, suffix) = file_type.output_prefix_suffix(target);
307309
let unhashed_name = file_type.output_filename(target, None);
310+
directories_to_clean
311+
.entry(dir.clone())
312+
.or_default()
313+
.entry(prefix)
314+
.or_default()
315+
.extend([Cow::Owned(suffix)]);
308316

309-
clean_ctx.rm_rf_glob(&dir_glob.join(&hashed_name))?;
310317
clean_ctx.rm_rf(&dir.join(&unhashed_name))?;
311318

312319
// Remove the uplifted copy.
@@ -322,36 +329,42 @@ fn clean_specs(
322329
let unhashed_dep_info = dir.join(format!("{}.d", crate_name));
323330
clean_ctx.rm_rf(&unhashed_dep_info)?;
324331

325-
if !dir_glob_str.ends_with(std::path::MAIN_SEPARATOR) {
326-
dir_glob_str.push(std::path::MAIN_SEPARATOR);
327-
}
328-
dir_glob_str.push('*');
329-
let dir_glob_str: Rc<str> = dir_glob_str.into();
330332
if cleaned_packages
331-
.entry(dir_glob_str.clone())
333+
.entry(dir.clone())
332334
.or_default()
333335
.insert(crate_name.clone())
334336
{
335-
let paths = [
336-
// Remove dep-info file generated by rustc. It is not tracked in
337-
// file_types. It does not have a prefix.
338-
(path_dash, ".d"),
339-
// Remove split-debuginfo files generated by rustc.
340-
(path_dot, ".o"),
341-
(path_dot, ".dwo"),
342-
(path_dot, ".dwp"),
343-
];
344-
clean_ctx.rm_rf_prefix_list(&dir_glob_str, &paths)?;
337+
let to_clean = directories_to_clean.entry(dir).or_default();
338+
// Remove dep-info file generated by rustc. It is not tracked in
339+
// file_types. It does not have a prefix.
340+
to_clean
341+
.entry(path_dash.clone())
342+
.or_default()
343+
.extend([Cow::Borrowed(".d")]);
344+
// Remove split-debuginfo files generated by rustc.
345+
to_clean.entry(path_dot.clone()).or_default().extend([
346+
Cow::Borrowed(".o"),
347+
Cow::Borrowed(".dwo"),
348+
Cow::Borrowed(".dwp"),
349+
]);
345350
}
346351

347352
// TODO: what to do about build_script_build?
348-
let dir = escape_glob_path(layout.build_dir().incremental())?;
349-
let incremental = Path::new(&dir).join(format!("{}-*", crate_name));
350-
clean_ctx.rm_rf_glob(&incremental)?;
353+
let dir = Rc::from(layout.build_dir().incremental());
354+
355+
directories_to_clean
356+
.entry(dir)
357+
.or_default()
358+
.entry(path_dash.clone())
359+
.or_default()
360+
.extend([Cow::Borrowed("")]);
351361
}
352362
}
353363
}
354364
}
365+
for (base_dir, entries_to_clean) in directories_to_clean {
366+
clean_ctx.rm_rf_prefix_list(&base_dir, &entries_to_clean)?;
367+
}
355368
}
356369

357370
Ok(())
@@ -412,38 +425,35 @@ impl<'gctx> CleanContext<'gctx> {
412425
Ok(())
413426
}
414427

415-
fn rm_rf_glob(&mut self, pattern: &Path) -> CargoResult<()> {
416-
// TODO: Display utf8 warning to user? Or switch to globset?
417-
let pattern = pattern
418-
.to_str()
419-
.ok_or_else(|| anyhow::anyhow!("expected utf-8 path"))?;
420-
for path in glob::glob(pattern)? {
421-
self.rm_rf(&path?)?;
422-
}
423-
Ok(())
424-
}
425-
426-
/// Removes files matching a glob and any of the provided filename patterns (prefix/suffix pairs).
427-
///
428-
/// This function iterates over files matching a glob (`pattern`) and removes those whose
429-
/// filenames start and end with specific prefix/suffix pairs. It should be more efficient for
430-
/// operations involving multiple prefix/suffix pairs, as it iterates over the directory
431-
/// only once, unlike making multiple calls to [`Self::rm_rf_glob`].
428+
/// Removes children of `root_directory` that start with any of the provided prefix/suffix pairs.
432429
fn rm_rf_prefix_list(
433430
&mut self,
434-
pattern: &str,
435-
path_matchers: &[(&str, &str)],
431+
root_directory: &Path,
432+
path_matchers: &BTreeMap<String, BTreeSet<Cow<'static, str>>>,
436433
) -> CargoResult<()> {
437-
for path in glob::glob(pattern)? {
438-
let path = path?;
439-
let filename = path.file_name().and_then(|name| name.to_str()).unwrap();
434+
let Ok(dir) = std::fs::read_dir(root_directory) else {
435+
return Ok(());
436+
};
437+
for entry in dir {
438+
let entry = entry?;
439+
let filename = entry.file_name();
440+
let filename = filename
441+
.to_str()
442+
.context("Expected file name to be a valid UTF-8 string")?;
440443
if path_matchers
441444
.iter()
442-
.any(|(prefix, suffix)| filename.starts_with(prefix) && filename.ends_with(suffix))
445+
.take_while(|(prefix, _)| filename >= prefix.as_str())
446+
.any(|(prefix, suffixes)| {
447+
filename.starts_with(prefix)
448+
&& suffixes
449+
.iter()
450+
.any(|suffix| filename.ends_with(suffix.as_ref()))
451+
})
443452
{
444-
self.rm_rf(&path)?;
453+
self.rm_rf(&entry.path())?;
445454
}
446455
}
456+
447457
Ok(())
448458
}
449459

0 commit comments

Comments
 (0)