Skip to content

Commit

Permalink
Auto merge of #11308 - ehuss:clean-tmp-libgit2, r=weihanglo
Browse files Browse the repository at this point in the history
Clean stale git temp files

### What does this PR try to resolve?

When cargo is interrupted while libgit2 is indexing the pack file, it will leave behind a temp file that never gets deleted. These files can be very large. This checks for these stale temp files and deletes them.

### How should we test and review this PR?

There is a simulated test here. To test with the actual behavior:

1. Run `CARGO_HOME=chome cargo fetch`
2. While it is "resolving deltas", hit Ctrl-C.
3. Notice that there is a 200MB file in `chome/registry/index/github.com-1ecc6299db9ec823/.git/objects/pack/`
4. Do that several times if you want, each time adds another 200MB file.
5. Build this PR: `cargo b -r`
6. Run `CARGO_HOME=chome CARGO_LOG=cargo::sources::git::utils=debug ./target/release/cargo fetch`
7. Notice that it deletes the `pack_git2_*` files.
  • Loading branch information
bors committed Nov 1, 2022
2 parents da20496 + 754de17 commit 37cad5b
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 1 deletion.
39 changes: 39 additions & 0 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,8 @@ pub fn fetch(
// request we're about to issue.
maybe_gc_repo(repo)?;

clean_repo_temp_files(repo);

// Translate the reference desired here into an actual list of refspecs
// which need to get fetched. Additionally record if we're fetching tags.
let mut refspecs = Vec::new();
Expand Down Expand Up @@ -996,6 +998,43 @@ fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
reinitialize(repo)
}

/// Removes temporary files left from previous activity.
///
/// If libgit2 is interrupted while indexing pack files, it will leave behind
/// some temporary files that it doesn't clean up. These can be quite large in
/// size, so this tries to clean things up.
///
/// This intentionally ignores errors. This is only an opportunistic cleaning,
/// and we don't really care if there are issues (there's unlikely anything
/// that can be done).
///
/// The git CLI has similar behavior (its temp files look like
/// `objects/pack/tmp_pack_9kUSA8`). Those files are normally deleted via `git
/// prune` which is run by `git gc`. However, it doesn't know about libgit2's
/// filenames, so they never get cleaned up.
fn clean_repo_temp_files(repo: &git2::Repository) {
let path = repo.path().join("objects/pack/pack_git2_*");
let pattern = match path.to_str() {
Some(p) => p,
None => {
log::warn!("cannot convert {path:?} to a string");
return;
}
};
let paths = match glob::glob(pattern) {
Ok(paths) => paths,
Err(_) => return,
};
for path in paths {
if let Ok(path) = path {
match paths::remove_file(&path) {
Ok(_) => log::debug!("removed stale temp git file {path:?}"),
Err(e) => log::warn!("failed to remove {path:?} while cleaning temp files: {e}"),
}
}
}
}

fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
// Here we want to drop the current repository object pointed to by `repo`,
// so we initialize temporary repository in a sub-folder, blow away the
Expand Down
33 changes: 33 additions & 0 deletions tests/testsuite/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::sync::Arc;
use std::thread;

use cargo_test_support::paths::{self, CargoPathExt};
use cargo_test_support::registry::Package;
use cargo_test_support::{basic_lib_manifest, basic_manifest, git, main_file, path2url, project};
use cargo_test_support::{sleep_ms, t, Project};

Expand Down Expand Up @@ -3619,3 +3620,35 @@ fn _corrupted_checkout(with_cli: bool) {
e.run();
assert!(ok.exists());
}

#[cargo_test]
fn cleans_temp_pack_files() {
// Checks that cargo removes temp files left by libgit2 when it is
// interrupted (see clean_repo_temp_files).
Package::new("bar", "1.0.0").publish();
let p = project()
.file(
"Cargo.toml",
r#"
[package]
name = "foo"
version = "0.1.0"
[dependencies]
bar = "1.0"
"#,
)
.file("src/lib.rs", "")
.build();
p.cargo("fetch").run();
// Simulate what happens when libgit2 is interrupted while indexing a pack file.
let tmp_path = super::git_gc::find_index().join(".git/objects/pack/pack_git2_91ab40da04fdc2e7");
fs::write(&tmp_path, "test").unwrap();
let mut perms = fs::metadata(&tmp_path).unwrap().permissions();
perms.set_readonly(true);
fs::set_permissions(&tmp_path, perms).unwrap();

// Trigger an index update.
p.cargo("generate-lockfile").run();
assert!(!tmp_path.exists());
}
2 changes: 1 addition & 1 deletion tests/testsuite/git_gc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use cargo_test_support::registry::Package;

use url::Url;

fn find_index() -> PathBuf {
pub fn find_index() -> PathBuf {
let dir = paths::home().join(".cargo/registry/index");
dir.read_dir().unwrap().next().unwrap().unwrap().path()
}
Expand Down

0 comments on commit 37cad5b

Please sign in to comment.