Skip to content

Commit

Permalink
support minimal 'fetch' using gitoxide
Browse files Browse the repository at this point in the history
This most notably excludes:

* progress
* SSH name guessing
* retry on spurious timeout/connection issues
  • Loading branch information
Byron committed Dec 4, 2022
1 parent 0acd0eb commit 1759263
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 41 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ filetime = "0.2.9"
flate2 = { version = "1.0.3", default-features = false, features = ["zlib"] }
git2 = "0.15.0"
git2-curl = "0.16.0"
git-repository = { version = "0.29.0", features = ["blocking-http-transport-curl"] }
glob = "0.3.0"
hex = "0.4"
home = "0.5"
Expand Down
1 change: 1 addition & 0 deletions src/cargo/sources/git/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub use self::source::GitSource;
pub use self::utils::{fetch, GitCheckout, GitDatabase, GitRemote};
mod oxide;
mod source;
mod utils;
69 changes: 69 additions & 0 deletions src/cargo/sources/git/oxide.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//! This module contains all code that uses `gitoxide` for operations on `git` repositories. It mirrors
//! `utils` closely for now; one day it can be renamed to `utils` once `git2` isn't required anymore.
use crate::util::{network, Progress};
use crate::{CargoResult, Config};
use git_repository as git;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use std::time::Duration;

/// Opens the repository at `repo_path` with `gitoxide` and runs `cb` against it via
/// [`network::with_retry`], while a helper thread periodically snapshots the progress tree.
///
/// `cb` is handed the opened repository, an `AtomicBool` interrupt flag (created as `false`
/// and never set by this function) and a progress item it can attach children to.
///
/// For the time being, `repo_path` makes it easy to instantiate a gitoxide repo just for fetching.
/// In future this may change to be the gitoxide repository itself.
///
/// # Errors
///
/// Fails if the repository at `repo_path` cannot be opened, or with whatever error
/// `network::with_retry` yields for `cb`.
pub fn with_retry_and_progress(
    repo_path: &std::path::Path,
    config: &Config,
    cb: &mut (dyn FnMut(
        &git::Repository,
        &AtomicBool,
        &mut git::progress::tree::Item,
    ) -> CargoResult<()>
              + Send),
) -> CargoResult<()> {
    let repo = git::open_opts(repo_path, {
        let mut opts = git::open::Options::default();
        // We need `git_binary` configuration as well for being able to see credential helpers
        // that are configured with the `git` installation itself.
        // However, this is slow on windows (~150ms) and most people won't need it as they use the
        // standard index which won't ever need authentication.
        // TODO: This is certainly something to make configurable, at the very least on windows.
        //       Maybe it's also something that could be cached, all we need is the path to the
        //       configuration file which usually doesn't change unless the installation changes.
        //       Maybe something keyed by the location of the binary along with its fingerprint.
        opts.permissions.config = git::permissions::Config::all();
        opts
    })?;

    let progress_root: Arc<git::progress::tree::Root> = git::progress::tree::root::Options {
        initial_capacity: 10,
        message_buffer_capacity: 10,
    }
    .into();
    let mut progress = progress_root.add_child("operation");

    // For decent interrupts of long-running computations and removal of temp files we should
    // handle interrupts, and this is an easy way to do that. The handlers are removed again when
    // the guard is dropped.
    // We intentionally swallow errors here as if for some reason we can't register handlers,
    // `cargo` will just work like before and abort on signals.
    let _deregister_signal_handlers_on_drop = git::interrupt::init_handler(|| {})
        .unwrap_or_default()
        .auto_deregister();
    let should_interrupt = AtomicBool::new(false);
    let _progress_bar = Progress::new("Fetch", config);
    std::thread::scope(move |s| {
        // Nothing is ever sent on this channel. Dropping the sender disconnects it, which wakes
        // the snapshot thread immediately instead of letting it finish a full sleep interval
        // while the scope is already waiting to join it.
        let (stop_tx, stop_rx) = std::sync::mpsc::channel::<()>();
        s.spawn({
            let weak_root = Arc::downgrade(&progress_root);
            move || {
                let mut tasks = Vec::with_capacity(10);
                while let Some(root) = weak_root.upgrade() {
                    root.sorted_snapshot(&mut tasks);
                    // Don't keep the progress tree alive while waiting for the next tick.
                    drop(root);
                    match stop_rx.recv_timeout(Duration::from_millis(300)) {
                        Err(std::sync::mpsc::RecvTimeoutError::Timeout) => continue,
                        // Disconnected (or an unexpected message): the operation is done.
                        _ => break,
                    }
                }
            }
        });
        let res = network::with_retry(config, || cb(&repo, &should_interrupt, &mut progress));
        drop(stop_tx);
        res
    })
}
155 changes: 114 additions & 41 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
//! authentication/cloning.
use crate::core::GitReference;
use crate::sources::git::oxide;
use crate::util::errors::CargoResult;
use crate::util::{human_readable_bytes, network, Config, IntoUrl, MetricsCounter, Progress};
use anyhow::{anyhow, Context as _};
Expand Down Expand Up @@ -855,51 +856,123 @@ pub fn fetch(
if let Some(true) = config.net_config()?.git_fetch_with_cli {
return fetch_with_cli(repo, url, &refspecs, tags, config);
}

debug!("doing a fetch for {}", url);
let git_config = git2::Config::open_default()?;
with_fetch_options(&git_config, url, config, &mut |mut opts| {
if tags {
opts.download_tags(git2::AutotagOption::All);
}
// The `fetch` operation here may fail spuriously due to a corrupt
// repository. It could also fail, however, for a whole slew of other
// reasons (aka network related reasons). We want Cargo to automatically
// recover from corrupt repositories, but we don't want Cargo to stomp
// over other legitimate errors.
//
// Consequently we save off the error of the `fetch` operation and if it
// looks like a "corrupt repo" error then we blow away the repo and try
// again. If it looks like any other kind of error, or if we've already
// blown away the repository, then we want to return the error as-is.
let mut repo_reinitialized = false;
loop {
debug!("initiating fetch of {:?} from {}", refspecs, url);
let res = repo
.remote_anonymous(url)?
.fetch(&refspecs, Some(&mut opts), None);
let err = match res {
Ok(()) => break,
Err(e) => e,
};
debug!("fetch failed: {}", err);

if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
{
repo_reinitialized = true;
debug!(
"looks like this is a corrupt repository, reinitializing \
if config
.cli_unstable()
.gitoxide
.map_or(false, |git| git.fetch)
{
use git::remote::fetch::Error;
use git_repository as git;
let git2_repo = repo;
oxide::with_retry_and_progress(
&git2_repo.path().to_owned(),
config,
&mut |repo, should_interrupt, progress| {
// The `fetch` operation here may fail spuriously due to a corrupt
// repository. It could also fail, however, for a whole slew of other
// reasons (aka network related reasons). We want Cargo to automatically
// recover from corrupt repositories, but we don't want Cargo to stomp
// over other legitimate errors.
//
// Consequently we save off the error of the `fetch` operation and if it
// looks like a "corrupt repo" error then we blow away the repo and try
// again. If it looks like any other kind of error, or if we've already
// blown away the repository, then we want to return the error as-is.
let mut repo_reinitialized = false;
let mut repo_storage;
let mut repo = &*repo;
loop {
debug!("initiating fetch of {:?} from {}", refspecs, url);
let res = repo
.remote_at(url)?
.with_refspecs(
refspecs.iter().map(|s| s.as_str()),
git::remote::Direction::Fetch,
)?
.connect(git::remote::Direction::Fetch, progress.add_child("fetch"))?
.prepare_fetch(git::remote::ref_map::Options::default())?
.receive(should_interrupt);
let err = match res {
Ok(_) => break,
Err(e) => e,
};
debug!("fetch failed: {}", err);

if !repo_reinitialized
&& matches!(
err,
Error::Configuration { .. }
| Error::IncompatibleObjectHash { .. }
| Error::WritePack(_)
| Error::UpdateRefs(_)
| Error::RemovePackKeepFile { .. }
)
{
repo_reinitialized = true;
debug!(
"looks like this is a corrupt repository, reinitializing \
and trying again"
);
if reinitialize(repo).is_ok() {
continue;
);
if reinitialize(git2_repo).is_ok() {
repo_storage =
git::open_opts(repo.path(), repo.open_options().to_owned())?;
repo = &repo_storage;
continue;
}
}

return Err(err.into());
}
Ok(())
},
)
} else {
debug!("doing a fetch for {}", url);
let git_config = git2::Config::open_default()?;
with_fetch_options(&git_config, url, config, &mut |mut opts| {
if tags {
opts.download_tags(git2::AutotagOption::All);
}
// The `fetch` operation here may fail spuriously due to a corrupt
// repository. It could also fail, however, for a whole slew of other
// reasons (aka network related reasons). We want Cargo to automatically
// recover from corrupt repositories, but we don't want Cargo to stomp
// over other legitimate errors.
//
// Consequently we save off the error of the `fetch` operation and if it
// looks like a "corrupt repo" error then we blow away the repo and try
// again. If it looks like any other kind of error, or if we've already
// blown away the repository, then we want to return the error as-is.
let mut repo_reinitialized = false;
loop {
debug!("initiating fetch of {:?} from {}", refspecs, url);
let res = repo
.remote_anonymous(url)?
.fetch(&refspecs, Some(&mut opts), None);
let err = match res {
Ok(()) => break,
Err(e) => e,
};
debug!("fetch failed: {}", err);

if !repo_reinitialized
&& matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
{
repo_reinitialized = true;
debug!(
"looks like this is a corrupt repository, reinitializing \
and trying again"
);
if reinitialize(repo).is_ok() {
continue;
}
}

return Err(err.into());
}
Ok(())
})
return Err(err.into());
}
Ok(())
})
}
}

fn fetch_with_cli(
Expand Down

0 comments on commit 1759263

Please sign in to comment.