Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply GitHub fast path even for partial hashes #10807

Merged
merged 1 commit into from
Aug 24, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 96 additions & 26 deletions src/cargo/sources/git/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ use crate::util::{network, Config, IntoUrl, MetricsCounter, Progress};
use anyhow::{anyhow, Context as _};
use cargo_util::{paths, ProcessBuilder};
use curl::easy::List;
use git2::{self, ErrorClass, ObjectType};
use git2::{self, ErrorClass, ObjectType, Oid};
use log::{debug, info};
use serde::ser;
use serde::Serialize;
use std::env;
use std::fmt;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::str;
use std::time::{Duration, Instant};
use url::Url;

Expand Down Expand Up @@ -781,11 +782,15 @@ pub fn fetch(

// If we're fetching from GitHub, attempt GitHub's special fast path for
// testing if we've already got an up-to-date copy of the repository
match github_up_to_date(repo, url, reference, config) {
Ok(true) => return Ok(()),
Ok(false) => {}
Err(e) => debug!("failed to check github {:?}", e),
}
let oid_to_fetch = match github_fast_path(repo, url, reference, config) {
Ok(FastPathRev::UpToDate) => return Ok(()),
Ok(FastPathRev::NeedsFetch(rev)) => Some(rev),
Ok(FastPathRev::Indeterminate) => None,
Err(e) => {
debug!("failed to check github {:?}", e);
None
}
};

// We reuse repositories quite a lot, so before we go through and update the
// repo check to see if it's a little too old and could benefit from a gc.
Expand Down Expand Up @@ -815,11 +820,10 @@ pub fn fetch(
}

GitReference::Rev(rev) => {
let is_github = || Url::parse(url).map_or(false, |url| is_github(&url));
if rev.starts_with("refs/") {
refspecs.push(format!("+{0}:{0}", rev));
} else if is_github() && is_long_hash(rev) {
refspecs.push(format!("+{0}:refs/commit/{0}", rev));
} else if let Some(oid_to_fetch) = oid_to_fetch {
refspecs.push(format!("+{0}:refs/commit/{0}", oid_to_fetch));
} else {
// We don't know what the rev will point to. To handle this
// situation we fetch all branches and tags, and then we pray
Expand Down Expand Up @@ -1016,45 +1020,79 @@ fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
Ok(git2::Repository::init_opts(&path, &opts)?)
}

/// Result of consulting GitHub's fast path for a reference.
enum FastPathRev {
    /// Nothing needs to be fetched: the rev found locally through
    /// `reference.resolve(repo)` already matches what this rev resolves to on
    /// GitHub's server.
    UpToDate,
    /// The commit with this SHA has to be fetched before the local rev can be
    /// considered up to date.
    NeedsFetch(Oid),
    /// It is unknown whether the local rev is up to date. The fallback is to
    /// fetch _all_ branches and tags from the server and see what happens.
    Indeterminate,
}

/// Updating the index is done pretty regularly, so we want it to be as fast as
/// possible. For registries hosted on GitHub (like the crates.io index) there's
/// a fast path available to use [1] to tell us that there are no updates to be
/// made.
///
/// This function will attempt to hit that fast path and verify that the `oid`
/// is actually the current branch of the repository. If `true` is returned then
/// no update needs to be performed, but if `false` is returned then the
/// standard update logic still needs to happen.
/// is actually the current branch of the repository.
///
/// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference
///
/// Note that this function should never cause an actual failure because it's
/// just a fast path. As a result, any error it returns is only logged by the
/// caller and otherwise ignored, and the caller falls back to the standard
/// update logic. Any real errors will be reported through the normal update
/// path above.
fn github_up_to_date(
fn github_fast_path(
repo: &mut git2::Repository,
url: &str,
reference: &GitReference,
config: &Config,
) -> CargoResult<bool> {
) -> CargoResult<FastPathRev> {
let url = Url::parse(url)?;
if !is_github(&url) {
return Ok(false);
return Ok(FastPathRev::Indeterminate);
}

let local_object = reference.resolve(repo).ok();

let github_branch_name = match reference {
GitReference::Branch(branch) => branch,
GitReference::Tag(tag) => tag,
GitReference::DefaultBranch => "HEAD",
GitReference::Rev(rev) => {
if rev.starts_with("refs/") {
rev
} else if is_long_hash(rev) {
return Ok(reference.resolve(repo).is_ok());
} else if looks_like_commit_hash(rev) {
// `revparse_single` (used by `resolve`) is the only way to turn
// short hash -> long hash, but it also parses other things,
// like branch and tag names, which might coincidentally be
// valid hex.
//
// We only return early if `rev` is a prefix of the object found
// by `revparse_single`. Don't bother talking to GitHub in that
// case, since commit hashes are permanent. If a commit with the
// requested hash is already present in the local clone, its
// contents must be the same as what is on the server for that
// hash.
//
// If `rev` is not found locally by `revparse_single`, we'll
// need GitHub to resolve it and get a hash. If `rev` is found
// but is not a short hash of the found object, it's probably a
// branch and we also need to get a hash from GitHub, in case
// the branch has moved.
if let Some(local_object) = local_object {
if is_short_hash_of(rev, local_object) {
return Ok(FastPathRev::UpToDate);
}
}
rev
} else {
debug!("can't use github fast path with `rev = \"{}\"`", rev);
return Ok(false);
return Ok(FastPathRev::Indeterminate);
}
}
};
Expand Down Expand Up @@ -1087,18 +1125,50 @@ fn github_up_to_date(
handle.get(true)?;
handle.url(&url)?;
handle.useragent("cargo")?;
let mut headers = List::new();
headers.append("Accept: application/vnd.github.3.sha")?;
headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?;
handle.http_headers(headers)?;
handle.perform()?;
Ok(handle.response_code()? == 304)
handle.http_headers({
let mut headers = List::new();
headers.append("Accept: application/vnd.github.3.sha")?;
if let Some(local_object) = local_object {
headers.append(&format!("If-None-Match: \"{}\"", local_object))?;
}
headers
dtolnay marked this conversation as resolved.
Show resolved Hide resolved
})?;

let mut response_body = Vec::new();
let mut transfer = handle.transfer();
transfer.write_function(|data| {
response_body.extend_from_slice(data);
Ok(data.len())
})?;
transfer.perform()?;
drop(transfer); // end borrow of handle so that response_code can be called

let response_code = handle.response_code()?;
if response_code == 304 {
Ok(FastPathRev::UpToDate)
} else if response_code == 200 {
let oid_to_fetch = str::from_utf8(&response_body)?.parse::<Oid>()?;
Ok(FastPathRev::NeedsFetch(oid_to_fetch))
} else {
// Usually response_code == 404 if the repository does not exist, and
// response_code == 422 if exists but GitHub is unable to resolve the
// requested rev.
Ok(FastPathRev::Indeterminate)
}
}

/// Returns `true` when the host component of `url` is exactly `github.com`.
///
/// URLs without a host, or with any other host, yield `false`.
fn is_github(url: &Url) -> bool {
    matches!(url.host_str(), Some("github.com"))
}

fn is_long_hash(rev: &str) -> bool {
rev.len() == 40 && rev.chars().all(|ch| ch.is_ascii_hexdigit())
fn looks_like_commit_hash(rev: &str) -> bool {
rev.len() >= 7 && rev.chars().all(|ch| ch.is_ascii_hexdigit())
}

/// Reports whether `rev` is a prefix of the string form of `oid`, comparing
/// ASCII letters case-insensitively.
///
/// Returns `false` when the string form of `oid` has no prefix of
/// `rev.len()` bytes, for example because `rev` is longer than it.
fn is_short_hash_of(rev: &str, oid: Oid) -> bool {
    let full_hash = oid.to_string();
    full_hash
        .get(..rev.len())
        .map_or(false, |prefix| prefix.eq_ignore_ascii_case(rev))
}