diff --git a/src/cargo/core/resolver/dep_cache.rs b/src/cargo/core/resolver/dep_cache.rs index 7e3f4fb5938..1f6c49ca0fb 100644 --- a/src/cargo/core/resolver/dep_cache.rs +++ b/src/cargo/core/resolver/dep_cache.rs @@ -14,8 +14,10 @@ use crate::core::resolver::errors::describe_path; use crate::core::resolver::types::{ConflictReason, DepInfo, FeaturesSet}; use crate::core::resolver::{ActivateError, ActivateResult, ResolveOpts}; use crate::core::{Dependency, FeatureValue, PackageId, PackageIdSpec, Registry, Summary}; +use crate::core::{GitReference, SourceId}; use crate::util::errors::{CargoResult, CargoResultExt}; use crate::util::interning::InternedString; +use crate::util::Config; use log::debug; use std::cmp::Ordering; use std::collections::{BTreeSet, HashMap, HashSet}; @@ -38,6 +40,10 @@ pub struct RegistryQueryer<'a> { >, /// all the cases we ended up using a supplied replacement used_replacements: HashMap, + /// Where to print warnings, if configured. + config: Option<&'a Config>, + /// Sources that we've already wared about possibly colliding in the future. + warned_git_collisions: HashSet, } impl<'a> RegistryQueryer<'a> { @@ -46,6 +52,7 @@ impl<'a> RegistryQueryer<'a> { replacements: &'a [(PackageIdSpec, Dependency)], try_to_use: &'a HashSet, minimal_versions: bool, + config: Option<&'a Config>, ) -> Self { RegistryQueryer { registry, @@ -55,6 +62,8 @@ impl<'a> RegistryQueryer<'a> { registry_cache: HashMap::new(), summary_cache: HashMap::new(), used_replacements: HashMap::new(), + config, + warned_git_collisions: HashSet::new(), } } @@ -66,6 +75,44 @@ impl<'a> RegistryQueryer<'a> { self.used_replacements.get(&p) } + /// Issues a future-compatible warning targeted at removing reliance on + /// unifying behavior between these two dependency directives: + /// + /// ```toml + /// [dependencies] + /// a = { git = 'https://example.org/foo' } + /// a = { git = 'https://example.org/foo', branch = 'master } + /// ``` + /// + /// Historical versions of Cargo considered these equivalent but going + /// forward we'd like to fix this. For more details see the comments in + /// src/cargo/sources/git/utils.rs + fn warn_colliding_git_sources(&mut self, id: SourceId) -> CargoResult<()> { + let config = match self.config { + Some(config) => config, + None => return Ok(()), + }; + let prev = match self.warned_git_collisions.replace(id) { + Some(prev) => prev, + None => return Ok(()), + }; + match (id.git_reference(), prev.git_reference()) { + (Some(GitReference::DefaultBranch), Some(GitReference::Branch(b))) + | (Some(GitReference::Branch(b)), Some(GitReference::DefaultBranch)) + if b == "master" => {} + _ => return Ok(()), + } + + config.shell().warn(&format!( + "two git dependencies found for `{}` \ + where one uses `branch = \"master\"` and the other doesn't; \ + this will break in a future version of Cargo, so please \ + ensure the dependency forms are consistent", + id.url(), + ))?; + Ok(()) + } + /// Queries the `registry` to return a list of candidates for `dep`. /// /// This method is the location where overrides are taken into account. If @@ -73,6 +120,7 @@ impl<'a> RegistryQueryer<'a> { /// applied by performing a second query for what the override should /// return. pub fn query(&mut self, dep: &Dependency) -> CargoResult>> { + self.warn_colliding_git_sources(dep.source_id())?; if let Some(out) = self.registry_cache.get(dep).cloned() { return Ok(out); } diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 6cec24702f4..0531f3dba89 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -133,7 +133,8 @@ pub fn resolve( Some(config) => config.cli_unstable().minimal_versions, None => false, }; - let mut registry = RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions); + let mut registry = + RegistryQueryer::new(registry, replacements, try_to_use, minimal_versions, config); let cx = activate_deps_loop(cx, &mut registry, summaries, config)?; let mut cksums = HashMap::new(); diff --git a/src/cargo/core/resolver/resolve.rs b/src/cargo/core/resolver/resolve.rs index d024959b9e9..38942bfb900 100644 --- a/src/cargo/core/resolver/resolve.rs +++ b/src/cargo/core/resolver/resolve.rs @@ -409,6 +409,6 @@ impl Default for ResolveVersion { /// file anyway so it takes the opportunity to bump the lock file version /// forward. fn default() -> ResolveVersion { - ResolveVersion::V3 + ResolveVersion::V2 } } diff --git a/src/cargo/core/source/source_id.rs b/src/cargo/core/source/source_id.rs index 6061a9cf7db..a1183f39670 100644 --- a/src/cargo/core/source/source_id.rs +++ b/src/cargo/core/source/source_id.rs @@ -394,9 +394,45 @@ impl Ord for SourceId { // Sort first based on `kind`, deferring to the URL comparison below if // the kinds are equal. - match self.inner.kind.cmp(&other.inner.kind) { - Ordering::Equal => {} - other => return other, + match (&self.inner.kind, &other.inner.kind) { + (SourceKind::Path, SourceKind::Path) => {} + (SourceKind::Path, _) => return Ordering::Less, + (_, SourceKind::Path) => return Ordering::Greater, + + (SourceKind::Registry, SourceKind::Registry) => {} + (SourceKind::Registry, _) => return Ordering::Less, + (_, SourceKind::Registry) => return Ordering::Greater, + + (SourceKind::LocalRegistry, SourceKind::LocalRegistry) => {} + (SourceKind::LocalRegistry, _) => return Ordering::Less, + (_, SourceKind::LocalRegistry) => return Ordering::Greater, + + (SourceKind::Directory, SourceKind::Directory) => {} + (SourceKind::Directory, _) => return Ordering::Less, + (_, SourceKind::Directory) => return Ordering::Greater, + + (SourceKind::Git(a), SourceKind::Git(b)) => { + use GitReference::*; + let ord = match (a, b) { + (Tag(a), Tag(b)) => a.cmp(b), + (Tag(_), _) => Ordering::Less, + (_, Tag(_)) => Ordering::Greater, + + (Rev(a), Rev(b)) => a.cmp(b), + (Rev(_), _) => Ordering::Less, + (_, Rev(_)) => Ordering::Greater, + + // See module comments in src/cargo/sources/git/utils.rs + // for why `DefaultBranch` is treated specially here. + (Branch(a), DefaultBranch) => a.as_str().cmp("master"), + (DefaultBranch, Branch(b)) => "master".cmp(b), + (Branch(a), Branch(b)) => a.cmp(b), + (DefaultBranch, DefaultBranch) => Ordering::Equal, + }; + if ord != Ordering::Equal { + return ord; + } + } } // If the `kind` and the `url` are equal, then for git sources we also @@ -473,9 +509,43 @@ impl fmt::Display for SourceId { // The hash of SourceId is used in the name of some Cargo folders, so shouldn't // vary. `as_str` gives the serialisation of a url (which has a spec) and so // insulates against possible changes in how the url crate does hashing. +// +// Note that the semi-funky hashing here is done to handle `DefaultBranch` +// hashing the same as `"master"`, and also to hash the same as previous +// versions of Cargo while it's somewhat convenient to do so (that way all +// versions of Cargo use the same checkout). impl Hash for SourceId { fn hash(&self, into: &mut S) { - self.inner.kind.hash(into); + match &self.inner.kind { + SourceKind::Git(GitReference::Tag(a)) => { + 0usize.hash(into); + 0usize.hash(into); + a.hash(into); + } + SourceKind::Git(GitReference::Branch(a)) => { + 0usize.hash(into); + 1usize.hash(into); + a.hash(into); + } + // For now hash `DefaultBranch` the same way as `Branch("master")`, + // and for more details see module comments in + // src/cargo/sources/git/utils.rs for why `DefaultBranch` + SourceKind::Git(GitReference::DefaultBranch) => { + 0usize.hash(into); + 1usize.hash(into); + "master".hash(into); + } + SourceKind::Git(GitReference::Rev(a)) => { + 0usize.hash(into); + 2usize.hash(into); + a.hash(into); + } + + SourceKind::Path => 1usize.hash(into), + SourceKind::Registry => 2usize.hash(into), + SourceKind::LocalRegistry => 3usize.hash(into), + SourceKind::Directory => 4usize.hash(into), + } match self.inner.kind { SourceKind::Git(_) => self.inner.canonical_url.hash(into), _ => self.inner.url.as_str().hash(into), diff --git a/src/cargo/sources/git/source.rs b/src/cargo/sources/git/source.rs index 9d7c42b829f..0723e360628 100644 --- a/src/cargo/sources/git/source.rs +++ b/src/cargo/sources/git/source.rs @@ -126,10 +126,12 @@ impl<'cfg> Source for GitSource<'cfg> { // database, then try to resolve our reference with the preexisting // repository. (None, Some(db)) if self.config.offline() => { - let rev = db.resolve(&self.manifest_reference).with_context(|| { - "failed to lookup reference in preexisting repository, and \ + let rev = db + .resolve(&self.manifest_reference, None) + .with_context(|| { + "failed to lookup reference in preexisting repository, and \ can't check for updates in offline mode (--offline)" - })?; + })?; (db, rev) } diff --git a/src/cargo/sources/git/utils.rs b/src/cargo/sources/git/utils.rs index 1998f27a23e..bdb3638b61c 100644 --- a/src/cargo/sources/git/utils.rs +++ b/src/cargo/sources/git/utils.rs @@ -1,5 +1,110 @@ //! Utilities for handling git repositories, mainly around //! authentication/cloning. +//! +//! # `DefaultBranch` vs `Branch("master")` +//! +//! Long ago in a repository not so far away, an author (*cough* me *cough*) +//! didn't understand how branches work in Git. This led the author to +//! interpret these two dependency declarations the exact same way with the +//! former literally internally desugaring to the latter: +//! +//! ```toml +//! [dependencies] +//! foo = { git = "https://example.org/foo" } +//! foo = { git = "https://example.org/foo", branch = "master" } +//! ``` +//! +//! It turns out there's this things called `HEAD` in git remotes which points +//! to the "main branch" of a repository, and the main branch is not always +//! literally called master. What Cargo would like to do is to differentiate +//! these two dependency directives, with the first meaning "depend on `HEAD`". +//! +//! Unfortunately implementing this is a breaking change. This was first +//! attempted in #8364 but resulted in #8468 which has two independent bugs +//! listed on that issue. Despite this breakage we would still like to roll out +//! this change in Cargo, but we're now going to take it very slow and try to +//! break as few people as possible along the way. These comments are intended +//! to log the current progress and what wonkiness you might see within Cargo +//! when handling `DefaultBranch` vs `Branch("master")` +//! +//! ### Repositories with `master` and a default branch +//! +//! This is one of the most obvious sources of breakage. If our `foo` example +//! in above had two branches, one called `master` and another which was +//! actually the main branch, then Cargo's change will always be a breaking +//! change. This is because what's downloaded is an entirely different branch +//! if we change the meaning of the dependency directive. +//! +//! It's expected this is quite rare, but to handle this case nonetheless when +//! Cargo fetches from a git remote and the dependency specification is +//! `DefaultBranch` then it will issue a warning if the `HEAD` reference +//! doesn't match `master`. It's expected in this situation that authors will +//! fix builds locally by specifying `branch = 'master'`. +//! +//! ### Differences in `cargo vendor` configuration +//! +//! When executing `cargo vendor` it will print out configuration which can +//! then be used to configure Cargo to use the `vendor` directory. Historically +//! this configuration looked like: +//! +//! ```toml +//! [source."https://example.org/foo"] +//! git = "https://example.org/foo" +//! branch = "master" +//! replace-with = "vendored-sources" +//! ``` +//! +//! We would like to, however, transition this to not include the `branch = +//! "master"` unless the dependency directive actually mentions a branch. +//! Conveniently older Cargo implementations all interpret a missing `branch` +//! as `branch = "master"` so it's a backwards-compatible change to remove the +//! `branch = "master"` directive. As a result, `cargo vendor` will no longer +//! emit a `branch` if the git reference is `DefaultBranch` +//! +//! ### Differences in lock file formats +//! +//! Another issue pointed out in #8364 was that `Cargo.lock` files were no +//! longer compatible on stable and nightly with each other. The underlying +//! issue is that Cargo was serializing `branch = "master"` *differently* on +//! nightly than it was on stable. Historical implementations of Cargo would +//! encode `DefaultBranch` and `Branch("master")` the same way in `Cargo.lock`, +//! so when reading a lock file we have no way of differentiating between the +//! two. +//! +//! To handle this difference in encoding of `Cargo.lock` we'll be employing +//! the standard scheme to change `Cargo.lock`: +//! +//! * Add support in Cargo for a future format, don't turn it on. +//! * Wait a long time +//! * Turn on the future format +//! +//! Here the "future format" is `branch=master` shows up if you have a `branch` +//! in `Cargo.toml`, and otherwise nothing shows up in URLs. Due to the effect +//! on crate graph resolution, however, this flows into the next point.. +//! +//! ### Unification in the Cargo dependency graph +//! +//! Today dependencies with `branch = "master"` will unify with dependencies +//! that say nothing. (that's because the latter simply desugars). This means +//! the two `foo` directives above will resolve to the same dependency. +//! +//! The best idea I've got to fix this is to basically get everyone (if anyone) +//! to stop doing this today. The crate graph resolver will start to warn if it +//! detects that multiple `Cargo.toml` directives are detected and mixed. The +//! thinking is that when we turn on the new lock file format it'll also be +//! hard breaking change for any project which still has dependencies to +//! both the `master` branch and not. +//! +//! ### What we're doing today +//! +//! The general goal of Cargo today is to internally distinguish +//! `DefaultBranch` and `Branch("master")`, but for the time being they should +//! be functionally equivalent in terms of builds. The hope is that we'll let +//! all these warnings and such bake for a good long time, and eventually we'll +//! flip some switches if your build has no warnings it'll work before and +//! after. +//! +//! That's the dream at least, we'll see how this plays out. use crate::core::GitReference; use crate::util::errors::{CargoResult, CargoResultExt}; @@ -77,7 +182,7 @@ impl GitRemote { } pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult { - reference.resolve(&self.db_at(path)?.repo) + reference.resolve(&self.db_at(path)?.repo, None) } pub fn checkout( @@ -102,7 +207,7 @@ impl GitRemote { } } None => { - if let Ok(rev) = reference.resolve(&db.repo) { + if let Ok(rev) = reference.resolve(&db.repo, Some((&self.url, cargo_config))) { return Ok((db, rev)); } } @@ -121,7 +226,7 @@ impl GitRemote { .context(format!("failed to clone into: {}", into.display()))?; let rev = match locked_rev { Some(rev) => rev, - None => reference.resolve(&repo)?, + None => reference.resolve(&repo, Some((&self.url, cargo_config)))?, }; Ok(( @@ -190,13 +295,21 @@ impl GitDatabase { self.repo.revparse_single(&oid.to_string()).is_ok() } - pub fn resolve(&self, r: &GitReference) -> CargoResult { - r.resolve(&self.repo) + pub fn resolve( + &self, + r: &GitReference, + remote_and_config: Option<(&Url, &Config)>, + ) -> CargoResult { + r.resolve(&self.repo, remote_and_config) } } impl GitReference { - pub fn resolve(&self, repo: &git2::Repository) -> CargoResult { + pub fn resolve( + &self, + repo: &git2::Repository, + remote_and_config: Option<(&Url, &Config)>, + ) -> CargoResult { let id = match self { // Note that we resolve the named tag here in sync with where it's // fetched into via `fetch` below. @@ -221,11 +334,38 @@ impl GitReference { .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))? } - // We'll be using the HEAD commit + // See the module docs for why we're using `master` here. GitReference::DefaultBranch => { - let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?; - let head = repo.find_object(head_id, None)?; - head.peel(ObjectType::Commit)?.id() + let master = repo + .find_branch("origin/master", git2::BranchType::Remote) + .chain_err(|| "failed to find branch `master`")?; + let master = master + .get() + .target() + .ok_or_else(|| anyhow::format_err!("branch `master` did not have a target"))?; + + if let Some((remote, config)) = remote_and_config { + let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?; + let head = repo.find_object(head_id, None)?; + let head = head.peel(ObjectType::Commit)?.id(); + + if head != master { + config.shell().warn(&format!( + "\ + fetching `master` branch from `{}` but the `HEAD` \ + reference for this repository is not the \ + `master` branch. This behavior will change \ + in Cargo in the future and your build may \ + break, so it's recommended to place \ + `branch = \"master\"` in Cargo.toml when \ + depending on this git repository to ensure \ + that your build will continue to work.\ + ", + remote, + ))?; + } + } + master } GitReference::Rev(s) => { @@ -759,6 +899,8 @@ pub fn fetch( } GitReference::DefaultBranch => { + // See the module docs for why we're fetching `master` here. + refspecs.push(String::from("refs/heads/master:refs/remotes/origin/master")); refspecs.push(String::from("HEAD:refs/remotes/origin/HEAD")); } @@ -1024,7 +1166,10 @@ fn github_up_to_date( handle.useragent("cargo")?; let mut headers = List::new(); headers.append("Accept: application/vnd.github.3.sha")?; - headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?; + headers.append(&format!( + "If-None-Match: \"{}\"", + reference.resolve(repo, None)? + ))?; handle.http_headers(headers)?; handle.perform()?; Ok(handle.response_code()? == 304) diff --git a/src/cargo/sources/registry/remote.rs b/src/cargo/sources/registry/remote.rs index 91bf3301b21..d3f9eb9c03c 100644 --- a/src/cargo/sources/registry/remote.rs +++ b/src/cargo/sources/registry/remote.rs @@ -106,7 +106,7 @@ impl<'cfg> RemoteRegistry<'cfg> { fn head(&self) -> CargoResult { if self.head.get().is_none() { let repo = self.repo()?; - let oid = self.index_git_ref.resolve(repo)?; + let oid = self.index_git_ref.resolve(repo, None)?; self.head.set(Some(oid)); } Ok(self.head.get().unwrap()) diff --git a/tests/testsuite/git.rs b/tests/testsuite/git.rs index b0b7a1b7fbb..6b64592cfda 100644 --- a/tests/testsuite/git.rs +++ b/tests/testsuite/git.rs @@ -2787,6 +2787,66 @@ to proceed despite [..] git_project.cargo("package --no-verify").run(); } +#[cargo_test] +fn default_not_master() { + let project = project(); + + // Create a repository with a `master` branch, but switch the head to a + // branch called `main` at the same time. + let (git_project, repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "pub fn foo() {}") + }); + let head_id = repo.head().unwrap().target().unwrap(); + let head = repo.find_commit(head_id).unwrap(); + repo.branch("main", &head, false).unwrap(); + repo.set_head("refs/heads/main").unwrap(); + + // Then create a commit on the new `main` branch so `master` and `main` + // differ. + git_project.change_file("src/lib.rs", ""); + git::add(&repo); + git::commit(&repo); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "pub fn foo() { dep1::foo() }") + .build(); + + project + .cargo("build") + .with_stderr( + "\ +[UPDATING] git repository `[..]` +warning: fetching `master` branch from `[..]` but the `HEAD` \ + reference for this repository is not the \ + `master` branch. This behavior will change \ + in Cargo in the future and your build may \ + break, so it's recommended to place \ + `branch = \"master\"` in Cargo.toml when \ + depending on this git repository to ensure \ + that your build will continue to work. +[COMPILING] dep1 v0.5.0 ([..]) +[COMPILING] foo v0.5.0 ([..]) +[FINISHED] dev [unoptimized + debuginfo] target(s) in [..]", + ) + .run(); +} + #[cargo_test] fn historical_lockfile_works() { let project = project(); @@ -2899,6 +2959,68 @@ dependencies = [ project.cargo("build").run(); } +#[cargo_test] +fn two_dep_forms() { + let project = project(); + + let (git_project, _repo) = git::new_repo("dep1", |project| { + project + .file("Cargo.toml", &basic_lib_manifest("dep1")) + .file("src/lib.rs", "") + }); + + let project = project + .file( + "Cargo.toml", + &format!( + r#" + [project] + name = "foo" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}', branch = 'master' }} + a = {{ path = 'a' }} + "#, + git_project.url() + ), + ) + .file("src/lib.rs", "") + .file( + "a/Cargo.toml", + &format!( + r#" + [project] + name = "a" + version = "0.5.0" + + [dependencies] + dep1 = {{ git = '{}' }} + "#, + git_project.url() + ), + ) + .file("a/src/lib.rs", "") + .build(); + + project + .cargo("build") + .with_stderr( + "\ +[UPDATING] [..] +warning: two git dependencies found for `[..]` where one uses `branch = \"master\"` \ +and the other doesn't; this will break in a future version of Cargo, so please \ +ensure the dependency forms are consistent +warning: [..] +[COMPILING] [..] +[COMPILING] [..] +[COMPILING] [..] +[FINISHED] [..] +", + ) + .run(); +} + #[cargo_test] fn metadata_master_consistency() { // SourceId consistency in the `cargo metadata` output when `master` is diff --git a/tests/testsuite/lockfile_compat.rs b/tests/testsuite/lockfile_compat.rs index 8a16b5454d7..37bc6e33641 100644 --- a/tests/testsuite/lockfile_compat.rs +++ b/tests/testsuite/lockfile_compat.rs @@ -26,8 +26,6 @@ fn oldest_lockfile_still_works_with_command(cargo_command: &str) { let expected_lockfile = r#"# This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 - [[package]] name = "bar" version = "0.1.0" @@ -172,8 +170,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" assert_lockfiles_eq( r#"# This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 - [[package]] name = "bar" version = "0.1.0" @@ -412,8 +408,6 @@ fn current_lockfile_format() { let expected = "\ # This file is automatically @generated by Cargo.\n# It is not intended for manual editing. -version = 3 - [[package]] name = \"bar\" version = \"0.1.0\" @@ -474,8 +468,6 @@ dependencies = [ assert_lockfiles_eq( r#"# [..] # [..] -version = 3 - [[package]] name = "bar" version = "0.1.0" diff --git a/tests/testsuite/update.rs b/tests/testsuite/update.rs index 8a4d589989f..37680cba275 100644 --- a/tests/testsuite/update.rs +++ b/tests/testsuite/update.rs @@ -584,7 +584,8 @@ fn preserve_top_comment() { let mut lines = lockfile.lines().collect::>(); lines.insert(2, "# some other comment"); let mut lockfile = lines.join("\n"); - lockfile.push('\n'); // .lines/.join loses the last newline + lockfile.push_str("\n\n"); // .lines/.join loses the last newline + // >>>>>>> parent of 7dd9872c1... Change git dependencies to use `HEAD` by default println!("saving Cargo.lock contents:\n{}", lockfile); p.change_file("Cargo.lock", &lockfile);