From 221a2b33a6a042f31d21ad491bbdcc8ce31c9ee1 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 1 Oct 2024 11:31:18 -0400 Subject: [PATCH] Allow dependency metadata entires for direct URL requirements --- .../src/dependency_metadata.rs | 64 +++++++-- .../src/distribution_database.rs | 22 +-- crates/uv-distribution/src/source/mod.rs | 34 +++++ crates/uv-git/src/git.rs | 2 +- crates/uv-git/src/resolver.rs | 44 ++++++ crates/uv-git/src/source.rs | 62 +++++++++ crates/uv-resolver/src/redirect.rs | 4 +- crates/uv/tests/it/lock.rs | 125 ++++++++++++++++++ docs/concepts/projects.md | 6 + docs/concepts/resolution.md | 6 + 10 files changed, 341 insertions(+), 28 deletions(-) diff --git a/crates/uv-distribution-types/src/dependency_metadata.rs b/crates/uv-distribution-types/src/dependency_metadata.rs index f4d2e8bda0f94..2380cd8fadd48 100644 --- a/crates/uv-distribution-types/src/dependency_metadata.rs +++ b/crates/uv-distribution-types/src/dependency_metadata.rs @@ -1,5 +1,6 @@ use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; +use tracing::{debug, warn}; use uv_normalize::{ExtraName, PackageName}; use uv_pep440::{Version, VersionSpecifiers}; use uv_pep508::Requirement; @@ -20,22 +21,57 @@ impl DependencyMetadata { } /// Retrieve a [`StaticMetadata`] entry by [`PackageName`] and [`Version`]. - pub fn get(&self, package: &PackageName, version: &Version) -> Option { + pub fn get( + &self, + package: &PackageName, + version: Option<&Version>, + ) -> Option { let versions = self.0.get(package)?; - // Search for an exact, then a global match. - let metadata = versions - .iter() - .find(|v| v.version.as_ref() == Some(version)) - .or_else(|| versions.iter().find(|v| v.version.is_none()))?; - - Some(ResolutionMetadata { - name: metadata.name.clone(), - version: version.clone(), - requires_dist: metadata.requires_dist.clone(), - requires_python: metadata.requires_python.clone(), - provides_extras: metadata.provides_extras.clone(), - }) + if let Some(version) = version { + // If a specific version was requested, search for an exact match, then a global match. + let metadata = versions + .iter() + .find(|v| v.version.as_ref() == Some(version)) + .inspect(|_| { + debug!("Found dependency metadata entry for `{package}=={version}`",); + }) + .or_else(|| versions.iter().find(|v| v.version.is_none())) + .inspect(|_| { + debug!("Found global metadata entry for `{package}`",); + }); + let Some(metadata) = metadata else { + warn!("No dependency metadata entry found for `{package}=={version}`"); + return None; + }; + debug!("Found dependency metadata entry for `{package}=={version}`",); + Some(ResolutionMetadata { + name: metadata.name.clone(), + version: version.clone(), + requires_dist: metadata.requires_dist.clone(), + requires_python: metadata.requires_python.clone(), + provides_extras: metadata.provides_extras.clone(), + }) + } else { + // If no version was requested (i.e., it's a direct URL dependency), allow a single + // versioned match. + let [metadata] = versions.as_slice() else { + warn!("Multiple dependency metadata entries found for `{package}`"); + return None; + }; + let Some(version) = metadata.version.clone() else { + warn!("No version found in dependency metadata entry for `{package}`"); + return None; + }; + debug!("Found dependency metadata entry for `{package}` (assuming: `{version}`)"); + Some(ResolutionMetadata { + name: metadata.name.clone(), + version, + requires_dist: metadata.requires_dist.clone(), + requires_python: metadata.requires_python.clone(), + provides_extras: metadata.provides_extras.clone(), + }) + } } /// Retrieve all [`StaticMetadata`] entries. diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 05db4ce8da117..03ddd809de0fe 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -354,7 +354,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { /// /// While hashes will be generated in some cases, hash-checking is _not_ enforced and should /// instead be enforced by the caller. - pub async fn get_wheel_metadata( + async fn get_wheel_metadata( &self, dist: &BuiltDist, hashes: HashPolicy<'_>, @@ -363,7 +363,7 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { if let Some(metadata) = self .build_context .dependency_metadata() - .get(dist.name(), dist.version()) + .get(dist.name(), Some(dist.version())) { return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); } @@ -425,14 +425,16 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> { ) -> Result { // If the metadata was provided by the user directly, prefer it. if let Some(dist) = source.as_dist() { - if let Some(version) = dist.version() { - if let Some(metadata) = self - .build_context - .dependency_metadata() - .get(dist.name(), version) - { - return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); - } + if let Some(metadata) = self + .build_context + .dependency_metadata() + .get(dist.name(), dist.version()) + { + // If we skipped the build, we should still resolve any Git dependencies to precise + // commits. + self.builder.resolve_revision(source, &self.client).await?; + + return Ok(ArchiveMetadata::from_metadata23(metadata.clone())); } } diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index a8e3d011339a0..4e408350936c2 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -1497,6 +1497,40 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { )) } + /// Resolve a source to a specific revision. + pub(crate) async fn resolve_revision( + &self, + source: &BuildableSource<'_>, + client: &ManagedClient<'_>, + ) -> Result<(), Error> { + match source { + BuildableSource::Dist(SourceDist::Git(source)) => { + self.build_context + .git() + .fetch( + &source.git, + client.unmanaged.uncached_client(&source.url).clone(), + self.build_context.cache().bucket(CacheBucket::Git), + self.reporter.clone().map(Facade::from), + ) + .await?; + } + BuildableSource::Url(SourceUrl::Git(source)) => { + self.build_context + .git() + .fetch( + source.git, + client.unmanaged.uncached_client(source.url).clone(), + self.build_context.cache().bucket(CacheBucket::Git), + self.reporter.clone().map(Facade::from), + ) + .await?; + } + _ => {} + } + Ok(()) + } + /// Heal a [`Revision`] for a local archive. async fn heal_archive_revision( &self, diff --git a/crates/uv-git/src/git.rs b/crates/uv-git/src/git.rs index 49a04af6e6022..86af38a7e7780 100644 --- a/crates/uv-git/src/git.rs +++ b/crates/uv-git/src/git.rs @@ -590,7 +590,7 @@ pub(crate) fn fetch( } } -/// Attempts to use `git` CLI installed on the system to fetch a repository,. +/// Attempts to use `git` CLI installed on the system to fetch a repository. fn fetch_with_cli( repo: &mut GitRepository, url: &str, diff --git a/crates/uv-git/src/resolver.rs b/crates/uv-git/src/resolver.rs index 4d34619833d0c..e0636ec243d3a 100644 --- a/crates/uv-git/src/resolver.rs +++ b/crates/uv-git/src/resolver.rs @@ -38,6 +38,50 @@ impl GitResolver { self.0.get(reference) } + /// Resolve a Git URL to a specific commit. + pub async fn resolve( + &self, + url: &GitUrl, + client: ClientWithMiddleware, + cache: PathBuf, + reporter: Option, + ) -> Result { + debug!("Resolving source distribution from Git: {url}"); + + let reference = RepositoryReference::from(url); + + // If we know the precise commit already, return it. + if let Some(precise) = self.get(&reference) { + return Ok(*precise); + } + + // Avoid races between different processes, too. + let lock_dir = cache.join("locks"); + fs::create_dir_all(&lock_dir).await?; + let repository_url = RepositoryUrl::new(url.repository()); + let _lock = LockedFile::acquire( + lock_dir.join(cache_digest(&repository_url)), + &repository_url, + ) + .await?; + + // Fetch the Git repository. + let source = if let Some(reporter) = reporter { + GitSource::new(url.clone(), client, cache).with_reporter(reporter) + } else { + GitSource::new(url.clone(), client, cache) + }; + let precise = tokio::task::spawn_blocking(move || source.resolve()) + .await? + .map_err(GitResolverError::Git)?; + + // Insert the resolved URL into the in-memory cache. This ensures that subsequent fetches + // resolve to the same precise commit. + self.insert(reference, precise); + + Ok(precise) + } + /// Fetch a remote Git repository. pub async fn fetch( &self, diff --git a/crates/uv-git/src/source.rs b/crates/uv-git/src/source.rs index bf3b7bd9bdb0d..5b475af90fd6e 100644 --- a/crates/uv-git/src/source.rs +++ b/crates/uv-git/src/source.rs @@ -51,6 +51,68 @@ impl GitSource { } } + /// Resolve a Git source to a specific revision. + #[instrument(skip(self), fields(repository = %self.git.repository, rev = ?self.git.precise))] + pub fn resolve(self) -> Result { + // Compute the canonical URL for the repository. + let canonical = RepositoryUrl::new(&self.git.repository); + + // The path to the repo, within the Git database. + let ident = cache_digest(&canonical); + let db_path = self.cache.join("db").join(&ident); + + // Authenticate the URL, if necessary. + let remote = if let Some(credentials) = GIT_STORE.get(&canonical) { + Cow::Owned(credentials.apply(self.git.repository.clone())) + } else { + Cow::Borrowed(&self.git.repository) + }; + + let remote = GitRemote::new(&remote); + let (db, actual_rev, task) = match (self.git.precise, remote.db_at(&db_path).ok()) { + // If we have a locked revision, and we have a preexisting database + // which has that revision, then no update needs to happen. + (Some(rev), Some(db)) if db.contains(rev.into()) => { + debug!("Using existing Git source `{}`", self.git.repository); + (db, rev, None) + } + + // ... otherwise we use this state to update the git database. Note + // that we still check for being offline here, for example in the + // situation that we have a locked revision but the database + // doesn't have it. + (locked_rev, db) => { + debug!("Updating Git source `{}`", self.git.repository); + + // Report the checkout operation to the reporter. + let task = self.reporter.as_ref().map(|reporter| { + reporter.on_checkout_start(remote.url(), self.git.reference.as_rev()) + }); + + let (db, actual_rev) = remote.checkout( + &db_path, + db, + &self.git.reference, + locked_rev.map(GitOid::from), + &self.client, + )?; + + (db, GitSha::from(actual_rev), task) + } + }; + + let short_id = db.to_short_id(actual_rev.into())?; + + // Report the checkout operation to the reporter. + if let Some(task) = task { + if let Some(reporter) = self.reporter.as_ref() { + reporter.on_checkout_complete(remote.url(), short_id.as_str(), task); + } + } + + Ok(actual_rev) + } + /// Fetch the underlying Git repository at the given revision. #[instrument(skip(self), fields(repository = %self.git.repository, rev = ?self.git.precise))] pub fn fetch(self) -> Result { diff --git a/crates/uv-resolver/src/redirect.rs b/crates/uv-resolver/src/redirect.rs index 60004c394c24e..3010fcedf8f8c 100644 --- a/crates/uv-resolver/src/redirect.rs +++ b/crates/uv-resolver/src/redirect.rs @@ -1,5 +1,4 @@ use url::Url; - use uv_git::{GitReference, GitResolver}; use uv_pep508::VerbatimUrl; use uv_pypi_types::{ParsedGitUrl, ParsedUrl, VerbatimParsedUrl}; @@ -17,9 +16,8 @@ pub(crate) fn url_to_precise(url: VerbatimParsedUrl, git: &GitResolver) -> Verba let Some(new_git_url) = git.precise(git_url.clone()) else { debug_assert!( matches!(git_url.reference(), GitReference::FullCommit(_)), - "Unseen Git URL: {}, {:?}", + "Unseen Git URL: {}, {git_url:?}", url.verbatim, - git_url ); return url; }; diff --git a/crates/uv/tests/it/lock.rs b/crates/uv/tests/it/lock.rs index 3ad208f83b922..9c0dd87462cc1 100644 --- a/crates/uv/tests/it/lock.rs +++ b/crates/uv/tests/it/lock.rs @@ -14533,6 +14533,131 @@ fn lock_dependency_metadata() -> Result<()> { Ok(()) } +#[test] +fn lock_dependency_metadata_git() -> Result<()> { + let context = TestContext::new("3.12"); + + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str( + r#" + [project] + name = "project" + version = "0.1.0" + requires-python = ">=3.12" + dependencies = ["anyio"] + + [tool.uv.sources] + anyio = { git = "https://github.com/agronholm/anyio", tag = "4.6.2" } + + [build-system] + requires = ["setuptools>=42"] + build-backend = "setuptools.build_meta" + + [[tool.uv.dependency-metadata]] + name = "anyio" + version = "4.6.0.post2" + requires-dist = ["iniconfig"] + "#, + )?; + + uv_snapshot!(context.filters(), context.lock(), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 3 packages in [TIME] + "###); + + let lock = context.read("uv.lock"); + + insta::with_settings!({ + filters => context.filters(), + }, { + assert_snapshot!( + lock, @r###" + version = 1 + requires-python = ">=3.12" + + [options] + exclude-newer = "2024-03-25T00:00:00Z" + + [manifest] + + [[manifest.dependency-metadata]] + name = "anyio" + version = "4.6.0.post2" + requires-dist = ["iniconfig"] + + [[package]] + name = "anyio" + version = "4.6.0.post2" + source = { git = "https://github.com/agronholm/anyio?tag=4.6.2#c4844254e6db0cb804c240ba07405db73d810e0b" } + dependencies = [ + { name = "iniconfig" }, + ] + + [[package]] + name = "iniconfig" + version = "2.0.0" + source = { registry = "https://pypi.org/simple" } + sdist = { url = "https://files.pythonhosted.org/packages/d7/4b/cbd8e699e64a6f16ca3a8220661b5f83792b3017d0f79807cb8708d33913/iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", size = 4646 } + wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, + ] + + [[package]] + name = "project" + version = "0.1.0" + source = { editable = "." } + dependencies = [ + { name = "anyio" }, + ] + + [package.metadata] + requires-dist = [{ name = "anyio", git = "https://github.com/agronholm/anyio?tag=4.6.2" }] + "### + ); + }); + + // Re-run with `--locked`. + uv_snapshot!(context.filters(), context.lock().arg("--locked"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 3 packages in [TIME] + "###); + + // Re-run with `--offline`. We shouldn't need a network connection to validate an + // already-correct lockfile with immutable metadata. + uv_snapshot!(context.filters(), context.lock().arg("--locked").arg("--offline").arg("--no-cache"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Resolved 3 packages in [TIME] + "###); + + // Install from the lockfile. + uv_snapshot!(context.filters(), context.sync().arg("--frozen"), @r###" + success: true + exit_code: 0 + ----- stdout ----- + + ----- stderr ----- + Prepared 3 packages in [TIME] + Installed 3 packages in [TIME] + + anyio==4.6.2 (from git+https://github.com/agronholm/anyio@c4844254e6db0cb804c240ba07405db73d810e0b) + + iniconfig==2.0.0 + + project==0.1.0 (from file://[TEMP_DIR]/) + "###); + + Ok(()) +} + #[test] fn lock_strip_fragment() -> Result<()> { let context = TestContext::new("3.12"); diff --git a/docs/concepts/projects.md b/docs/concepts/projects.md index d4aa0766f805f..52ea27e98a460 100644 --- a/docs/concepts/projects.md +++ b/docs/concepts/projects.md @@ -855,3 +855,9 @@ You could run the following sequence of commands to sync `flash-attn`: $ uv sync --extra build $ uv sync --extra build --extra compile ``` + +!!! note + + The `version` field in `tool.uv.dependency-metadata` is optional for registry-based + dependencies (when omitted, uv will assume the metadata applies to all versions of the package), + but _required_ for direct URL dependencies. diff --git a/docs/concepts/resolution.md b/docs/concepts/resolution.md index 77239e0dff4f6..f84e7e254670d 100644 --- a/docs/concepts/resolution.md +++ b/docs/concepts/resolution.md @@ -313,6 +313,12 @@ package's metadata is incorrect or incomplete, or when a package is not availabl index. While dependency overrides allow overriding the allowed versions of a package globally, metadata overrides allow overriding the declared metadata of a _specific package_. +!!! note + + The `version` field in `tool.uv.dependency-metadata` is optional for registry-based + dependencies (when omitted, uv will assume the metadata applies to all versions of the package), + but _required_ for direct URL dependencies (like Git dependencies). + Entries in the `tool.uv.dependency-metadata` table follow the [Metadata 2.3](https://packaging.python.org/en/latest/specifications/core-metadata/) specification, though only `name`, `version`, `requires-dist`, `requires-python`, and `provides-extra` are read by