From 3e76d77565bdba1623ececaf82aa687a6589ec82 Mon Sep 17 00:00:00 2001 From: konstin Date: Mon, 9 Sep 2024 16:35:11 -0400 Subject: [PATCH 1/3] Extract METADATA reading into a crate This is preparatory work for the upload functionality, which needs to read the METADATA file and attach its parsed contents to the POST request. We move finding the `.dist-info` from `install-wheel-rs` and `uv-client` to a new `uv-metadata` crate, so it can be shared with the publish crate. This isn't fully ready, since the upload code isn't ready yet, but I'm PR-ing it now because it already had merge conflicts. --- Cargo.lock | 24 ++- Cargo.toml | 5 +- crates/install-wheel-rs/src/lib.rs | 15 -- crates/uv-client/Cargo.toml | 4 +- crates/uv-client/src/error.rs | 4 +- crates/uv-client/src/registry_client.rs | 119 +++--------- crates/uv-client/src/remote_metadata.rs | 12 +- crates/uv-distribution/Cargo.toml | 2 +- crates/uv-distribution/src/download.rs | 13 +- crates/uv-distribution/src/error.rs | 4 +- crates/uv-distribution/src/source/mod.rs | 12 +- crates/uv-extract/Cargo.toml | 2 +- crates/uv-metadata/Cargo.toml | 28 +++ .../metadata.rs => uv-metadata/src/lib.rs} | 173 ++++++++++++++---- crates/uv-resolver/Cargo.toml | 2 +- crates/uv-resolver/src/resolver/mod.rs | 10 +- crates/uv-resolver/src/resolver/provider.rs | 19 +- 17 files changed, 262 insertions(+), 186 deletions(-) create mode 100644 crates/uv-metadata/Cargo.toml rename crates/{install-wheel-rs/src/metadata.rs => uv-metadata/src/lib.rs} (56%) diff --git a/Cargo.lock b/Cargo.lock index 993f668d18c7..d1c55168658e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4661,7 +4661,6 @@ dependencies = [ "hyper", "hyper-util", "insta", - "install-wheel-rs", "itertools 0.13.0", "jiff", "pep440_rs", @@ -4687,6 +4686,7 @@ dependencies = [ "uv-cache", "uv-configuration", "uv-fs", + "uv-metadata", "uv-normalize", "uv-version", "uv-warnings", @@ -4793,7 +4793,6 @@ dependencies = [ "futures", "indoc", "insta", - "install-wheel-rs", "nanoid", 
"pep440_rs", "pep508_rs", @@ -4817,6 +4816,7 @@ dependencies = [ "uv-extract", "uv-fs", "uv-git", + "uv-metadata", "uv-normalize", "uv-types", "uv-warnings", @@ -4938,6 +4938,24 @@ dependencies = [ "textwrap", ] +[[package]] +name = "uv-metadata" +version = "0.1.0" +dependencies = [ + "async_zip", + "distribution-filename", + "fs-err", + "futures", + "pep440_rs", + "pypi-types", + "thiserror", + "tokio", + "tokio-util", + "tracing", + "uv-normalize", + "zip", +] + [[package]] name = "uv-normalize" version = "0.0.1" @@ -5065,7 +5083,6 @@ dependencies = [ "futures", "indexmap", "insta", - "install-wheel-rs", "itertools 0.13.0", "jiff", "once-map", @@ -5095,6 +5112,7 @@ dependencies = [ "uv-distribution", "uv-fs", "uv-git", + "uv-metadata", "uv-normalize", "uv-pubgrub", "uv-python", diff --git a/Cargo.toml b/Cargo.toml index 6724250d342f..db46b10c2968 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,12 +41,13 @@ uv-fs = { path = "crates/uv-fs" } uv-git = { path = "crates/uv-git" } uv-installer = { path = "crates/uv-installer" } uv-macros = { path = "crates/uv-macros" } +uv-metadata = { path = "crates/uv-metadata" } uv-normalize = { path = "crates/uv-normalize" } uv-options-metadata = { path = "crates/uv-options-metadata" } +uv-pubgrub = { path = "crates/uv-pubgrub" } uv-python = { path = "crates/uv-python" } uv-requirements = { path = "crates/uv-requirements" } uv-resolver = { path = "crates/uv-resolver" } -uv-pubgrub = { path = "crates/uv-pubgrub" } uv-scripts = { path = "crates/uv-scripts" } uv-settings = { path = "crates/uv-settings" } uv-shell = { path = "crates/uv-shell" } @@ -64,7 +65,7 @@ async-channel = { version = "2.2.0" } async-compression = { version = "0.4.6" } async-trait = { version = "0.1.78" } async_http_range_reader = { version = "0.8.0" } -async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate"] } +async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", 
rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate", "tokio"] } axoupdater = { version = "0.7.0", default-features = false } backoff = { version = "0.4.0" } base64 = { version = "0.22.0" } diff --git a/crates/install-wheel-rs/src/lib.rs b/crates/install-wheel-rs/src/lib.rs index 8a6f487630fc..eae7b03c2604 100644 --- a/crates/install-wheel-rs/src/lib.rs +++ b/crates/install-wheel-rs/src/lib.rs @@ -16,7 +16,6 @@ use uv_normalize::PackageName; pub use wheel::{parse_wheel_file, read_record_file, LibKind}; pub mod linker; -pub mod metadata; mod record; mod script; mod uninstall; @@ -82,24 +81,10 @@ pub enum Error { Pep440, #[error("Invalid direct_url.json")] DirectUrlJson(#[from] serde_json::Error), - #[error("No .dist-info directory found")] - MissingDistInfo, #[error("Cannot uninstall package; `RECORD` file not found at: {}", _0.user_display())] MissingRecord(PathBuf), #[error("Cannot uninstall package; `top_level.txt` file not found at: {}", _0.user_display())] MissingTopLevel(PathBuf), - #[error("Multiple .dist-info directories found: {0}")] - MultipleDistInfo(String), - #[error( - "The .dist-info directory {0} does not consist of the normalized package name and version" - )] - MissingDistInfoSegments(String), - #[error("The .dist-info directory {0} does not start with the normalized package name: {1}")] - MissingDistInfoPackageName(String, String), - #[error("The .dist-info directory {0} does not start with the normalized version: {1}")] - MissingDistInfoVersion(String, String), - #[error("The .dist-info directory name contains invalid characters")] - InvalidDistInfoPrefix, #[error("Invalid wheel size")] InvalidSize, #[error("Invalid package name")] diff --git a/crates/uv-client/Cargo.toml b/crates/uv-client/Cargo.toml index bbd7882e23b7..2c64db2e25d3 100644 --- a/crates/uv-client/Cargo.toml +++ b/crates/uv-client/Cargo.toml @@ -10,7 +10,6 @@ workspace = true cache-key = { workspace = true } distribution-filename = { workspace = true } 
distribution-types = { workspace = true } -install-wheel-rs = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } platform-tags = { workspace = true } @@ -19,6 +18,7 @@ uv-auth = { workspace = true } uv-cache = { workspace = true } uv-configuration = { workspace = true } uv-fs = { workspace = true, features = ["tokio"] } +uv-metadata = { workspace = true } uv-normalize = { workspace = true } uv-version = { workspace = true } uv-warnings = { workspace = true } @@ -26,7 +26,7 @@ uv-warnings = { workspace = true } anyhow = { workspace = true } async-trait = { workspace = true } async_http_range_reader = { workspace = true } -async_zip = { workspace = true, features = ["tokio"] } +async_zip = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } html-escape = { workspace = true } diff --git a/crates/uv-client/src/error.rs b/crates/uv-client/src/error.rs index afa86b54478f..e757353038de 100644 --- a/crates/uv-client/src/error.rs +++ b/crates/uv-client/src/error.rs @@ -148,8 +148,8 @@ pub enum ErrorKind { #[error("Expected an index URL, but received non-base URL: {0}")] CannotBeABase(Url), - #[error(transparent)] - DistInfo(#[from] install_wheel_rs::Error), + #[error("Failed to read metadata: `{0}`")] + Metadata(String, #[source] uv_metadata::Error), #[error("{0} isn't available locally, but making network requests to registries was banned")] NoIndex(String), diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index cc3442389fb0..332b04bb3464 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -9,8 +9,6 @@ use http::HeaderMap; use reqwest::{Client, Response, StatusCode}; use reqwest_middleware::ClientWithMiddleware; use serde::{Deserialize, Serialize}; -use tokio::io::AsyncReadExt; -use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::{info_span, instrument, 
trace, warn, Instrument}; use url::Url; @@ -18,7 +16,6 @@ use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename}; use distribution_types::{ BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name, }; -use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry}; use pep440_rs::Version; use pep508_rs::MarkerEnvironment; use platform_tags::Platform; @@ -26,6 +23,7 @@ use pypi_types::{Metadata23, SimpleJson}; use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache}; use uv_configuration::KeyringProviderType; use uv_configuration::{IndexStrategy, TrustedHost}; +use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream}; use uv_normalize::PackageName; use crate::base_client::BaseClientBuilder; @@ -452,8 +450,18 @@ impl RegistryClient { .await .map_err(ErrorKind::Io)?; let reader = tokio::io::BufReader::new(file); - read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader) - .await? + let contents = read_metadata_async_seek(&wheel.filename, reader) + .await + .map_err(|err| { + ErrorKind::Metadata(path.to_string_lossy().to_string(), err) + })?; + Metadata23::parse_metadata(&contents).map_err(|err| { + ErrorKind::MetadataParseError( + wheel.filename.clone(), + built_dist.to_string(), + Box::new(err), + ) + })? } WheelLocation::Url(url) => { self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities) @@ -476,7 +484,18 @@ impl RegistryClient { .await .map_err(ErrorKind::Io)?; let reader = tokio::io::BufReader::new(file); - read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader).await? + let contents = read_metadata_async_seek(&wheel.filename, reader) + .await + .map_err(|err| { + ErrorKind::Metadata(wheel.install_path.to_string_lossy().to_string(), err) + })?; + Metadata23::parse_metadata(&contents).map_err(|err| { + ErrorKind::MetadataParseError( + wheel.filename.clone(), + built_dist.to_string(), + Box::new(err), + ) + })? 
} }; @@ -609,7 +628,7 @@ impl RegistryClient { .await .map_err(ErrorKind::AsyncHttpRangeReader)?; trace!("Getting metadata for {filename} by range request"); - let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?; + let text = wheel_metadata_from_remote_zip(filename, url, &mut reader).await?; let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| { Error::from(ErrorKind::MetadataParseError( filename.clone(), @@ -675,7 +694,9 @@ impl RegistryClient { .map_err(|err| self.handle_response_errors(err)) .into_async_read(); - read_metadata_async_stream(filename, url.to_string(), reader).await + read_metadata_async_stream(filename, url.as_ref(), reader) + .await + .map_err(|err| ErrorKind::Metadata(url.to_string(), err)) } .instrument(info_span!("read_metadata_stream", wheel = %filename)) }; @@ -701,88 +722,6 @@ impl RegistryClient { } } -/// Read a wheel's `METADATA` file from a zip file. -async fn read_metadata_async_seek( - filename: &WheelFilename, - debug_source: String, - reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin, -) -> Result { - let reader = futures::io::BufReader::new(reader.compat()); - let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader) - .await - .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; - - let (metadata_idx, _dist_info_prefix) = find_archive_dist_info( - filename, - zip_reader - .file() - .entries() - .iter() - .enumerate() - .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))), - ) - .map_err(ErrorKind::DistInfo)?; - - // Read the contents of the `METADATA` file. - let mut contents = Vec::new(); - zip_reader - .reader_with_entry(metadata_idx) - .await - .map_err(|err| ErrorKind::Zip(filename.clone(), err))? 
- .read_to_end_checked(&mut contents) - .await - .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; - - let metadata = Metadata23::parse_metadata(&contents).map_err(|err| { - ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err)) - })?; - Ok(metadata) -} - -/// Like [`read_metadata_async_seek`], but doesn't use seek. -async fn read_metadata_async_stream( - filename: &WheelFilename, - debug_source: String, - reader: R, -) -> Result { - let reader = futures::io::BufReader::with_capacity(128 * 1024, reader); - let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader); - - while let Some(mut entry) = zip - .next_with_entry() - .await - .map_err(|err| ErrorKind::Zip(filename.clone(), err))? - { - // Find the `METADATA` entry. - let path = entry - .reader() - .entry() - .filename() - .as_str() - .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; - - if is_metadata_entry(path, filename).map_err(ErrorKind::DistInfo)? { - let mut reader = entry.reader_mut().compat(); - let mut contents = Vec::new(); - reader.read_to_end(&mut contents).await.unwrap(); - - let metadata = Metadata23::parse_metadata(&contents).map_err(|err| { - ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err)) - })?; - return Ok(metadata); - } - - // Close current file to get access to the next one. 
See docs: - // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ - zip = entry - .skip() - .await - .map_err(|err| ErrorKind::Zip(filename.clone(), err))?; - } - - Err(ErrorKind::MetadataNotFound(filename.clone(), debug_source).into()) -} - #[derive( Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize, )] diff --git a/crates/uv-client/src/remote_metadata.rs b/crates/uv-client/src/remote_metadata.rs index 954212588ae5..0ebd8ab5046b 100644 --- a/crates/uv-client/src/remote_metadata.rs +++ b/crates/uv-client/src/remote_metadata.rs @@ -1,11 +1,10 @@ +use crate::{Error, ErrorKind}; use async_http_range_reader::AsyncHttpRangeReader; +use distribution_filename::WheelFilename; use futures::io::BufReader; use tokio_util::compat::TokioAsyncReadCompatExt; - -use distribution_filename::WheelFilename; -use install_wheel_rs::metadata::find_archive_dist_info; - -use crate::{Error, ErrorKind}; +use url::Url; +use uv_metadata::find_archive_dist_info; /// Read the `.dist-info/METADATA` file from a async remote zip reader, so we avoid downloading the /// entire wheel just for the one file. @@ -50,6 +49,7 @@ use crate::{Error, ErrorKind}; /// rest of the crate. pub(crate) async fn wheel_metadata_from_remote_zip( filename: &WheelFilename, + debug_name: &Url, reader: &mut AsyncHttpRangeReader, ) -> Result { // Make sure we have the back part of the stream. 
@@ -75,7 +75,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip( .enumerate() .filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))), ) - .map_err(ErrorKind::DistInfo)?; + .map_err(|err| ErrorKind::Metadata(debug_name.to_string(), err))?; let offset = metadata_entry.header_offset(); let size = metadata_entry.compressed_size() diff --git a/crates/uv-distribution/Cargo.toml b/crates/uv-distribution/Cargo.toml index 4b798c025379..9915c6a54b9c 100644 --- a/crates/uv-distribution/Cargo.toml +++ b/crates/uv-distribution/Cargo.toml @@ -15,7 +15,6 @@ workspace = true [dependencies] distribution-filename = { workspace = true } distribution-types = { workspace = true } -install-wheel-rs = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } platform-tags = { workspace = true } @@ -27,6 +26,7 @@ uv-configuration = { workspace = true } uv-extract = { workspace = true } uv-fs = { workspace = true, features = ["tokio"] } uv-git = { workspace = true } +uv-metadata = { workspace = true } uv-normalize = { workspace = true } uv-types = { workspace = true } uv-warnings = { workspace = true } diff --git a/crates/uv-distribution/src/download.rs b/crates/uv-distribution/src/download.rs index 94692ed2d027..17229f9188d5 100644 --- a/crates/uv-distribution/src/download.rs +++ b/crates/uv-distribution/src/download.rs @@ -4,6 +4,8 @@ use crate::Error; use distribution_filename::WheelFilename; use distribution_types::{CachedDist, Dist, Hashed}; use pypi_types::{HashDigest, Metadata23}; +use uv_metadata::read_flat_wheel_metadata; + use uv_cache_info::CacheInfo; /// A locally available wheel. @@ -41,6 +43,7 @@ impl LocalWheel { /// Read the [`Metadata23`] from a wheel. 
pub fn metadata(&self) -> Result { read_flat_wheel_metadata(&self.filename, &self.archive) + .map_err(|err| Error::WheelMetadata(self.archive.clone(), Box::new(err))) } } @@ -68,13 +71,3 @@ impl std::fmt::Display for LocalWheel { write!(f, "{}", self.remote()) } } - -/// Read the [`Metadata23`] from an unzipped wheel. -fn read_flat_wheel_metadata( - filename: &WheelFilename, - wheel: impl AsRef, -) -> Result { - let dist_info = install_wheel_rs::metadata::find_flat_dist_info(filename, &wheel)?; - let metadata = install_wheel_rs::metadata::read_dist_info_metadata(&dist_info, &wheel)?; - Ok(Metadata23::parse_metadata(&metadata)?) -} diff --git a/crates/uv-distribution/src/error.rs b/crates/uv-distribution/src/error.rs index 9a13a6a7f706..068bf7c50f72 100644 --- a/crates/uv-distribution/src/error.rs +++ b/crates/uv-distribution/src/error.rs @@ -63,8 +63,8 @@ pub enum Error { VersionMismatch { given: Version, metadata: Version }, #[error("Failed to parse metadata from built wheel")] Metadata(#[from] pypi_types::MetadataError), - #[error("Failed to read `dist-info` metadata from built wheel")] - DistInfo(#[from] install_wheel_rs::Error), + #[error("Failed to read metadata: `{}`", _0.user_display())] + WheelMetadata(PathBuf, #[source] Box), #[error("Failed to read zip archive from built wheel")] Zip(#[from] ZipError), #[error("Source distribution directory contains neither readable `pyproject.toml` nor `setup.py`: `{}`", _0.user_display())] diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 483e4619b587..a6b0969aa5d5 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -19,7 +19,6 @@ use distribution_types::{ }; use fs_err::tokio as fs; use futures::{FutureExt, TryStreamExt}; -use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; use pypi_types::{HashDigest, Metadata12, Metadata23, RequiresTxt}; use reqwest::Response; @@ -34,6 +33,7 @@ use 
uv_client::{ use uv_configuration::{BuildKind, BuildOutput}; use uv_extract::hash::Hasher; use uv_fs::{rename_with_retry, write_atomic, LockedFile}; +use uv_metadata::read_archive_metadata; use uv_types::{BuildContext, SourceBuildTrait}; use zip::ZipArchive; @@ -1444,7 +1444,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Read the metadata from the wheel. let filename = WheelFilename::from_str(&disk_filename)?; - let metadata = read_wheel_metadata(&filename, cache_shard.join(&disk_filename))?; + let metadata = read_wheel_metadata(&filename, &cache_shard.join(&disk_filename))?; // Validate the metadata. validate(source, &metadata)?; @@ -1955,14 +1955,12 @@ async fn read_cached_metadata(cache_entry: &CacheEntry) -> Result, -) -> Result { +fn read_wheel_metadata(filename: &WheelFilename, wheel: &Path) -> Result { let file = fs_err::File::open(wheel).map_err(Error::CacheRead)?; let reader = std::io::BufReader::new(file); let mut archive = ZipArchive::new(reader)?; - let dist_info = read_archive_metadata(filename, &mut archive)?; + let dist_info = read_archive_metadata(filename, &mut archive) + .map_err(|err| Error::WheelMetadata(wheel.to_path_buf(), Box::new(err)))?; Ok(Metadata23::parse_metadata(&dist_info)?) 
} diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index 70bbb825fc2a..c55efb41f9a1 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -17,7 +17,7 @@ distribution-filename = { workspace = true } pypi-types = { workspace = true } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } -async_zip = { workspace = true, features = ["tokio"] } +async_zip = { workspace = true } fs-err = { workspace = true, features = ["tokio"] } futures = { workspace = true } md-5 = { workspace = true } diff --git a/crates/uv-metadata/Cargo.toml b/crates/uv-metadata/Cargo.toml new file mode 100644 index 000000000000..086d335ad0f8 --- /dev/null +++ b/crates/uv-metadata/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "uv-metadata" +version = "0.1.0" +edition.workspace = true +rust-version.workspace = true +homepage.workspace = true +documentation.workspace = true +repository.workspace = true +authors.workspace = true +license.workspace = true + +[dependencies] +distribution-filename = { workspace = true } +pep440_rs = { workspace = true } +pypi-types = { workspace = true } +uv-normalize = { workspace = true } + +async_zip = { workspace = true } +fs-err = { workspace = true } +futures = { workspace = true } +thiserror = { workspace = true } +tokio = { workspace = true } +tokio-util = { workspace = true } +tracing = { workspace = true } +zip = { workspace = true } + +[lints] +workspace = true diff --git a/crates/install-wheel-rs/src/metadata.rs b/crates/uv-metadata/src/lib.rs similarity index 56% rename from crates/install-wheel-rs/src/metadata.rs rename to crates/uv-metadata/src/lib.rs index 383fea2207de..62db5407f87b 100644 --- a/crates/install-wheel-rs/src/metadata.rs +++ b/crates/uv-metadata/src/lib.rs @@ -1,15 +1,51 @@ +//! Read metadata from wheels and source distributions. +//! +//! This module reads all fields exhaustively. The fields are defined in the [Core metadata +//! 
specification](https://packaging.python.org/en/latest/specifications/core-metadata/). + +use distribution_filename::WheelFilename; +use pep440_rs::Version; +use pypi_types::Metadata23; +use std::io; use std::io::{Read, Seek}; use std::path::Path; use std::str::FromStr; - +use thiserror::Error; +use tokio::io::AsyncReadExt; +use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; +use uv_normalize::{DistInfoName, InvalidNameError}; use zip::ZipArchive; -use distribution_filename::WheelFilename; -use pep440_rs::Version; -use uv_normalize::DistInfoName; - -use crate::Error; +/// The caller is responsible for attaching the path or url we failed to read. +#[derive(Debug, Error)] +pub enum Error { + #[error("Failed to read `dist-info` metadata from built wheel")] + DistInfo, + #[error("No .dist-info directory found")] + MissingDistInfo, + #[error("Multiple .dist-info directories found: {0}")] + MultipleDistInfo(String), + #[error( + "The .dist-info directory does not consist of the normalized package name and version: `{0}`" + )] + MissingDistInfoSegments(String), + #[error("The .dist-info directory {0} does not start with the normalized package name: {1}")] + MissingDistInfoPackageName(String, String), + #[error("The .dist-info directory {0} does not start with the normalized version: {1}")] + MissingDistInfoVersion(String, String), + #[error("The .dist-info directory name contains invalid characters")] + InvalidName(#[from] InvalidNameError), + #[error("The metadata at {0} is invalid")] + InvalidMetadata(String, pypi_types::MetadataError), + #[error("Failed to read from zip file")] + Zip(#[from] zip::result::ZipError), + #[error("Failed to read from zip file")] + AsyncZip(#[from] async_zip::error::ZipError), + // No `#[from]` to enforce manual review of `io::Error` sources. + #[error(transparent)] + Io(io::Error), +} /// Find the `.dist-info` directory in a zipped wheel. 
/// @@ -123,13 +159,11 @@ pub fn read_archive_metadata( let dist_info_prefix = find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1; - let mut file = archive - .by_name(&format!("{dist_info_prefix}.dist-info/METADATA")) - .map_err(|err| Error::Zip(filename.to_string(), err))?; + let mut file = archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?; #[allow(clippy::cast_possible_truncation)] let mut buffer = Vec::with_capacity(file.size() as usize); - file.read_to_end(&mut buffer)?; + file.read_to_end(&mut buffer).map_err(Error::Io)?; Ok(buffer) } @@ -142,26 +176,27 @@ pub fn find_flat_dist_info( path: impl AsRef, ) -> Result { // Iterate over `path` to find the `.dist-info` directory. It should be at the top-level. - let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())?.find_map(|entry| { - let entry = entry.ok()?; - let file_type = entry.file_type().ok()?; - if file_type.is_dir() { - let path = entry.path(); - - let extension = path.extension()?; - if extension != "dist-info" { - return None; - } + let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref()) + .map_err(Error::Io)? 
+ .find_map(|entry| { + let entry = entry.ok()?; + let file_type = entry.file_type().ok()?; + if file_type.is_dir() { + let path = entry.path(); - let dist_info_prefix = path.file_stem()?.to_str()?; - Some(dist_info_prefix.to_string()) - } else { - None - } - }) else { - return Err(Error::InvalidWheel( - "Missing .dist-info directory".to_string(), - )); + let extension = path.extension()?; + if extension != "dist-info" { + return None; + } + + let dist_info_prefix = path.file_stem()?.to_str()?; + Some(dist_info_prefix.to_string()) + } else { + None + } + }) + else { + return Err(Error::MissingDistInfo); }; // Like `pip`, validate that the `.dist-info` directory is prefixed with the canonical @@ -199,16 +234,86 @@ pub fn read_dist_info_metadata( let metadata_file = wheel .as_ref() .join(format!("{dist_info_prefix}.dist-info/METADATA")); - Ok(fs_err::read(metadata_file)?) + fs_err::read(metadata_file).map_err(Error::Io) +} + +/// Read a wheel's `METADATA` file from a zip file. +pub async fn read_metadata_async_seek( + filename: &WheelFilename, + reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin, +) -> Result, Error> { + let reader = futures::io::BufReader::new(reader.compat()); + let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader).await?; + + let (metadata_idx, _dist_info_prefix) = find_archive_dist_info( + filename, + zip_reader + .file() + .entries() + .iter() + .enumerate() + .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))), + )?; + + // Read the contents of the `METADATA` file. + let mut contents = Vec::new(); + zip_reader + .reader_with_entry(metadata_idx) + .await? + .read_to_end_checked(&mut contents) + .await?; + + Ok(contents) +} + +/// Like [`read_metadata_async_seek`], but doesn't use seek. 
+pub async fn read_metadata_async_stream( + filename: &WheelFilename, + debug_path: &str, + reader: R, +) -> Result { + let reader = futures::io::BufReader::with_capacity(128 * 1024, reader); + let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader); + + while let Some(mut entry) = zip.next_with_entry().await? { + // Find the `METADATA` entry. + let path = entry.reader().entry().filename().as_str()?; + + if is_metadata_entry(path, filename)? { + let mut reader = entry.reader_mut().compat(); + let mut contents = Vec::new(); + reader.read_to_end(&mut contents).await.unwrap(); + + let metadata = Metadata23::parse_metadata(&contents) + .map_err(|err| Error::InvalidMetadata(debug_path.to_string(), err))?; + return Ok(metadata); + } + + // Close current file to get access to the next one. See docs: + // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/ + zip = entry.skip().await?; + } + + Err(Error::MissingDistInfo) +} + +/// Read the [`Metadata23`] from an unzipped wheel. 
+pub fn read_flat_wheel_metadata( + filename: &WheelFilename, + wheel: impl AsRef, +) -> Result { + let dist_info_prefix = find_flat_dist_info(filename, &wheel)?; + let metadata = read_dist_info_metadata(&dist_info_prefix, &wheel)?; + Metadata23::parse_metadata(&metadata).map_err(|err| { + Error::InvalidMetadata(format!("{dist_info_prefix}.dist-info/METADATA"), err) + }) } #[cfg(test)] mod test { - use std::str::FromStr; - + use super::find_archive_dist_info; use distribution_filename::WheelFilename; - - use crate::metadata::find_archive_dist_info; + use std::str::FromStr; #[test] fn test_dot_in_name() { diff --git a/crates/uv-resolver/Cargo.toml b/crates/uv-resolver/Cargo.toml index df45d7a0fa62..e61be2c79854 100644 --- a/crates/uv-resolver/Cargo.toml +++ b/crates/uv-resolver/Cargo.toml @@ -16,7 +16,6 @@ workspace = true cache-key = { workspace = true } distribution-filename = { workspace = true } distribution-types = { workspace = true } -install-wheel-rs = { workspace = true } once-map = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } @@ -28,6 +27,7 @@ uv-configuration = { workspace = true } uv-distribution = { workspace = true } uv-fs = { workspace = true, features = ["serde"] } uv-git = { workspace = true } +uv-metadata = { workspace = true } uv-normalize = { workspace = true } uv-pubgrub = { workspace = true } uv-python = { workspace = true } diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index ba42d1371837..76d0c91e9cc9 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -899,7 +899,7 @@ impl ResolverState { + MetadataResponse::InvalidStructure { source: _, err } => { self.unavailable_packages.insert( name.clone(), UnavailablePackage::InvalidStructure(err.to_string()), @@ -1272,8 +1272,8 @@ impl ResolverState { - warn!("Unable to extract metadata for {name}: {err}"); + MetadataResponse::InvalidStructure { source: _, err } => { 
+ warn!("Unable to extract metadata for {source}: {err}"); self.incomplete_packages .entry(name.clone()) .or_default() @@ -1668,7 +1668,7 @@ impl ResolverState { warn!("Unable to extract metadata for {dist}: {err}"); } - MetadataResponse::InvalidStructure(err) => { + MetadataResponse::InvalidStructure { err, source: _ } => { warn!("Unable to extract metadata for {dist}: {err}"); } _ => {} @@ -1686,7 +1686,7 @@ impl ResolverState { warn!("Unable to extract metadata for {dist}: {err}"); } - MetadataResponse::InvalidStructure(err) => { + MetadataResponse::InvalidStructure { source: _, err } => { warn!("Unable to extract metadata for {dist}: {err}"); } _ => {} diff --git a/crates/uv-resolver/src/resolver/provider.rs b/crates/uv-resolver/src/resolver/provider.rs index 5125fa09f03b..00f07221d234 100644 --- a/crates/uv-resolver/src/resolver/provider.rs +++ b/crates/uv-resolver/src/resolver/provider.rs @@ -39,7 +39,10 @@ pub enum MetadataResponse { /// The wheel metadata was found, but the metadata was inconsistent. InconsistentMetadata(Box), /// The wheel has an invalid structure. - InvalidStructure(Box), + InvalidStructure { + source: String, + err: Box, + }, /// The wheel metadata was not found in the cache and the network is not available. 
Offline, } @@ -184,8 +187,11 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a, uv_client::ErrorKind::MetadataParseError(_, _, err) => { Ok(MetadataResponse::InvalidMetadata(err)) } - uv_client::ErrorKind::DistInfo(err) => { - Ok(MetadataResponse::InvalidStructure(Box::new(err))) + uv_client::ErrorKind::Metadata(_, err) => { + Ok(MetadataResponse::InvalidStructure { + source: dist.to_string(), + err: Box::new(err), + }) } kind => Err(uv_client::Error::from(kind).into()), }, @@ -198,8 +204,11 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a, uv_distribution::Error::Metadata(err) => { Ok(MetadataResponse::InvalidMetadata(Box::new(err))) } - uv_distribution::Error::DistInfo(err) => { - Ok(MetadataResponse::InvalidStructure(Box::new(err))) + uv_distribution::Error::WheelMetadata(source, err) => { + Ok(MetadataResponse::InvalidStructure { + source: source.to_string_lossy().to_string(), + err, + }) } err => Err(err), }, From e63845b6f518e6177bc3cc8ad9f747169d45c46f Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 10 Sep 2024 08:10:58 -0400 Subject: [PATCH 2/3] . 
--- crates/uv-resolver/src/resolver/mod.rs | 10 +++++----- crates/uv-resolver/src/resolver/provider.rs | 17 ++++------------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index 76d0c91e9cc9..ba42d1371837 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -899,7 +899,7 @@ impl ResolverState { + MetadataResponse::InvalidStructure(err) => { self.unavailable_packages.insert( name.clone(), UnavailablePackage::InvalidStructure(err.to_string()), @@ -1272,8 +1272,8 @@ impl ResolverState { - warn!("Unable to extract metadata for {source}: {err}"); + MetadataResponse::InvalidStructure(err) => { + warn!("Unable to extract metadata for {name}: {err}"); self.incomplete_packages .entry(name.clone()) .or_default() @@ -1668,7 +1668,7 @@ impl ResolverState { warn!("Unable to extract metadata for {dist}: {err}"); } - MetadataResponse::InvalidStructure { err, source: _ } => { + MetadataResponse::InvalidStructure(err) => { warn!("Unable to extract metadata for {dist}: {err}"); } _ => {} @@ -1686,7 +1686,7 @@ impl ResolverState { warn!("Unable to extract metadata for {dist}: {err}"); } - MetadataResponse::InvalidStructure { source: _, err } => { + MetadataResponse::InvalidStructure(err) => { warn!("Unable to extract metadata for {dist}: {err}"); } _ => {} diff --git a/crates/uv-resolver/src/resolver/provider.rs b/crates/uv-resolver/src/resolver/provider.rs index 00f07221d234..f4385ad79881 100644 --- a/crates/uv-resolver/src/resolver/provider.rs +++ b/crates/uv-resolver/src/resolver/provider.rs @@ -39,10 +39,7 @@ pub enum MetadataResponse { /// The wheel metadata was found, but the metadata was inconsistent. InconsistentMetadata(Box), /// The wheel has an invalid structure. - InvalidStructure { - source: String, - err: Box, - }, + InvalidStructure(Box), /// The wheel metadata was not found in the cache and the network is not available. 
Offline, } @@ -188,10 +185,7 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a, Ok(MetadataResponse::InvalidMetadata(err)) } uv_client::ErrorKind::Metadata(_, err) => { - Ok(MetadataResponse::InvalidStructure { - source: dist.to_string(), - err: Box::new(err), - }) + Ok(MetadataResponse::InvalidStructure(Box::new(err))) } kind => Err(uv_client::Error::from(kind).into()), }, @@ -204,11 +198,8 @@ impl<'a, Context: BuildContext> ResolverProvider for DefaultResolverProvider<'a, uv_distribution::Error::Metadata(err) => { Ok(MetadataResponse::InvalidMetadata(Box::new(err))) } - uv_distribution::Error::WheelMetadata(source, err) => { - Ok(MetadataResponse::InvalidStructure { - source: source.to_string_lossy().to_string(), - err, - }) + uv_distribution::Error::WheelMetadata(_, err) => { + Ok(MetadataResponse::InvalidStructure(err)) } err => Err(err), }, From 35af6fd7417253da0b28f750852b63a343556bd3 Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 10 Sep 2024 09:24:43 -0400 Subject: [PATCH 3/3] Update snapshots --- crates/uv/tests/pip_sync.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/uv/tests/pip_sync.rs b/crates/uv/tests/pip_sync.rs index 791b0f2b1cf1..57ba15277fb4 100644 --- a/crates/uv/tests/pip_sync.rs +++ b/crates/uv/tests/pip_sync.rs @@ -2615,13 +2615,15 @@ fn incompatible_wheel() -> Result<()> { .arg("requirements.txt") .arg("--strict"), @r###" success: false - exit_code: 2 + exit_code: 1 ----- stdout ----- ----- stderr ----- - error: Failed to read `foo @ file://[TEMP_DIR]/foo-1.2.3-not-compatible-wheel.whl` - Caused by: Failed to unzip wheel: foo-1.2.3-not-compatible-wheel.whl - Caused by: unable to locate the end of central directory record + × No solution found when resolving dependencies: + ╰─▶ Because foo has an invalid package format and you require foo, we can conclude that your requirements are unsatisfiable. 
+ + hint: The structure of foo was invalid: + Failed to read from zip file "### );