Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extract METADATA reading into a crate #7231

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@ uv-fs = { path = "crates/uv-fs" }
uv-git = { path = "crates/uv-git" }
uv-installer = { path = "crates/uv-installer" }
uv-macros = { path = "crates/uv-macros" }
uv-metadata = { path = "crates/uv-metadata" }
uv-normalize = { path = "crates/uv-normalize" }
uv-options-metadata = { path = "crates/uv-options-metadata" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-python = { path = "crates/uv-python" }
uv-requirements = { path = "crates/uv-requirements" }
uv-resolver = { path = "crates/uv-resolver" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-scripts = { path = "crates/uv-scripts" }
uv-settings = { path = "crates/uv-settings" }
uv-shell = { path = "crates/uv-shell" }
Expand All @@ -64,7 +65,7 @@ async-channel = { version = "2.2.0" }
async-compression = { version = "0.4.6" }
async-trait = { version = "0.1.78" }
async_http_range_reader = { version = "0.8.0" }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate"] }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate", "tokio"] }
axoupdater = { version = "0.7.0", default-features = false }
backoff = { version = "0.4.0" }
base64 = { version = "0.22.0" }
Expand Down
15 changes: 0 additions & 15 deletions crates/install-wheel-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use uv_normalize::PackageName;
pub use wheel::{parse_wheel_file, read_record_file, LibKind};

pub mod linker;
pub mod metadata;
mod record;
mod script;
mod uninstall;
Expand Down Expand Up @@ -82,24 +81,10 @@ pub enum Error {
Pep440,
#[error("Invalid direct_url.json")]
DirectUrlJson(#[from] serde_json::Error),
#[error("No .dist-info directory found")]
MissingDistInfo,
#[error("Cannot uninstall package; `RECORD` file not found at: {}", _0.user_display())]
MissingRecord(PathBuf),
#[error("Cannot uninstall package; `top_level.txt` file not found at: {}", _0.user_display())]
MissingTopLevel(PathBuf),
#[error("Multiple .dist-info directories found: {0}")]
MultipleDistInfo(String),
#[error(
"The .dist-info directory {0} does not consist of the normalized package name and version"
)]
MissingDistInfoSegments(String),
#[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
MissingDistInfoPackageName(String, String),
#[error("The .dist-info directory {0} does not start with the normalized version: {1}")]
MissingDistInfoVersion(String, String),
#[error("The .dist-info directory name contains invalid characters")]
InvalidDistInfoPrefix,
#[error("Invalid wheel size")]
InvalidSize,
#[error("Invalid package name")]
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ workspace = true
cache-key = { workspace = true }
distribution-filename = { workspace = true }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
platform-tags = { workspace = true }
Expand All @@ -19,14 +18,15 @@ uv-auth = { workspace = true }
uv-cache = { workspace = true }
uv-configuration = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true }
uv-version = { workspace = true }
uv-warnings = { workspace = true }

anyhow = { workspace = true }
async-trait = { workspace = true }
async_http_range_reader = { workspace = true }
async_zip = { workspace = true, features = ["tokio"] }
async_zip = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
html-escape = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ pub enum ErrorKind {
#[error("Expected an index URL, but received non-base URL: {0}")]
CannotBeABase(Url),

#[error(transparent)]
DistInfo(#[from] install_wheel_rs::Error),
#[error("Failed to read metadata: `{0}`")]
Metadata(String, #[source] uv_metadata::Error),

#[error("{0} isn't available locally, but making network requests to registries was banned")]
NoIndex(String),
Expand Down
119 changes: 29 additions & 90 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,21 @@ use http::HeaderMap;
use reqwest::{Client, Response, StatusCode};
use reqwest_middleware::ClientWithMiddleware;
use serde::{Deserialize, Serialize};
use tokio::io::AsyncReadExt;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;

use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name,
};
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_tags::Platform;
use pypi_types::{Metadata23, SimpleJson};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost};
use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream};
use uv_normalize::PackageName;

use crate::base_client::BaseClientBuilder;
Expand Down Expand Up @@ -452,8 +450,18 @@ impl RegistryClient {
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader)
.await?
let contents = read_metadata_async_seek(&wheel.filename, reader)
.await
.map_err(|err| {
ErrorKind::Metadata(path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
}
WheelLocation::Url(url) => {
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
Expand All @@ -476,7 +484,18 @@ impl RegistryClient {
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader).await?
let contents = read_metadata_async_seek(&wheel.filename, reader)
.await
.map_err(|err| {
ErrorKind::Metadata(wheel.install_path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
}
};

Expand Down Expand Up @@ -609,7 +628,7 @@ impl RegistryClient {
.await
.map_err(ErrorKind::AsyncHttpRangeReader)?;
trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let text = wheel_metadata_from_remote_zip(filename, url, &mut reader).await?;
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
Error::from(ErrorKind::MetadataParseError(
filename.clone(),
Expand Down Expand Up @@ -675,7 +694,9 @@ impl RegistryClient {
.map_err(|err| self.handle_response_errors(err))
.into_async_read();

read_metadata_async_stream(filename, url.to_string(), reader).await
read_metadata_async_stream(filename, url.as_ref(), reader)
.await
.map_err(|err| ErrorKind::Metadata(url.to_string(), err))
}
.instrument(info_span!("read_metadata_stream", wheel = %filename))
};
Expand All @@ -701,88 +722,6 @@ impl RegistryClient {
}
}

/// Read a wheel's `METADATA` file from a zip file.
async fn read_metadata_async_seek(
filename: &WheelFilename,
debug_source: String,
reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
) -> Result<Metadata23, Error> {
let reader = futures::io::BufReader::new(reader.compat());
let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
filename,
zip_reader
.file()
.entries()
.iter()
.enumerate()
.filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
)
.map_err(ErrorKind::DistInfo)?;

// Read the contents of the `METADATA` file.
let mut contents = Vec::new();
zip_reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

let metadata = Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
})?;
Ok(metadata)
}

/// Like [`read_metadata_async_seek`], but doesn't use seek.
///
/// Walks the zip entries of `reader` in stream order until the wheel's `METADATA` entry is
/// found, then reads and parses it. `debug_source` is used to label parse and not-found errors.
///
/// # Errors
///
/// Fails if the stream is not a readable zip, if an entry name cannot be decoded, if the
/// `.dist-info` check fails, if reading the `METADATA` entry hits an I/O error, if the contents
/// cannot be parsed, or if the archive ends without a `METADATA` entry.
async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
    filename: &WheelFilename,
    debug_source: String,
    reader: R,
) -> Result<Metadata23, Error> {
    let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
    let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);

    while let Some(mut entry) = zip
        .next_with_entry()
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?
    {
        // Find the `METADATA` entry.
        let path = entry
            .reader()
            .entry()
            .filename()
            .as_str()
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

        if is_metadata_entry(path, filename).map_err(ErrorKind::DistInfo)? {
            let mut reader = entry.reader_mut().compat();
            let mut contents = Vec::new();
            // The entry is read from a caller-supplied stream, so a read failure is an expected
            // runtime condition: surface it as an error instead of panicking.
            reader
                .read_to_end(&mut contents)
                .await
                .map_err(ErrorKind::Io)?;

            let metadata = Metadata23::parse_metadata(&contents).map_err(|err| {
                ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
            })?;
            return Ok(metadata);
        }

        // Close current file to get access to the next one. See docs:
        // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
        zip = entry
            .skip()
            .await
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
    }

    Err(ErrorKind::MetadataNotFound(filename.clone(), debug_source).into())
}

#[derive(
Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
Expand Down
12 changes: 6 additions & 6 deletions crates/uv-client/src/remote_metadata.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use crate::{Error, ErrorKind};
use async_http_range_reader::AsyncHttpRangeReader;
use distribution_filename::WheelFilename;
use futures::io::BufReader;
use tokio_util::compat::TokioAsyncReadCompatExt;

use distribution_filename::WheelFilename;
use install_wheel_rs::metadata::find_archive_dist_info;

use crate::{Error, ErrorKind};
use url::Url;
use uv_metadata::find_archive_dist_info;

/// Read the `.dist-info/METADATA` file from an async remote zip reader, so we avoid downloading the
/// entire wheel just for the one file.
Expand Down Expand Up @@ -50,6 +49,7 @@ use crate::{Error, ErrorKind};
/// rest of the crate.
pub(crate) async fn wheel_metadata_from_remote_zip(
filename: &WheelFilename,
debug_name: &Url,
reader: &mut AsyncHttpRangeReader,
) -> Result<String, Error> {
// Make sure we have the back part of the stream.
Expand All @@ -75,7 +75,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.enumerate()
.filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))),
)
.map_err(ErrorKind::DistInfo)?;
.map_err(|err| ErrorKind::Metadata(debug_name.to_string(), err))?;

let offset = metadata_entry.header_offset();
let size = metadata_entry.compressed_size()
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-distribution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ workspace = true
[dependencies]
distribution-filename = { workspace = true }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
platform-tags = { workspace = true }
Expand All @@ -27,6 +26,7 @@ uv-configuration = { workspace = true }
uv-extract = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-git = { workspace = true }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }
Expand Down
Loading
Loading