Skip to content

Commit

Permalink
Extract METADATA reading into a crate (#7231)
Browse files Browse the repository at this point in the history
This is preparatory work for the upload functionality, which needs to
read the METADATA file and attach its parsed contents to the POST
request: We move finding the `.dist-info` from `install-wheel-rs` and
`uv-client` to a new `uv-metadata` crate, so it can be shared with the
publish crate.

I don't know yet whether it's the right place, since the upload code isn't
ready, but I'm PR-ing it now because it already had merge conflicts.
  • Loading branch information
konstin authored Sep 10, 2024
1 parent 95a4bee commit 2b3890f
Show file tree
Hide file tree
Showing 17 changed files with 253 additions and 184 deletions.
24 changes: 21 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@ uv-fs = { path = "crates/uv-fs" }
uv-git = { path = "crates/uv-git" }
uv-installer = { path = "crates/uv-installer" }
uv-macros = { path = "crates/uv-macros" }
uv-metadata = { path = "crates/uv-metadata" }
uv-normalize = { path = "crates/uv-normalize" }
uv-options-metadata = { path = "crates/uv-options-metadata" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-python = { path = "crates/uv-python" }
uv-requirements = { path = "crates/uv-requirements" }
uv-resolver = { path = "crates/uv-resolver" }
uv-pubgrub = { path = "crates/uv-pubgrub" }
uv-scripts = { path = "crates/uv-scripts" }
uv-settings = { path = "crates/uv-settings" }
uv-shell = { path = "crates/uv-shell" }
Expand All @@ -64,7 +65,7 @@ async-channel = { version = "2.2.0" }
async-compression = { version = "0.4.6" }
async-trait = { version = "0.1.78" }
async_http_range_reader = { version = "0.8.0" }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate"] }
async_zip = { git = "https://github.com/charliermarsh/rs-async-zip", rev = "011b24604fa7bc223daaad7712c0694bac8f0a87", features = ["deflate", "tokio"] }
axoupdater = { version = "0.7.0", default-features = false }
backoff = { version = "0.4.0" }
base64 = { version = "0.22.0" }
Expand Down
15 changes: 0 additions & 15 deletions crates/install-wheel-rs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use uv_normalize::PackageName;
pub use wheel::{parse_wheel_file, read_record_file, LibKind};

pub mod linker;
pub mod metadata;
mod record;
mod script;
mod uninstall;
Expand Down Expand Up @@ -82,24 +81,10 @@ pub enum Error {
Pep440,
#[error("Invalid direct_url.json")]
DirectUrlJson(#[from] serde_json::Error),
#[error("No .dist-info directory found")]
MissingDistInfo,
#[error("Cannot uninstall package; `RECORD` file not found at: {}", _0.user_display())]
MissingRecord(PathBuf),
#[error("Cannot uninstall package; `top_level.txt` file not found at: {}", _0.user_display())]
MissingTopLevel(PathBuf),
#[error("Multiple .dist-info directories found: {0}")]
MultipleDistInfo(String),
#[error(
"The .dist-info directory {0} does not consist of the normalized package name and version"
)]
MissingDistInfoSegments(String),
#[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
MissingDistInfoPackageName(String, String),
#[error("The .dist-info directory {0} does not start with the normalized version: {1}")]
MissingDistInfoVersion(String, String),
#[error("The .dist-info directory name contains invalid characters")]
InvalidDistInfoPrefix,
#[error("Invalid wheel size")]
InvalidSize,
#[error("Invalid package name")]
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ workspace = true
cache-key = { workspace = true }
distribution-filename = { workspace = true }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
platform-tags = { workspace = true }
Expand All @@ -19,14 +18,15 @@ uv-auth = { workspace = true }
uv-cache = { workspace = true }
uv-configuration = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true }
uv-version = { workspace = true }
uv-warnings = { workspace = true }

anyhow = { workspace = true }
async-trait = { workspace = true }
async_http_range_reader = { workspace = true }
async_zip = { workspace = true, features = ["tokio"] }
async_zip = { workspace = true }
fs-err = { workspace = true, features = ["tokio"] }
futures = { workspace = true }
html-escape = { workspace = true }
Expand Down
4 changes: 2 additions & 2 deletions crates/uv-client/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ pub enum ErrorKind {
#[error("Expected an index URL, but received non-base URL: {0}")]
CannotBeABase(Url),

#[error(transparent)]
DistInfo(#[from] install_wheel_rs::Error),
#[error("Failed to read metadata: `{0}`")]
Metadata(String, #[source] uv_metadata::Error),

#[error("{0} isn't available locally, but making network requests to registries was banned")]
NoIndex(String),
Expand Down
119 changes: 29 additions & 90 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,21 @@ use http::HeaderMap;
use reqwest::{Client, Response, StatusCode};
use reqwest_middleware::ClientWithMiddleware;
use serde::{Deserialize, Serialize};
use tokio::io::AsyncReadExt;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tracing::{info_span, instrument, trace, warn, Instrument};
use url::Url;

use distribution_filename::{DistFilename, SourceDistFilename, WheelFilename};
use distribution_types::{
BuiltDist, File, FileLocation, IndexCapabilities, IndexUrl, IndexUrls, Name,
};
use install_wheel_rs::metadata::{find_archive_dist_info, is_metadata_entry};
use pep440_rs::Version;
use pep508_rs::MarkerEnvironment;
use platform_tags::Platform;
use pypi_types::{Metadata23, SimpleJson};
use uv_cache::{Cache, CacheBucket, CacheEntry, WheelCache};
use uv_configuration::KeyringProviderType;
use uv_configuration::{IndexStrategy, TrustedHost};
use uv_metadata::{read_metadata_async_seek, read_metadata_async_stream};
use uv_normalize::PackageName;

use crate::base_client::BaseClientBuilder;
Expand Down Expand Up @@ -452,8 +450,18 @@ impl RegistryClient {
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader)
.await?
let contents = read_metadata_async_seek(&wheel.filename, reader)
.await
.map_err(|err| {
ErrorKind::Metadata(path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
}
WheelLocation::Url(url) => {
self.wheel_metadata_registry(&wheel.index, &wheel.file, &url, capabilities)
Expand All @@ -476,7 +484,18 @@ impl RegistryClient {
.await
.map_err(ErrorKind::Io)?;
let reader = tokio::io::BufReader::new(file);
read_metadata_async_seek(&wheel.filename, built_dist.to_string(), reader).await?
let contents = read_metadata_async_seek(&wheel.filename, reader)
.await
.map_err(|err| {
ErrorKind::Metadata(wheel.install_path.to_string_lossy().to_string(), err)
})?;
Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(
wheel.filename.clone(),
built_dist.to_string(),
Box::new(err),
)
})?
}
};

Expand Down Expand Up @@ -609,7 +628,7 @@ impl RegistryClient {
.await
.map_err(ErrorKind::AsyncHttpRangeReader)?;
trace!("Getting metadata for {filename} by range request");
let text = wheel_metadata_from_remote_zip(filename, &mut reader).await?;
let text = wheel_metadata_from_remote_zip(filename, url, &mut reader).await?;
let metadata = Metadata23::parse_metadata(text.as_bytes()).map_err(|err| {
Error::from(ErrorKind::MetadataParseError(
filename.clone(),
Expand Down Expand Up @@ -675,7 +694,9 @@ impl RegistryClient {
.map_err(|err| self.handle_response_errors(err))
.into_async_read();

read_metadata_async_stream(filename, url.to_string(), reader).await
read_metadata_async_stream(filename, url.as_ref(), reader)
.await
.map_err(|err| ErrorKind::Metadata(url.to_string(), err))
}
.instrument(info_span!("read_metadata_stream", wheel = %filename))
};
Expand All @@ -701,88 +722,6 @@ impl RegistryClient {
}
}

/// Read a wheel's `METADATA` file from a zip file.
async fn read_metadata_async_seek(
filename: &WheelFilename,
debug_source: String,
reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
) -> Result<Metadata23, Error> {
let reader = futures::io::BufReader::new(reader.compat());
let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
filename,
zip_reader
.file()
.entries()
.iter()
.enumerate()
.filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
)
.map_err(ErrorKind::DistInfo)?;

// Read the contents of the `METADATA` file.
let mut contents = Vec::new();
zip_reader
.reader_with_entry(metadata_idx)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?
.read_to_end_checked(&mut contents)
.await
.map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

let metadata = Metadata23::parse_metadata(&contents).map_err(|err| {
ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
})?;
Ok(metadata)
}

/// Like [`read_metadata_async_seek`], but doesn't use seek.
///
/// Walks the zip entries in stream order and parses the first entry that
/// `is_metadata_entry` accepts for `filename`. `debug_source` is only used to label
/// the "parse error" and "not found" errors.
///
/// # Errors
///
/// - `ErrorKind::Zip` if the archive cannot be read or an entry name cannot be viewed as a string.
/// - `ErrorKind::DistInfo` if `is_metadata_entry` rejects an entry path.
/// - `ErrorKind::Io` if reading the bytes of the `METADATA` entry fails.
/// - `ErrorKind::MetadataParseError` if the `METADATA` contents cannot be parsed.
/// - `ErrorKind::MetadataNotFound` if the stream ends without a matching entry.
async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
    filename: &WheelFilename,
    debug_source: String,
    reader: R,
) -> Result<Metadata23, Error> {
    let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
    let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);

    while let Some(mut entry) = zip
        .next_with_entry()
        .await
        .map_err(|err| ErrorKind::Zip(filename.clone(), err))?
    {
        // Find the `METADATA` entry.
        let path = entry
            .reader()
            .entry()
            .filename()
            .as_str()
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;

        if is_metadata_entry(path, filename).map_err(ErrorKind::DistInfo)? {
            let mut reader = entry.reader_mut().compat();
            let mut contents = Vec::new();
            // The underlying stream can fail mid-read; report that as an error
            // instead of panicking.
            reader
                .read_to_end(&mut contents)
                .await
                .map_err(ErrorKind::Io)?;

            let metadata = Metadata23::parse_metadata(&contents).map_err(|err| {
                ErrorKind::MetadataParseError(filename.clone(), debug_source, Box::new(err))
            })?;
            return Ok(metadata);
        }

        // Close current file to get access to the next one. See docs:
        // https://docs.rs/async_zip/0.0.16/async_zip/base/read/stream/
        zip = entry
            .skip()
            .await
            .map_err(|err| ErrorKind::Zip(filename.clone(), err))?;
    }

    Err(ErrorKind::MetadataNotFound(filename.clone(), debug_source).into())
}

#[derive(
Default, Debug, Serialize, Deserialize, rkyv::Archive, rkyv::Deserialize, rkyv::Serialize,
)]
Expand Down
12 changes: 6 additions & 6 deletions crates/uv-client/src/remote_metadata.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use crate::{Error, ErrorKind};
use async_http_range_reader::AsyncHttpRangeReader;
use distribution_filename::WheelFilename;
use futures::io::BufReader;
use tokio_util::compat::TokioAsyncReadCompatExt;

use distribution_filename::WheelFilename;
use install_wheel_rs::metadata::find_archive_dist_info;

use crate::{Error, ErrorKind};
use url::Url;
use uv_metadata::find_archive_dist_info;

/// Read the `.dist-info/METADATA` file from an async remote zip reader, so we avoid downloading the
/// entire wheel just for the one file.
Expand Down Expand Up @@ -50,6 +49,7 @@ use crate::{Error, ErrorKind};
/// rest of the crate.
pub(crate) async fn wheel_metadata_from_remote_zip(
filename: &WheelFilename,
debug_name: &Url,
reader: &mut AsyncHttpRangeReader,
) -> Result<String, Error> {
// Make sure we have the back part of the stream.
Expand All @@ -75,7 +75,7 @@ pub(crate) async fn wheel_metadata_from_remote_zip(
.enumerate()
.filter_map(|(idx, e)| Some(((idx, e), e.filename().as_str().ok()?))),
)
.map_err(ErrorKind::DistInfo)?;
.map_err(|err| ErrorKind::Metadata(debug_name.to_string(), err))?;

let offset = metadata_entry.header_offset();
let size = metadata_entry.compressed_size()
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-distribution/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ workspace = true
cache-key = { workspace = true }
distribution-filename = { workspace = true }
distribution-types = { workspace = true }
install-wheel-rs = { workspace = true }
pep440_rs = { workspace = true }
pep508_rs = { workspace = true }
platform-tags = { workspace = true }
Expand All @@ -28,6 +27,7 @@ uv-configuration = { workspace = true }
uv-extract = { workspace = true }
uv-fs = { workspace = true, features = ["tokio"] }
uv-git = { workspace = true }
uv-metadata = { workspace = true }
uv-normalize = { workspace = true }
uv-types = { workspace = true }
uv-warnings = { workspace = true }
Expand Down
Loading

0 comments on commit 2b3890f

Please sign in to comment.