Skip to content

Commit

Permalink
Consistently enforce requested-vs.-built metadata when retrieving whe…
Browse files Browse the repository at this point in the history
…els (#9484)

## Summary

We were missing a bunch of edge cases, e.g., the wheel exists in the
cache already.

Closes #9480.
  • Loading branch information
charliermarsh authored Nov 27, 2024
1 parent 4f2b30c commit f1ccbcb
Show file tree
Hide file tree
Showing 10 changed files with 354 additions and 66 deletions.
15 changes: 11 additions & 4 deletions crates/uv-distribution-types/src/traits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ use uv_pep508::VerbatimUrl;
use crate::error::Error;
use crate::{
BuiltDist, CachedDirectUrlDist, CachedDist, CachedRegistryDist, DirectUrlBuiltDist,
DirectUrlSourceDist, Dist, DistributionId, GitSourceDist, InstalledDirectUrlDist,
InstalledDist, InstalledEggInfoDirectory, InstalledEggInfoFile, InstalledLegacyEditable,
InstalledRegistryDist, InstalledVersion, LocalDist, PackageId, PathBuiltDist, PathSourceDist,
RegistryBuiltWheel, RegistrySourceDist, ResourceId, SourceDist, VersionId, VersionOrUrlRef,
DirectUrlSourceDist, DirectorySourceDist, Dist, DistributionId, GitSourceDist,
InstalledDirectUrlDist, InstalledDist, InstalledEggInfoDirectory, InstalledEggInfoFile,
InstalledLegacyEditable, InstalledRegistryDist, InstalledVersion, LocalDist, PackageId,
PathBuiltDist, PathSourceDist, RegistryBuiltWheel, RegistrySourceDist, ResourceId, SourceDist,
VersionId, VersionOrUrlRef,
};

pub trait Name {
Expand Down Expand Up @@ -219,6 +220,12 @@ impl std::fmt::Display for PathSourceDist {
}
}

impl std::fmt::Display for DirectorySourceDist {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}{}", self.name(), self.version_or_url())
}
}

impl std::fmt::Display for RegistryBuiltWheel {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}{}", self.name(), self.version_or_url())
Expand Down
9 changes: 8 additions & 1 deletion crates/uv-distribution/src/distribution_database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.boxed_local()
.await?;

// Validate that the metadata is consistent with the distribution.

// If the wheel was unzipped previously, respect it. Source distributions are
// cached under a unique revision ID, so unzipped directories are never stale.
match built_wheel.target.canonicalize() {
Expand Down Expand Up @@ -397,7 +399,10 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.await;

match result {
Ok(metadata) => Ok(ArchiveMetadata::from_metadata23(metadata)),
Ok(metadata) => {
// Validate that the metadata is consistent with the distribution.
Ok(ArchiveMetadata::from_metadata23(metadata))
}
Err(err) if err.is_http_streaming_unsupported() => {
warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})");

Expand Down Expand Up @@ -461,6 +466,8 @@ impl<'a, Context: BuildContext> DistributionDatabase<'a, Context> {
.boxed_local()
.await?;

// Validate that the metadata is consistent with the distribution.

Ok(metadata)
}

Expand Down
18 changes: 16 additions & 2 deletions crates/uv-distribution/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,26 @@ pub enum Error {
#[error("Built wheel has an invalid filename")]
WheelFilename(#[from] WheelFilenameError),
#[error("Package metadata name `{metadata}` does not match given name `{given}`")]
NameMismatch {
WheelMetadataNameMismatch {
given: PackageName,
metadata: PackageName,
},
#[error("Package metadata version `{metadata}` does not match given version `{given}`")]
VersionMismatch { given: Version, metadata: Version },
WheelMetadataVersionMismatch { given: Version, metadata: Version },
#[error(
"Package metadata name `{metadata}` does not match `{filename}` from the wheel filename"
)]
WheelFilenameNameMismatch {
filename: PackageName,
metadata: PackageName,
},
#[error(
"Package metadata version `{metadata}` does not match `{filename}` from the wheel filename"
)]
WheelFilenameVersionMismatch {
filename: Version,
metadata: Version,
},
#[error("Failed to parse metadata from built wheel")]
Metadata(#[from] uv_pypi_types::MetadataError),
#[error("Failed to read metadata: `{}`", _0.user_display())]
Expand Down
8 changes: 8 additions & 0 deletions crates/uv-distribution/src/source/built_wheel_metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use uv_cache_info::CacheInfo;
use uv_distribution_filename::WheelFilename;
use uv_distribution_types::Hashed;
use uv_fs::files;
use uv_normalize::PackageName;
use uv_pep440::Version;
use uv_platform_tags::Tags;
use uv_pypi_types::HashDigest;

Expand Down Expand Up @@ -56,6 +58,12 @@ impl BuiltWheelMetadata {
self.hashes = hashes;
self
}

/// Returns `true` if the wheel matches the given package name and version.
pub(crate) fn matches(&self, name: Option<&PackageName>, version: Option<&Version>) -> bool {
name.map_or(true, |name| self.filename.name == *name)
&& version.map_or(true, |version| self.filename.version == *version)
}
}

impl Hashed for BuiltWheelMetadata {
Expand Down
130 changes: 95 additions & 35 deletions crates/uv-distribution/src/source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ use uv_distribution_types::{
use uv_extract::hash::Hasher;
use uv_fs::{rename_with_retry, write_atomic, LockedFile};
use uv_metadata::read_archive_metadata;
use uv_pep440::release_specifiers_to_ranges;
use uv_normalize::PackageName;
use uv_pep440::{release_specifiers_to_ranges, Version};
use uv_platform_tags::Tags;
use uv_pypi_types::{HashAlgorithm, HashDigest, Metadata12, RequiresTxt, ResolutionMetadata};
use uv_types::{BuildContext, SourceBuildTrait};
Expand Down Expand Up @@ -416,7 +417,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};

// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel.with_hashes(revision.into_hashes()));
}

Expand Down Expand Up @@ -520,10 +523,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {

// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
metadata: Metadata::from_metadata23(metadata.into()),
hashes: revision.into_hashes(),
});
}
Expand Down Expand Up @@ -732,7 +738,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};

// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}

Expand Down Expand Up @@ -824,10 +832,13 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {

// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata {
metadata: Metadata::from_metadata23(metadata),
metadata: Metadata::from_metadata23(metadata.into()),
hashes: revision.into_hashes(),
});
}
Expand Down Expand Up @@ -1000,7 +1011,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};

// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}

Expand Down Expand Up @@ -1095,11 +1108,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {

// If the cache contains compatible metadata, return it.
let metadata_entry = cache_shard.entry(METADATA);
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata::from(
Metadata::from_workspace(
metadata,
metadata.into(),
resource.install_path.as_ref(),
None,
self.build_context.locations(),
Expand Down Expand Up @@ -1278,7 +1294,9 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
};

// If the cache contains a compatible wheel, return it.
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard) {
if let Some(built_wheel) = BuiltWheelMetadata::find_in_cache(tags, &cache_shard)
.filter(|built_wheel| built_wheel.matches(source.name(), source.version()))
{
return Ok(built_wheel);
}

Expand Down Expand Up @@ -1388,7 +1406,10 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.map_err(Error::CacheRead)?
.is_fresh()
{
if let Some(metadata) = read_cached_metadata(&metadata_entry).await? {
if let Some(metadata) = CachedMetadata::read(&metadata_entry)
.await?
.filter(|metadata| metadata.matches(source.name(), source.version()))
{
let path = if let Some(subdirectory) = resource.subdirectory {
Cow::Owned(fetch.path().join(subdirectory))
} else {
Expand All @@ -1398,7 +1419,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Using cached metadata for: {source}");
return Ok(ArchiveMetadata::from(
Metadata::from_workspace(
metadata,
metadata.into(),
&path,
None,
self.build_context.locations(),
Expand Down Expand Up @@ -1798,6 +1819,14 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await
.map_err(Error::Build)?;

// Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_wheel_metadata(&filename, &temp_dir.path().join(&disk_filename))?;

// Validate the metadata.
validate_metadata(source, &metadata)?;
validate_filename(&filename, &metadata)?;

// Move the wheel to the cache.
rename_with_retry(
temp_dir.path().join(&disk_filename),
Expand All @@ -1806,13 +1835,6 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
.await
.map_err(Error::CacheWrite)?;

// Read the metadata from the wheel.
let filename = WheelFilename::from_str(&disk_filename)?;
let metadata = read_wheel_metadata(&filename, &cache_shard.join(&disk_filename))?;

// Validate the metadata.
validate(source, &metadata)?;

debug!("Finished building: {source}");
Ok((disk_filename, filename, metadata))
}
Expand Down Expand Up @@ -1883,7 +1905,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
let metadata = ResolutionMetadata::parse_metadata(&content)?;

// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;

Ok(Some(metadata))
}
Expand All @@ -1899,7 +1921,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `pyproject.toml` for: {source}");

// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;

return Ok(Some(metadata));
}
Expand Down Expand Up @@ -1929,7 +1951,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `PKG-INFO` for: {source}");

// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;

return Ok(Some(metadata));
}
Expand All @@ -1953,7 +1975,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
debug!("Found static `egg-info` for: {source}");

// Validate the metadata.
validate(source, &metadata)?;
validate_metadata(source, &metadata)?;

return Ok(Some(metadata));
}
Expand Down Expand Up @@ -2065,10 +2087,13 @@ pub fn prune(cache: &Cache) -> Result<Removal, Error> {
}

/// Validate that the source distribution matches the built metadata.
fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Result<(), Error> {
fn validate_metadata(
source: &BuildableSource<'_>,
metadata: &ResolutionMetadata,
) -> Result<(), Error> {
if let Some(name) = source.name() {
if metadata.name != *name {
return Err(Error::NameMismatch {
return Err(Error::WheelMetadataNameMismatch {
metadata: metadata.name.clone(),
given: name.clone(),
});
Expand All @@ -2077,7 +2102,7 @@ fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Resu

if let Some(version) = source.version() {
if metadata.version != *version {
return Err(Error::VersionMismatch {
return Err(Error::WheelMetadataVersionMismatch {
metadata: metadata.version.clone(),
given: version.clone(),
});
Expand All @@ -2087,6 +2112,25 @@ fn validate(source: &BuildableSource<'_>, metadata: &ResolutionMetadata) -> Resu
Ok(())
}

/// Validate that the source distribution matches the built filename.
fn validate_filename(filename: &WheelFilename, metadata: &ResolutionMetadata) -> Result<(), Error> {
if metadata.name != filename.name {
return Err(Error::WheelFilenameNameMismatch {
metadata: metadata.name.clone(),
filename: filename.name.clone(),
});
}

if metadata.version != filename.version {
return Err(Error::WheelFilenameVersionMismatch {
metadata: metadata.version.clone(),
filename: filename.version.clone(),
});
}

Ok(())
}

/// A pointer to a source distribution revision in the cache, fetched from an HTTP archive.
///
/// Encoded with `MsgPack`, and represented on disk by a `.http` file.
Expand Down Expand Up @@ -2335,14 +2379,30 @@ async fn read_requires_dist(project_root: &Path) -> Result<uv_pypi_types::Requir
Ok(requires_dist)
}

/// Read an existing cached [`ResolutionMetadata`], if it exists.
async fn read_cached_metadata(
cache_entry: &CacheEntry,
) -> Result<Option<ResolutionMetadata>, Error> {
match fs::read(&cache_entry.path()).await {
Ok(cached) => Ok(Some(rmp_serde::from_slice(&cached)?)),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
/// Wheel metadata stored in the source distribution cache.
#[derive(Debug, Clone)]
struct CachedMetadata(ResolutionMetadata);

impl CachedMetadata {
/// Read an existing cached [`ResolutionMetadata`], if it exists.
async fn read(cache_entry: &CacheEntry) -> Result<Option<Self>, Error> {
match fs::read(&cache_entry.path()).await {
Ok(cached) => Ok(Some(Self(rmp_serde::from_slice(&cached)?))),
Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
Err(err) => Err(Error::CacheRead(err)),
}
}

/// Returns `true` if the metadata matches the given package name and version.
fn matches(&self, name: Option<&PackageName>, version: Option<&Version>) -> bool {
name.map_or(true, |name| self.0.name == *name)
&& version.map_or(true, |version| self.0.version == *version)
}
}

impl From<CachedMetadata> for ResolutionMetadata {
fn from(value: CachedMetadata) -> Self {
value.0
}
}

Expand Down
Loading

0 comments on commit f1ccbcb

Please sign in to comment.