Skip to content

Commit

Permalink
Fix broken URLs parsed from relative paths in registries (#1413)
Browse files Browse the repository at this point in the history
Closes #1388

Fixes incorrect handling of relative paths returned by indexes without
an explicit `<base>`.

`Url::join` drops the last path segment of a base URL that has no trailing
slash, e.g. joining `baz` onto `http://foo/bar` yields `http://foo/baz`, but
what we want is `http://foo/bar/baz`. We don't add the trailing `/` in
`base_url_join_relative` because flat indexes look like `http://foo/bar.html`,
and in that case we _want_ `bar.html` to be replaced.
  • Loading branch information
zanieb authored Feb 16, 2024
1 parent e48edf0 commit 0bfce35
Show file tree
Hide file tree
Showing 3 changed files with 168 additions and 13 deletions.
25 changes: 15 additions & 10 deletions crates/pypi-types/src/base_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@ pub fn base_url_join_relative(base: &str, maybe_relative: &str) -> Result<Url, J
Ok(absolute) => Ok(absolute),
Err(err) => {
if err == url::ParseError::RelativeUrlWithoutBase {
let base = Url::parse(base).map_err(|err| JoinRelativeError {
let base_url = Url::parse(base).map_err(|err| JoinRelativeError::ParseError {
original: base.to_string(),
source: err,
})?;
base.join(maybe_relative).map_err(|err| JoinRelativeError {
original: format!("{base}/{maybe_relative}"),
source: err,
})

base_url
.join(maybe_relative)
.map_err(|_| JoinRelativeError::ParseError {
original: format!("{base}/{maybe_relative}"),
source: err,
})
} else {
Err(JoinRelativeError {
Err(JoinRelativeError::ParseError {
original: maybe_relative.to_string(),
source: err,
})
Expand All @@ -36,10 +39,12 @@ pub fn base_url_join_relative(base: &str, maybe_relative: &str) -> Result<Url, J
/// The error message includes the URL (`base` or `maybe_relative`) passed to
/// `base_url_join_relative` that provoked the error.
#[derive(Clone, Debug, thiserror::Error)]
#[error("Failed to parse URL: `{original}`")]
pub struct JoinRelativeError {
original: String,
source: url::ParseError,
pub enum JoinRelativeError {
#[error("Failed to parse URL: `{original}`")]
ParseError {
original: String,
source: url::ParseError,
},
}

#[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)]
Expand Down
102 changes: 102 additions & 0 deletions crates/uv-client/src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,4 +498,106 @@ mod tests {
}
"###);
}

/// Parses a simple-index page in the shape served by AWS CodeArtifact: every
/// `href` is a relative path and the page declares no `<base>` element.
///
/// The snapshot pins that `SimpleHtml::parse` reports the base it was given and
/// keeps each file `url` relative, exactly as written in the page.
///
/// Fixture taken from
/// <https://github.com/astral-sh/uv/issues/1388#issuecomment-1947659088>.
#[test]
fn parse_code_artifact_index_html() {
let text = r#"
<!DOCTYPE html>
<html>
<head>
<title>Links for flask</title>
</head>
<body>
<h1>Links for flask</h1>
<a href="0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237" data-gpg-sig="false" >Flask-0.1.tar.gz</a>
<br/>
<a href="0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373" data-gpg-sig="false" >Flask-0.10.1.tar.gz</a>
<br/>
<a href="3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403" data-requires-python="&gt;=3.8" data-gpg-sig="false" >flask-3.0.1.tar.gz</a>
<br/>
</body>
</html>
"#;
let base = Url::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask/")
.unwrap();
let result = SimpleHtml::parse(text, &base).unwrap();
insta::assert_debug_snapshot!(result, @r###"
SimpleHtml {
base: BaseUrl(
Url {
scheme: "https",
cannot_be_a_base: false,
username: "",
password: None,
host: Some(
Domain(
"account.d.codeartifact.us-west-2.amazonaws.com",
),
),
port: None,
path: "/pypi/shared-packages-pypi/simple/flask/",
query: None,
fragment: None,
},
),
files: [
File {
dist_info_metadata: None,
filename: "Flask-0.1.tar.gz",
hashes: Hashes {
sha256: Some(
"9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237",
),
},
requires_python: None,
size: None,
upload_time: None,
url: "0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237",
yanked: None,
},
File {
dist_info_metadata: None,
filename: "Flask-0.10.1.tar.gz",
hashes: Hashes {
sha256: Some(
"4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373",
),
},
requires_python: None,
size: None,
upload_time: None,
url: "0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373",
yanked: None,
},
File {
dist_info_metadata: None,
filename: "flask-3.0.1.tar.gz",
hashes: Hashes {
sha256: Some(
"6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403",
),
},
requires_python: Some(
Ok(
VersionSpecifiers(
[
VersionSpecifier {
operator: GreaterThanEqual,
version: "3.8",
},
],
),
),
),
size: None,
upload_time: None,
url: "3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403",
yanked: None,
},
],
}
"###);
}
}
54 changes: 51 additions & 3 deletions crates/uv-client/src/registry_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ impl RegistryClient {
url.path_segments_mut()
.unwrap()
.pop_if_empty()
.push(package_name.as_ref());
.push(package_name.as_ref())
// The URL *must* end in a trailing slash for proper relative path behavior
// ref https://github.com/servo/rust-url/issues/333
.push("");

trace!("Fetching metadata for {package_name} from {url}");

Expand Down Expand Up @@ -705,10 +708,11 @@ pub enum Connectivity {
mod tests {
use std::str::FromStr;

use pypi_types::SimpleJson;
use pypi_types::{JoinRelativeError, SimpleJson};
use url::Url;
use uv_normalize::PackageName;

use crate::{SimpleMetadata, SimpleMetadatum};
use crate::{html::SimpleHtml, SimpleMetadata, SimpleMetadatum};

#[test]
fn ignore_failing_files() {
Expand Down Expand Up @@ -758,4 +762,48 @@ mod tests {
.collect();
assert_eq!(versions, ["1.7.8".to_string()]);
}

/// Regression test for relative file links in an AWS CodeArtifact index page.
///
/// Parses the page, resolves every file `url` against the parsed base with
/// `base_url_join_relative`, and snapshots the resulting absolute URLs.
///
/// See <https://github.com/astral-sh/uv/issues/1388>.
#[test]
fn relative_urls_code_artifact() -> Result<(), JoinRelativeError> {
    let html = r#"
<!DOCTYPE html>
<html>
<head>
<title>Links for flask</title>
</head>
<body>
<h1>Links for flask</h1>
<a href="0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237" data-gpg-sig="false" >Flask-0.1.tar.gz</a>
<br/>
<a href="0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373" data-gpg-sig="false" >Flask-0.10.1.tar.gz</a>
<br/>
<a href="3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403" data-requires-python="&gt;=3.8" data-gpg-sig="false" >flask-3.0.1.tar.gz</a>
<br/>
</body>
</html>
"#;

    // The index URL deliberately has no trailing `/`, so that the url-join
    // behavior for such a base is what the snapshot below records.
    let index_url = Url::parse("https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/flask")
        .unwrap();
    let parsed = SimpleHtml::parse(html, &index_url).unwrap();
    let base_str = parsed.base.as_url().as_str();

    // Resolve each file link in page order, stopping at the first join failure.
    let mut resolved = Vec::with_capacity(parsed.files.len());
    for file in &parsed.files {
        resolved.push(pypi_types::base_url_join_relative(base_str, &file.url)?);
    }

    let urls: Vec<&str> = resolved.iter().map(Url::as_str).collect();
    insta::assert_debug_snapshot!(urls, @r###"
[
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.1/Flask-0.1.tar.gz#sha256=9da884457e910bf0847d396cb4b778ad9f3c3d17db1c5997cb861937bd284237",
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/0.10.1/Flask-0.10.1.tar.gz#sha256=4c83829ff83d408b5e1d4995472265411d2c414112298f2eb4b359d9e4563373",
"https://account.d.codeartifact.us-west-2.amazonaws.com/pypi/shared-packages-pypi/simple/3.0.1/flask-3.0.1.tar.gz#sha256=6489f51bb3666def6f314e15f19d50a1869a19ae0e8c9a3641ffe66c77d42403",
]
"###);

    Ok(())
}
}

0 comments on commit 0bfce35

Please sign in to comment.