diff --git a/crates/rattler_conda_types/Cargo.toml b/crates/rattler_conda_types/Cargo.toml index c5128b65b..13393c8bd 100644 --- a/crates/rattler_conda_types/Cargo.toml +++ b/crates/rattler_conda_types/Cargo.toml @@ -43,6 +43,8 @@ rstest = "0.18.2" assert_matches = "1.5.0" hex-literal = "0.4.1" criterion = { version = "0.5", features = ["html_reports"] } +pathdiff = "0.2.1" +dunce = "1.0.4" [[bench]] name = "parse" diff --git a/crates/rattler_conda_types/src/lib.rs b/crates/rattler_conda_types/src/lib.rs index f897fc9bc..22777ccc4 100644 --- a/crates/rattler_conda_types/src/lib.rs +++ b/crates/rattler_conda_types/src/lib.rs @@ -36,7 +36,9 @@ pub use package_name::{InvalidPackageNameError, PackageName}; pub use platform::{Arch, ParseArchError, ParsePlatformError, Platform}; pub use prefix_record::PrefixRecord; pub use repo_data::patches::{PackageRecordPatch, PatchInstructions, RepoDataPatch}; -pub use repo_data::{ChannelInfo, ConvertSubdirError, PackageRecord, RepoData}; +pub use repo_data::{ + compute_package_url, ChannelInfo, ConvertSubdirError, PackageRecord, RepoData, +}; pub use repo_data_record::RepoDataRecord; pub use run_export::RunExportKind; pub use version::{ diff --git a/crates/rattler_conda_types/src/repo_data/mod.rs b/crates/rattler_conda_types/src/repo_data/mod.rs index 70d1ea0cd..94275f033 100644 --- a/crates/rattler_conda_types/src/repo_data/mod.rs +++ b/crates/rattler_conda_types/src/repo_data/mod.rs @@ -4,6 +4,7 @@ pub mod patches; mod topological_sort; +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::{Display, Formatter}; use std::path::Path; @@ -14,6 +15,7 @@ use rattler_digest::{serde::SerializableHash, Md5Hash, Sha256Hash}; use serde::{Deserialize, Serialize}; use serde_with::{serde_as, skip_serializing_none, OneOrMany}; use thiserror::Error; +use url::Url; use rattler_macros::sorted; @@ -57,6 +59,10 @@ pub struct RepoData { pub struct ChannelInfo { /// The channel's subdirectory pub subdir: String, + + /// The base_url for all package urls. Can be an absolute or relative url. + #[serde(skip_serializing_if = "Option::is_none")] + pub base_url: Option, } /// A single record in the Conda repodata. A single record refers to a single binary distribution @@ -173,17 +179,29 @@ impl RepoData { Ok(serde_json::from_str(&contents)?) } + /// Returns the `base_url` specified in the repodata. + pub fn base_url(&self) -> Option<&str> { + self.info.as_ref().and_then(|i| i.base_url.as_deref()) + } + /// Builds a [`Vec`] from the packages in a [`RepoData`] given the source of the /// data. pub fn into_repo_data_records(self, channel: &Channel) -> Vec { let mut records = Vec::with_capacity(self.packages.len() + self.conda_packages.len()); let channel_name = channel.canonical_name(); + let base_url = self.base_url().map(ToOwned::to_owned); + + // Determine the base_url of the channel for (filename, package_record) in self.packages.into_iter().chain(self.conda_packages) { records.push(RepoDataRecord { - url: channel - .base_url() - .join(&format!("{}/{}", &package_record.subdir, &filename)) - .expect("failed to build a url from channel and package record"), + url: compute_package_url( + &channel + .base_url() + .join(&package_record.subdir) + .expect("cannot join channel base_url and subdir"), + base_url.as_deref(), + &filename, + ), channel: channel_name.clone(), package_record, file_name: filename, @@ -193,6 +211,50 @@ impl RepoData { } } +/// Computes the URL for a package. +pub fn compute_package_url( + repo_data_base_url: &Url, + base_url: Option<&str>, + filename: &str, +) -> Url { + let mut absolute_url = match base_url { + None => repo_data_base_url.clone(), + Some(base_url) => match Url::parse(base_url) { + Err(url::ParseError::RelativeUrlWithoutBase) if !base_url.starts_with('/') => { + add_trailing_slash(repo_data_base_url) + .join(base_url) + .expect("failed to join base_url with channel") + } + Err(url::ParseError::RelativeUrlWithoutBase) => { + let mut url = repo_data_base_url.clone(); + url.set_path(base_url); + url + } + Err(e) => unreachable!("{e}"), + Ok(base_url) => base_url, + }, + }; + + let path = absolute_url.path(); + if !path.ends_with('/') { + absolute_url.set_path(&format!("{path}/")) + } + absolute_url + .join(filename) + .expect("failed to join base_url and filename") +} + +fn add_trailing_slash(url: &Url) -> Cow { + let path = url.path(); + if !path.ends_with('/') { + let mut url = url.clone(); + url.set_path(&format!("{path}/")); + Cow::Owned(url) + } else { + Cow::Borrowed(url) + } +} + impl PackageRecord { /// A simple helper method that constructs a `PackageRecord` with the bare minimum values. pub fn new(name: PackageName, version: impl Into, build: String) -> Self { @@ -351,10 +413,10 @@ fn sort_set_alphabetically( #[cfg(test)] mod test { - use crate::repo_data::determine_subdir; + use crate::repo_data::{compute_package_url, determine_subdir}; use fxhash::FxHashSet; - use crate::RepoData; + use crate::{Channel, ChannelConfig, RepoData}; // isl-0.12.2-1.tar.bz2 // gmp-5.1.2-6.tar.bz2 @@ -371,7 +433,7 @@ mod test { #[test] fn test_serialize() { let repodata = RepoData { - version: Some(1), + version: Some(2), info: Default::default(), packages: Default::default(), conda_packages: Default::default(), @@ -397,4 +459,63 @@ mod test { let json = serde_json::to_string_pretty(&repodata).unwrap(); insta::assert_snapshot!(json); } + + #[test] + fn test_base_url_packages() { + // load test data + let test_data_path = dunce::canonicalize( + std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data"), + ) + .unwrap(); + let data_path = test_data_path.join("channels/dummy/linux-64/repodata.json"); + let repodata = RepoData::from_path(&data_path).unwrap(); + + let channel = Channel::from_str( + url::Url::from_directory_path(data_path.parent().unwrap().parent().unwrap()) + .unwrap() + .as_str(), + &ChannelConfig::default(), + ) + .unwrap(); + + let file_urls = repodata + .into_repo_data_records(&channel) + .into_iter() + .map(|r| { + pathdiff::diff_paths(r.url.to_file_path().unwrap(), &test_data_path) + .unwrap() + .to_string_lossy() + .replace('\\', "/") + }) + .collect::>(); + + // serialize to yaml + insta::assert_yaml_snapshot!(file_urls); + } + + #[test] + fn test_base_url() { + let channel = Channel::from_str("conda-forge", &ChannelConfig::default()).unwrap(); + let base_url = channel.base_url().join("linux-64/").unwrap(); + assert_eq!( + compute_package_url(&base_url, None, "bla.conda").to_string(), + "https://conda.anaconda.org/conda-forge/linux-64/bla.conda" + ); + assert_eq!( + compute_package_url(&base_url, Some("https://host.some.org"), "bla.conda",).to_string(), + "https://host.some.org/bla.conda" + ); + assert_eq!( + compute_package_url(&base_url, Some("/root"), "bla.conda").to_string(), + "https://conda.anaconda.org/root/bla.conda" + ); + assert_eq!( + compute_package_url(&base_url, Some("foo/bar"), "bla.conda").to_string(), + "https://conda.anaconda.org/conda-forge/linux-64/foo/bar/bla.conda" + ); + assert_eq!( + compute_package_url(&base_url, Some("../../root"), "bla.conda").to_string(), + "https://conda.anaconda.org/root/bla.conda" + ); + } } diff --git a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__base_url_packages.snap b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__base_url_packages.snap new file mode 100644 index 000000000..4aa817cf6 --- /dev/null +++ b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__base_url_packages.snap @@ -0,0 +1,11 @@ +--- +source: crates/rattler_conda_types/src/repo_data/mod.rs +assertion_line: 493 +expression: file_urls +--- +- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.tar.bz2 +- "channels/dummy/linux-64/baz-1.0-unix_py36h1af98f8_2\u0000.tar.bz2" +- channels/dummy/linux-64/foo-4.0.2-py36h1af98f8_2.tar.bz2 +- channels/dummy/linux-64/bar-1.0-unix_py36h1af98f8_2.tar.bz2 +- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.conda + diff --git a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize.snap b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize.snap index dd39506d6..3f4768b86 100644 --- a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize.snap +++ b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize.snap @@ -1,5 +1,6 @@ --- source: crates/rattler_conda_types/src/repo_data/mod.rs +assertion_line: 438 expression: repodata --- info: ~ @@ -13,5 +14,5 @@ removed: - quux - qux - xyz -repodata_version: 1 +repodata_version: 2 diff --git a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages-2.snap b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages-2.snap index e3e4b3c2d..017208b48 100644 --- a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages-2.snap +++ b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages-2.snap @@ -1,10 +1,12 @@ --- source: crates/rattler_conda_types/src/repo_data/mod.rs +assertion_line: 460 expression: json --- { "info": { - "subdir": "linux-64" + "subdir": "linux-64", + "base_url": "../linux-64" }, "packages": { "bar-1.0-unix_py36h1af98f8_2.tar.bz2": { @@ -83,5 +85,5 @@ expression: json } }, "packages.conda": {}, - "repodata_version": 1 + "repodata_version": 2 } diff --git a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages.snap b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages.snap index 762c3378c..13a13bd4d 100644 --- a/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages.snap +++ b/crates/rattler_conda_types/src/repo_data/snapshots/rattler_conda_types__repo_data__test__serialize_packages.snap @@ -1,9 +1,11 @@ --- source: crates/rattler_conda_types/src/repo_data/mod.rs +assertion_line: 448 expression: repodata --- info: subdir: linux-64 + base_url: "../linux-64" packages: bar-1.0-unix_py36h1af98f8_2.tar.bz2: build: unix_py36h1af98f8_2 @@ -73,5 +75,5 @@ packages: timestamp: 1605110689658 version: 4.0.2 packages.conda: {} -repodata_version: 1 +repodata_version: 2 diff --git a/crates/rattler_repodata_gateway/src/sparse/mod.rs b/crates/rattler_repodata_gateway/src/sparse/mod.rs index 0f7e7f4ad..150ec4322 100644 --- a/crates/rattler_repodata_gateway/src/sparse/mod.rs +++ b/crates/rattler_repodata_gateway/src/sparse/mod.rs @@ -3,7 +3,9 @@ use futures::{stream, StreamExt, TryFutureExt, TryStreamExt}; use itertools::Itertools; -use rattler_conda_types::{Channel, PackageName, PackageRecord, RepoDataRecord}; +use rattler_conda_types::{ + compute_package_url, Channel, ChannelInfo, PackageName, PackageRecord, RepoDataRecord, +}; use serde::{ de::{Error, MapAccess, Visitor}, Deserialize, Deserializer, @@ -90,9 +92,11 @@ impl SparseRepoData { /// Returns all the records for the specified package name. pub fn load_records(&self, package_name: &PackageName) -> io::Result> { let repo_data = self.inner.borrow_repo_data(); + let base_url = repo_data.info.as_ref().and_then(|i| i.base_url.as_deref()); let mut records = parse_records( package_name, &repo_data.packages, + base_url, &self.channel, &self.subdir, self.patch_record_fn, @@ -100,6 +104,7 @@ impl SparseRepoData { let mut conda_records = parse_records( package_name, &repo_data.conda_packages, + base_url, &self.channel, &self.subdir, self.patch_record_fn, @@ -133,11 +138,16 @@ impl SparseRepoData { while let Some(next_package) = pending.pop_front() { for (i, repo_data) in repo_data.iter().enumerate() { let repo_data_packages = repo_data.inner.borrow_repo_data(); + let base_url = repo_data_packages + .info + .as_ref() + .and_then(|i| i.base_url.as_deref()); // Get all records from the repodata let mut records = parse_records( &next_package, &repo_data_packages.packages, + base_url, &repo_data.channel, &repo_data.subdir, patch_function, @@ -145,6 +155,7 @@ impl SparseRepoData { let mut conda_records = parse_records( &next_package, &repo_data_packages.conda_packages, + base_url, &repo_data.channel, &repo_data.subdir, patch_function, @@ -180,6 +191,9 @@ impl SparseRepoData { /// A serde compatible struct that only sparsely parses a repodata.json file. #[derive(Deserialize)] struct LazyRepoData<'i> { + /// The channel information contained in the repodata.json file + info: Option, + /// The tar.bz2 packages contained in the repodata.json file #[serde(borrow)] #[serde(deserialize_with = "deserialize_filename_and_raw_record")] @@ -196,6 +210,7 @@ struct LazyRepoData<'i> { fn parse_records<'i>( package_name: &PackageName, packages: &[(PackageFilename<'i>, &'i RawValue)], + base_url: Option<&str>, channel: &Channel, subdir: &str, patch_function: Option, @@ -212,10 +227,14 @@ fn parse_records<'i>( package_record.subdir = subdir.to_owned(); } result.push(RepoDataRecord { - url: channel - .base_url() - .join(&format!("{}/{}", &package_record.subdir, &key.filename)) - .expect("failed to build a url from channel and package record"), + url: compute_package_url( + &channel + .base_url + .join(&format!("{}/", &package_record.subdir)) + .expect("failed determine repo_base_url"), + base_url, + key.filename, + ), channel: channel_name.clone(), package_record, file_name: key.filename.to_owned(), diff --git a/test-data/channels/dummy/linux-64/repodata.json b/test-data/channels/dummy/linux-64/repodata.json index 7d5f059a3..4a4acf92d 100644 --- a/test-data/channels/dummy/linux-64/repodata.json +++ b/test-data/channels/dummy/linux-64/repodata.json @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a8b4ace3d680d33498f54cb36d5f4c5234bac3929f72dff6858888bc02b13148 -size 2417 +oid sha256:659258cfd03f77f6af79d51d882dfdbafd68f9fb6fde2475a45f3452f7389345 +size 2449