feat: implement base_url cep #322

Merged
4 commits merged on Sep 11, 2023
2 changes: 2 additions & 0 deletions crates/rattler_conda_types/Cargo.toml
@@ -43,6 +43,8 @@ rstest = "0.18.2"
assert_matches = "1.5.0"
hex-literal = "0.4.1"
criterion = { version = "0.5", features = ["html_reports"] }
pathdiff = "0.2.1"
dunce = "1.0.4"

[[bench]]
name = "parse"
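Both additions are dev-dependencies used only by the new snapshot test below: `dunce` canonicalizes the test-data path (avoiding UNC-style paths on Windows) and `pathdiff` converts the resulting file URLs back into channel-relative paths for snapshotting.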
4 changes: 3 additions & 1 deletion crates/rattler_conda_types/src/lib.rs
@@ -36,7 +36,9 @@ pub use package_name::{InvalidPackageNameError, PackageName};
pub use platform::{Arch, ParseArchError, ParsePlatformError, Platform};
pub use prefix_record::PrefixRecord;
pub use repo_data::patches::{PackageRecordPatch, PatchInstructions, RepoDataPatch};
pub use repo_data::{ChannelInfo, ConvertSubdirError, PackageRecord, RepoData};
pub use repo_data::{
compute_package_url, ChannelInfo, ConvertSubdirError, PackageRecord, RepoData,
};
pub use repo_data_record::RepoDataRecord;
pub use run_export::RunExportKind;
pub use version::{
135 changes: 128 additions & 7 deletions crates/rattler_conda_types/src/repo_data/mod.rs
@@ -4,6 +4,7 @@
pub mod patches;
mod topological_sort;

use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::{Display, Formatter};
use std::path::Path;
@@ -14,6 +15,7 @@ use rattler_digest::{serde::SerializableHash, Md5Hash, Sha256Hash};
use serde::{Deserialize, Serialize};
use serde_with::{serde_as, skip_serializing_none, OneOrMany};
use thiserror::Error;
use url::Url;

use rattler_macros::sorted;

@@ -57,6 +59,10 @@ pub struct RepoData {
pub struct ChannelInfo {
/// The channel's subdirectory
pub subdir: String,

/// The base_url for all package urls. Can be an absolute or relative url.
#[serde(skip_serializing_if = "Option::is_none")]
pub base_url: Option<String>,
}

/// A single record in the Conda repodata. A single record refers to a single binary distribution
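For illustration (not part of the diff), a minimal sketch of what the new field looks like on the wire. It assumes `ChannelInfo` derives `Deserialize`, which is how it is read as part of `repodata.json` elsewhere in this PR; the mirror URL is a placeholder.

```rust
use rattler_conda_types::ChannelInfo;

fn main() {
    // Hypothetical `info` section of a repodata.json that opts in to the base_url CEP.
    let json = r#"{ "subdir": "linux-64", "base_url": "https://mirror.example.org/conda-forge/linux-64" }"#;
    let info: ChannelInfo = serde_json::from_str(json).expect("valid channel info");
    assert_eq!(
        info.base_url.as_deref(),
        Some("https://mirror.example.org/conda-forge/linux-64")
    );
}
```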
@@ -173,17 +179,29 @@ impl RepoData {
Ok(serde_json::from_str(&contents)?)
}

/// Returns the `base_url` specified in the repodata.
pub fn base_url(&self) -> Option<&str> {
self.info.as_ref().and_then(|i| i.base_url.as_deref())
}

/// Builds a [`Vec<RepoDataRecord>`] from the packages in a [`RepoData`] given the source of the
/// data.
pub fn into_repo_data_records(self, channel: &Channel) -> Vec<RepoDataRecord> {
let mut records = Vec::with_capacity(self.packages.len() + self.conda_packages.len());
let channel_name = channel.canonical_name();
let base_url = self.base_url().map(ToOwned::to_owned);

// Determine the base_url of the channel
for (filename, package_record) in self.packages.into_iter().chain(self.conda_packages) {
records.push(RepoDataRecord {
url: channel
.base_url()
.join(&format!("{}/{}", &package_record.subdir, &filename))
.expect("failed to build a url from channel and package record"),
url: compute_package_url(
&channel
.base_url()
.join(&package_record.subdir)
.expect("cannot join channel base_url and subdir"),
base_url.as_deref(),
&filename,
),
channel: channel_name.clone(),
package_record,
file_name: filename,
@@ -193,6 +211,50 @@
}
}

/// Computes the URL for a package.
pub fn compute_package_url(
repo_data_base_url: &Url,
base_url: Option<&str>,
filename: &str,
) -> Url {
let mut absolute_url = match base_url {
None => repo_data_base_url.clone(),
Some(base_url) => match Url::parse(base_url) {
Err(url::ParseError::RelativeUrlWithoutBase) if !base_url.starts_with('/') => {
add_trailing_slash(repo_data_base_url)
.join(base_url)
.expect("failed to join base_url with channel")
}
Err(url::ParseError::RelativeUrlWithoutBase) => {
let mut url = repo_data_base_url.clone();
url.set_path(base_url);
url
}
Err(e) => unreachable!("{e}"),
Ok(base_url) => base_url,
},
};

let path = absolute_url.path();
if !path.ends_with('/') {
absolute_url.set_path(&format!("{path}/"))
}
absolute_url
.join(filename)
.expect("failed to join base_url and filename")
}

fn add_trailing_slash(url: &Url) -> Cow<Url> {
let path = url.path();
if !path.ends_with('/') {
let mut url = url.clone();
url.set_path(&format!("{path}/"));
Cow::Owned(url)
} else {
Cow::Borrowed(url)
}
}

impl PackageRecord {
/// A simple helper method that constructs a `PackageRecord` with the bare minimum values.
pub fn new(name: PackageName, version: impl Into<VersionWithSource>, build: String) -> Self {
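As a usage sketch (not part of the diff): the path and channel name below are placeholders and error handling is simplified; the point is that `into_repo_data_records` now routes every URL through `compute_package_url`, so a `base_url` carried by the repodata takes precedence over the channel's own location.

```rust
use std::path::Path;

use rattler_conda_types::{Channel, ChannelConfig, RepoData};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical local copy of a subdir's repodata.json.
    let repodata = RepoData::from_path(Path::new("linux-64/repodata.json"))?;

    // If `info.base_url` is set, package URLs are rebased onto it; otherwise they
    // are joined onto the channel's own base URL, as before this change.
    let channel = Channel::from_str("conda-forge", &ChannelConfig::default())?;
    for record in repodata.into_repo_data_records(&channel) {
        println!("{}", record.url);
    }
    Ok(())
}
```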
@@ -351,10 +413,10 @@ fn sort_set_alphabetically<S: serde::Serializer>(

#[cfg(test)]
mod test {
use crate::repo_data::determine_subdir;
use crate::repo_data::{compute_package_url, determine_subdir};
use fxhash::FxHashSet;

use crate::RepoData;
use crate::{Channel, ChannelConfig, RepoData};

// isl-0.12.2-1.tar.bz2
// gmp-5.1.2-6.tar.bz2
@@ -371,7 +433,7 @@ mod test {
#[test]
fn test_serialize() {
let repodata = RepoData {
version: Some(1),
version: Some(2),
info: Default::default(),
packages: Default::default(),
conda_packages: Default::default(),
@@ -397,4 +459,63 @@
let json = serde_json::to_string_pretty(&repodata).unwrap();
insta::assert_snapshot!(json);
}

#[test]
fn test_base_url_packages() {
// load test data
let test_data_path = dunce::canonicalize(
std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data"),
)
.unwrap();
let data_path = test_data_path.join("channels/dummy/linux-64/repodata.json");
let repodata = RepoData::from_path(&data_path).unwrap();

let channel = Channel::from_str(
url::Url::from_directory_path(data_path.parent().unwrap().parent().unwrap())
.unwrap()
.as_str(),
&ChannelConfig::default(),
)
.unwrap();

let file_urls = repodata
.into_repo_data_records(&channel)
.into_iter()
.map(|r| {
pathdiff::diff_paths(r.url.to_file_path().unwrap(), &test_data_path)
.unwrap()
.to_string_lossy()
.replace('\\', "/")
})
.collect::<Vec<_>>();

// serialize to yaml
insta::assert_yaml_snapshot!(file_urls);
}

#[test]
fn test_base_url() {
let channel = Channel::from_str("conda-forge", &ChannelConfig::default()).unwrap();
let base_url = channel.base_url().join("linux-64/").unwrap();
assert_eq!(
compute_package_url(&base_url, None, "bla.conda").to_string(),
"https://conda.anaconda.org/conda-forge/linux-64/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("https://host.some.org"), "bla.conda",).to_string(),
"https://host.some.org/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("/root"), "bla.conda").to_string(),
"https://conda.anaconda.org/root/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("foo/bar"), "bla.conda").to_string(),
"https://conda.anaconda.org/conda-forge/linux-64/foo/bar/bla.conda"
);
assert_eq!(
compute_package_url(&base_url, Some("../../root"), "bla.conda").to_string(),
"https://conda.anaconda.org/root/bla.conda"
);
}
}
@@ -0,0 +1,11 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 493
expression: file_urls
---
- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.tar.bz2
- "channels/dummy/linux-64/baz-1.0-unix_py36h1af98f8_2\u0000.tar.bz2"
- channels/dummy/linux-64/foo-4.0.2-py36h1af98f8_2.tar.bz2
- channels/dummy/linux-64/bar-1.0-unix_py36h1af98f8_2.tar.bz2
- channels/dummy/linux-64/foo-3.0.2-py36h1af98f8_1.conda

@@ -1,5 +1,6 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 438
expression: repodata
---
info: ~
@@ -13,5 +14,5 @@ removed:
- quux
- qux
- xyz
repodata_version: 1
repodata_version: 2

@@ -1,10 +1,12 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 460
expression: json
---
{
"info": {
"subdir": "linux-64"
"subdir": "linux-64",
"base_url": "../linux-64"
},
"packages": {
"bar-1.0-unix_py36h1af98f8_2.tar.bz2": {
@@ -83,5 +85,5 @@ expression: json
}
},
"packages.conda": {},
"repodata_version": 1
"repodata_version": 2
}
@@ -1,9 +1,11 @@
---
source: crates/rattler_conda_types/src/repo_data/mod.rs
assertion_line: 448
expression: repodata
---
info:
subdir: linux-64
base_url: "../linux-64"
packages:
bar-1.0-unix_py36h1af98f8_2.tar.bz2:
build: unix_py36h1af98f8_2
@@ -73,5 +75,5 @@ packages:
timestamp: 1605110689658
version: 4.0.2
packages.conda: {}
repodata_version: 1
repodata_version: 2

29 changes: 24 additions & 5 deletions crates/rattler_repodata_gateway/src/sparse/mod.rs
@@ -3,7 +3,9 @@

use futures::{stream, StreamExt, TryFutureExt, TryStreamExt};
use itertools::Itertools;
use rattler_conda_types::{Channel, PackageName, PackageRecord, RepoDataRecord};
use rattler_conda_types::{
compute_package_url, Channel, ChannelInfo, PackageName, PackageRecord, RepoDataRecord,
};
use serde::{
de::{Error, MapAccess, Visitor},
Deserialize, Deserializer,
@@ -90,16 +92,19 @@ impl SparseRepoData {
/// Returns all the records for the specified package name.
pub fn load_records(&self, package_name: &PackageName) -> io::Result<Vec<RepoDataRecord>> {
let repo_data = self.inner.borrow_repo_data();
let base_url = repo_data.info.as_ref().and_then(|i| i.base_url.as_deref());
let mut records = parse_records(
package_name,
&repo_data.packages,
base_url,
&self.channel,
&self.subdir,
self.patch_record_fn,
)?;
let mut conda_records = parse_records(
package_name,
&repo_data.conda_packages,
base_url,
&self.channel,
&self.subdir,
self.patch_record_fn,
@@ -133,18 +138,24 @@ impl SparseRepoData {
while let Some(next_package) = pending.pop_front() {
for (i, repo_data) in repo_data.iter().enumerate() {
let repo_data_packages = repo_data.inner.borrow_repo_data();
let base_url = repo_data_packages
.info
.as_ref()
.and_then(|i| i.base_url.as_deref());

// Get all records from the repodata
let mut records = parse_records(
&next_package,
&repo_data_packages.packages,
base_url,
&repo_data.channel,
&repo_data.subdir,
patch_function,
)?;
let mut conda_records = parse_records(
&next_package,
&repo_data_packages.conda_packages,
base_url,
&repo_data.channel,
&repo_data.subdir,
patch_function,
@@ -180,6 +191,9 @@ impl SparseRepoData {
/// A serde compatible struct that only sparsely parses a repodata.json file.
#[derive(Deserialize)]
struct LazyRepoData<'i> {
/// The channel information contained in the repodata.json file
info: Option<ChannelInfo>,

/// The tar.bz2 packages contained in the repodata.json file
#[serde(borrow)]
#[serde(deserialize_with = "deserialize_filename_and_raw_record")]
@@ -196,6 +210,7 @@ struct LazyRepoData<'i> {
fn parse_records<'i>(
package_name: &PackageName,
packages: &[(PackageFilename<'i>, &'i RawValue)],
base_url: Option<&str>,
channel: &Channel,
subdir: &str,
patch_function: Option<fn(&mut PackageRecord)>,
@@ -212,10 +227,14 @@
package_record.subdir = subdir.to_owned();
}
result.push(RepoDataRecord {
url: channel
.base_url()
.join(&format!("{}/{}", &package_record.subdir, &key.filename))
.expect("failed to build a url from channel and package record"),
url: compute_package_url(
&channel
.base_url
.join(&format!("{}/", &package_record.subdir))
.expect("failed determine repo_base_url"),
base_url,
key.filename,
),
channel: channel_name.clone(),
package_record,
file_name: key.filename.to_owned(),
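A rough sketch of the sparse path for comparison; the `SparseRepoData::new` signature and the `PackageName::new_unchecked` helper are assumed from surrounding code and existing usage, not shown in this diff, and the paths are placeholders.

```rust
use std::path::Path;

use rattler_conda_types::{Channel, ChannelConfig, PackageName};
use rattler_repodata_gateway::sparse::SparseRepoData;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let channel = Channel::from_str("conda-forge", &ChannelConfig::default())?;

    // Assumed constructor: channel, subdir name, path to repodata.json, optional patch fn.
    let sparse = SparseRepoData::new(
        channel,
        "linux-64",
        Path::new("linux-64/repodata.json"),
        None,
    )?;

    // `load_records` now resolves each record's URL via `compute_package_url`,
    // honouring a `base_url` from the repodata's `info` section when present.
    let name = PackageName::new_unchecked("python");
    for record in sparse.load_records(&name)? {
        println!("{}", record.url);
    }
    Ok(())
}
```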
4 changes: 2 additions & 2 deletions test-data/channels/dummy/linux-64/repodata.json
Git LFS file not shown