Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move index functionality to rattler and create python bindings #436

Merged
merged 10 commits into from
Dec 15, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions crates/rattler_index/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "rattler_index"
version.workspace = true
edition.workspace = true
authors = []
description = "A crate that indexes directories containing conda packages to create local conda channels"
categories.workspace = true
homepage.workspace = true
repository.workspace = true
license.workspace = true
readme.workspace = true

[dependencies]
fs-err = "2.11.0"
rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types" }
rattler_digest = { version = "0.14.0", path = "../rattler_digest" }
rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming" }
BenjaminLowry marked this conversation as resolved.
Show resolved Hide resolved
serde_json = "1.0.108"
tracing = "0.1.40"
walkdir = "2.4.0"

[dev-dependencies]
tempfile = "3.8.0"
195 changes: 195 additions & 0 deletions crates/rattler_index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
//! Indexing of packages in an output folder to create up-to-date `repodata.json` files.
BenjaminLowry marked this conversation as resolved.
Show resolved Hide resolved
use rattler_conda_types::package::ArchiveType;
use rattler_conda_types::package::IndexJson;
use rattler_conda_types::package::PackageFile;
use rattler_conda_types::ChannelInfo;
use rattler_conda_types::PackageRecord;
use rattler_conda_types::Platform;
use rattler_conda_types::RepoData;
use rattler_package_streaming::read;
use rattler_package_streaming::seek;

use fs_err::File;
use std::ffi::OsStr;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use walkdir::WalkDir;

/// Builds a [`PackageRecord`] for the package archive at `file`.
///
/// The package metadata is parsed from `index_json_reader` as an [`IndexJson`], while the
/// SHA256 digest, MD5 digest and size are computed from the archive on disk at `file`.
/// A missing `subdir` in the index is recorded as `"unknown"`.
///
/// # Errors
///
/// Returns an error if the index cannot be parsed, or if the archive cannot be hashed or
/// its metadata cannot be read.
fn package_record_from_index_json<T: Read>(
    file: &Path,
    index_json_reader: &mut T,
) -> Result<PackageRecord, std::io::Error> {
    let index_json = IndexJson::from_reader(index_json_reader)?;

    // These values describe the archive itself, not the `index.json` stream.
    let sha256 = rattler_digest::compute_file_digest::<rattler_digest::Sha256>(file)?;
    let md5 = rattler_digest::compute_file_digest::<rattler_digest::Md5>(file)?;
    let archive_size = std::fs::metadata(file)?.len();

    Ok(PackageRecord {
        // Identity of the package.
        name: index_json.name,
        version: index_json.version,
        build: index_json.build,
        build_number: index_json.build_number,
        subdir: index_json
            .subdir
            .unwrap_or_else(|| String::from("unknown")),

        // Values derived from the archive on disk.
        md5: Some(md5),
        sha256: Some(sha256),
        size: Some(archive_size),

        // Remaining metadata is copied verbatim from the index.
        arch: index_json.arch,
        platform: index_json.platform,
        depends: index_json.depends,
        constrains: index_json.constrains,
        track_features: index_json.track_features,
        features: index_json.features,
        noarch: index_json.noarch,
        license: index_json.license,
        license_family: index_json.license_family,
        timestamp: index_json.timestamp,

        // Not available from the index.
        legacy_bz2_md5: None,
        legacy_bz2_size: None,
        purls: Default::default(),
    })
}

/// Builds a [`PackageRecord`] from the `info/index.json` entry of the `.tar.bz2` archive at
/// `file`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, the archive entries cannot be read, or no
/// `info/index.json` entry is found.
fn package_record_from_tar_bz2(file: &Path) -> Result<PackageRecord, std::io::Error> {
    let archive_file = std::fs::File::open(file)?;
    let mut tar_archive = read::stream_tar_bz2(archive_file);

    // `flatten` skips entries that could not be read.
    for mut tar_entry in tar_archive.entries()?.flatten() {
        let is_index_json = tar_entry.path()?.as_os_str().eq("info/index.json");
        if !is_index_json {
            continue;
        }
        return package_record_from_index_json(file, &mut tar_entry);
    }

    let not_found = std::io::Error::new(std::io::ErrorKind::Other, "No index.json found");
    Err(not_found)
}

/// Builds a [`PackageRecord`] from the `info/index.json` entry of the `.conda` archive at
/// `file`.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if `seek::stream_conda_info` rejects the
/// archive, if the archive entries cannot be read, or if no `info/index.json` entry is found.
fn package_record_from_conda(file: &Path) -> Result<PackageRecord, std::io::Error> {
    let reader = std::fs::File::open(file)?;

    // A broken archive is a recoverable condition: report it as an error instead of
    // panicking, so that callers can skip the offending package and carry on.
    let mut archive = seek::stream_conda_info(reader).map_err(|e| {
        std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Could not open conda file: {e}"),
        )
    })?;

    // `flatten` skips entries that could not be read.
    for mut entry in archive.entries()?.flatten() {
        let is_index_json = entry.path()?.as_os_str().eq("info/index.json");
        if is_index_json {
            return package_record_from_index_json(file, &mut entry);
        }
    }

    Err(std::io::Error::new(
        std::io::ErrorKind::Other,
        "No index.json found",
    ))
}

/// Returns the name of the directory that directly contains `path`, i.e. the subdir
/// (platform) a package archive belongs to.
fn subdir_name(path: &Path) -> Option<&OsStr> {
    path.parent().and_then(|parent| parent.file_name())
}

/// Create a new `repodata.json` for all packages in the given output folder. If `target_platform` is
/// `Some`, only that specific subdir is indexed. Otherwise indexes all subdirs and creates a
/// `repodata.json` for each.
///
/// Package archives are expected at `<output_folder>/<subdir>/<package>`; archives located
/// anywhere else are ignored, as is the `src_cache` directory. The `noarch` subdir (and the
/// target platform subdir, if given) is created when missing. When a target platform is
/// given, `noarch` is additionally indexed if it has no `repodata.json` yet.
///
/// `.tar.bz2` archives are recorded under `packages` and `.conda` archives under
/// `packages.conda`. Archives whose package record cannot be read are logged and skipped.
///
/// # Errors
///
/// Returns an error if a subdir cannot be created or a `repodata.json` cannot be serialized
/// or written.
pub fn index(
    output_folder: &Path,
    target_platform: Option<&Platform>,
) -> Result<(), std::io::Error> {
    let entries: Vec<(PathBuf, ArchiveType)> = WalkDir::new(output_folder)
        .into_iter()
        .filter_entry(|e| e.depth() <= 2)
        .filter_map(|e| e.ok())
        // Only archives directly inside a subdir belong to a platform. An archive in the
        // root of the output folder would otherwise turn the folder's own name into a
        // "platform" whose directory does not exist.
        .filter(|e| e.depth() == 2)
        .filter_map(|e| {
            let archive_type =
                ArchiveType::split_str(e.path().to_string_lossy().as_ref()).map(|(_, t)| t)?;
            // Keep the original path instead of rebuilding it from the lossy string, so
            // that non-UTF-8 paths still point at the real file.
            Some((e.into_path(), archive_type))
        })
        .collect();

    // find all subdirs
    let mut platforms = entries
        .iter()
        .filter_map(|(path, _)| subdir_name(path))
        .map(|name| name.to_string_lossy().into_owned())
        .filter(|name| name.as_str() != "src_cache")
        .collect::<std::collections::HashSet<_>>();

    // Always create the noarch subdir and consider it for indexing, even if the directory
    // already exists but does not contain any packages.
    let noarch_dir = output_folder.join("noarch");
    if !noarch_dir.exists() {
        std::fs::create_dir(&noarch_dir)?;
    }
    platforms.insert("noarch".to_string());

    // Create the target platform dir if needed and make sure it is indexed, even when it
    // does not contain any packages.
    let target_name = target_platform.map(|platform| platform.to_string());
    if let Some(target_name) = &target_name {
        let target_dir = output_folder.join(target_name);
        if !target_dir.exists() {
            std::fs::create_dir(&target_dir)?;
        }
        platforms.insert(target_name.clone());
    }

    for platform in platforms {
        if let Some(target_name) = &target_name {
            if &platform != target_name {
                // Besides the target platform only noarch is indexed, and only if it has
                // not been indexed before.
                if platform != "noarch" || noarch_dir.join("repodata.json").exists() {
                    continue;
                }
            }
        }

        let mut repodata = RepoData {
            info: Some(ChannelInfo {
                subdir: platform.clone(),
                base_url: None,
            }),
            packages: Default::default(),
            conda_packages: Default::default(),
            removed: Default::default(),
            version: Some(1),
        };

        for (path, archive_type) in &entries {
            // Only handle the archives that live in the subdir of the current platform.
            if subdir_name(path) != Some(OsStr::new(&platform)) {
                continue;
            }

            let record = match archive_type {
                ArchiveType::TarBz2 => package_record_from_tar_bz2(path),
                ArchiveType::Conda => package_record_from_conda(path),
            };
            let (Ok(record), Some(file_name)) = (record, path.file_name()) else {
                tracing::info!("Could not read package record from {:?}", path);
                continue;
            };

            // `.tar.bz2` packages belong in `packages`, `.conda` packages in
            // `packages.conda`.
            let file_name = file_name.to_string_lossy().to_string();
            match archive_type {
                ArchiveType::TarBz2 => repodata.packages.insert(file_name, record),
                ArchiveType::Conda => repodata.conda_packages.insert(file_name, record),
            };
        }

        let out_file = output_folder.join(platform).join("repodata.json");
        File::create(&out_file)?.write_all(serde_json::to_string_pretty(&repodata)?.as_bytes())?;
    }

    Ok(())
}

// TODO: write proper unit tests for above functions
61 changes: 61 additions & 0 deletions crates/rattler_index/tests/test_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use rattler_conda_types::Platform;
use rattler_index::index;
use serde_json::Value;
use std::fs;
use std::fs::File;
use std::path::{Path, PathBuf};

/// Returns the path of the `test-data` directory, located two levels above this crate's
/// manifest directory.
fn test_data_dir() -> PathBuf {
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    manifest_dir.join("../../test-data")
}

/// Indexes a `win-64` subdir containing a single `.conda` package and checks the resulting
/// `repodata.json` against the expected index entry stored in the test data.
#[test]
fn test_index() {
    let temp_dir = tempfile::tempdir().unwrap();
    let subdir_path = Path::new("win-64");
    let file_path = Path::new("conda-22.11.1-py38haa244fe_1.conda");
    let index_json_path = Path::new("conda-22.11.1-py38haa244fe_1-index.json");
    fs::create_dir(temp_dir.path().join(subdir_path)).unwrap();
    fs::copy(
        test_data_dir().join(file_path),
        temp_dir.path().join(subdir_path).join(file_path),
    )
    .unwrap();

    let res = index(temp_dir.path(), Some(&Platform::Win64));
    // Include the error in the failure message instead of only comparing a bool.
    assert!(res.is_ok(), "indexing failed: {res:?}");

    let repodata_path = temp_dir.path().join(subdir_path).join("repodata.json");
    let repodata_json: Value = serde_json::from_reader(File::open(repodata_path).unwrap()).unwrap();

    let expected_repodata_entry: Value =
        serde_json::from_reader(File::open(test_data_dir().join(index_json_path)).unwrap())
            .unwrap();

    assert_eq!(
        repodata_json
            .get("info")
            .unwrap()
            .get("subdir")
            .unwrap()
            .as_str(),
        Some("win-64")
    );
    assert!(repodata_json.get("packages").is_some());
    assert_eq!(
        repodata_json
            .get("packages.conda")
            .unwrap()
            .get("conda-22.11.1-py38haa244fe_1.conda")
            .unwrap(),
        &expected_repodata_entry
    );
}

/// Indexing an empty output folder must succeed and leave behind only the `noarch` subdir
/// with its `repodata.json`.
#[test]
fn test_index_empty_directory() {
    let temp_dir = tempfile::tempdir().unwrap();
    let res = index(temp_dir.path(), None);
    assert!(res.is_ok(), "indexing an empty directory failed: {res:?}");

    // Borrow the path here: passing `temp_dir` by value would move the `TempDir` into
    // `read_dir`, where it is dropped (deleting the directory) before the returned
    // iterator is consumed, which makes any assertion on its contents meaningless.
    let top_level: Vec<String> = fs::read_dir(temp_dir.path())
        .unwrap()
        .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(top_level, ["noarch"]);
    assert!(temp_dir
        .path()
        .join("noarch")
        .join("repodata.json")
        .is_file());
}
Loading
Loading