From fe3b1a11de37c027e6a8fe3bd29d6dd3201ef5f1 Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 13:33:09 -0500 Subject: [PATCH 1/9] first pass --- crates/rattler_index/Cargo.toml | 23 +++ crates/rattler_index/src/lib.rs | 195 ++++++++++++++++++ crates/rattler_index/tests/test_index.rs | 53 +++++ py-rattler/Cargo.lock | 186 +++++++++++++++-- py-rattler/Cargo.toml | 1 + py-rattler/rattler/__init__.py | 2 + py-rattler/rattler/index/__init__.py | 3 + py-rattler/rattler/index/index.py | 25 +++ py-rattler/src/index.rs | 16 ++ py-rattler/src/lib.rs | 4 + py-rattler/tests/unit/test_index.py | 55 +++++ .../conda-22.11.1-py38haa244fe_1-index.json | 35 ++++ 12 files changed, 579 insertions(+), 19 deletions(-) create mode 100644 crates/rattler_index/Cargo.toml create mode 100644 crates/rattler_index/src/lib.rs create mode 100644 crates/rattler_index/tests/test_index.rs create mode 100644 py-rattler/rattler/index/__init__.py create mode 100644 py-rattler/rattler/index/index.py create mode 100644 py-rattler/src/index.rs create mode 100644 py-rattler/tests/unit/test_index.py create mode 100644 test-data/conda-22.11.1-py38haa244fe_1-index.json diff --git a/crates/rattler_index/Cargo.toml b/crates/rattler_index/Cargo.toml new file mode 100644 index 000000000..74f2e249c --- /dev/null +++ b/crates/rattler_index/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "rattler_index" +version.workspace = true +edition.workspace = true +authors = [] +description = "A crate that indexes directories containing conda packages to create local conda channels" +categories.workspace = true +homepage.workspace = true +repository.workspace = true +license.workspace = true +readme.workspace = true + +[dependencies] +fs-err = "2.11.0" +rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types" } +rattler_digest = { version = "0.14.0", path = "../rattler_digest" } +rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming" } +serde_json = "1.0.108" +tracing = "0.1.40" +walkdir = "2.4.0" + +[dev-dependencies] +tempfile = "3.8.0" diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs new file mode 100644 index 000000000..42a1816ee --- /dev/null +++ b/crates/rattler_index/src/lib.rs @@ -0,0 +1,195 @@ +//! Indexing of packages in a output folder to create up to date repodata.json files +use rattler_conda_types::package::ArchiveType; +use rattler_conda_types::package::IndexJson; +use rattler_conda_types::package::PackageFile; +use rattler_conda_types::ChannelInfo; +use rattler_conda_types::PackageRecord; +use rattler_conda_types::Platform; +use rattler_conda_types::RepoData; +use rattler_package_streaming::read; +use rattler_package_streaming::seek; + +use fs_err::File; +use std::ffi::OsStr; +use std::io::Read; +use std::io::Write; +use std::path::Path; +use std::path::PathBuf; +use walkdir::WalkDir; + +fn package_record_from_index_json( + file: &Path, + index_json_reader: &mut T, +) -> Result { + let index = IndexJson::from_reader(index_json_reader)?; + + let sha256_result = rattler_digest::compute_file_digest::(file)?; + let md5_result = rattler_digest::compute_file_digest::(file)?; + let size = std::fs::metadata(file)?.len(); + + let package_record = PackageRecord { + name: index.name, + version: index.version, + build: index.build, + build_number: index.build_number, + subdir: index.subdir.unwrap_or_else(|| "unknown".to_string()), + md5: Some(md5_result), + sha256: Some(sha256_result), + size: Some(size), + arch: index.arch, + platform: index.platform, + depends: index.depends, + constrains: index.constrains, + track_features: index.track_features, + features: index.features, + noarch: index.noarch, + license: index.license, + license_family: index.license_family, + timestamp: index.timestamp, + legacy_bz2_md5: None, + legacy_bz2_size: None, + purls: Default::default(), + }; + Ok(package_record) +} + +fn package_record_from_tar_bz2(file: &Path) -> Result { + let reader = std::fs::File::open(file)?; + let mut archive = read::stream_tar_bz2(reader); + for entry in archive.entries()?.flatten() { + let mut entry = entry; + let path = entry.path()?; + if path.as_os_str().eq("info/index.json") { + return package_record_from_index_json(file, &mut entry); + } + } + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "No index.json found", + )) +} + +fn package_record_from_conda(file: &Path) -> Result { + let reader = std::fs::File::open(file)?; + let mut archive = seek::stream_conda_info(reader).expect("Could not open conda file"); + + for entry in archive.entries()?.flatten() { + let mut entry = entry; + let path = entry.path()?; + if path.as_os_str().eq("info/index.json") { + return package_record_from_index_json(file, &mut entry); + } + } + Err(std::io::Error::new( + std::io::ErrorKind::Other, + "No index.json found", + )) +} + +/// Create a new `repodata.json` for all packages in the given output folder. If `target_platform` is +/// `Some`, only that specific subdir is indexed. Otherwise indexes all subdirs and creates a +/// `repodata.json` for each. +pub fn index( + output_folder: &Path, + target_platform: Option<&Platform>, +) -> Result<(), std::io::Error> { + let entries = WalkDir::new(output_folder).into_iter(); + let entries: Vec<(PathBuf, ArchiveType)> = entries + .filter_entry(|e| e.depth() <= 2) + .filter_map(|e| e.ok()) + .filter_map(|e| { + ArchiveType::split_str(e.path().to_string_lossy().as_ref()) + .map(|(p, t)| (PathBuf::from(format!("{}{}", p, t.extension())), t)) + }) + .collect(); + + // find all subdirs + let mut platforms = entries + .iter() + .filter_map(|(p, _)| { + p.parent() + .and_then(|parent| parent.file_name()) + .and_then(|file_name| { + let name = file_name.to_string_lossy().to_string(); + if name != "src_cache" { + Some(name) + } else { + None + } + }) + }) + .collect::>(); + + // Always create noarch subdir + if !output_folder.join("noarch").exists() { + std::fs::create_dir(output_folder.join("noarch"))?; + platforms.insert("noarch".to_string()); + } + + // Create target platform dir if needed + if let Some(target_platform) = target_platform { + let platform_str = target_platform.to_string(); + if !output_folder.join(&platform_str).exists() { + std::fs::create_dir(output_folder.join(&platform_str))?; + platforms.insert(platform_str); + } + } + + for platform in platforms { + if let Some(target_platform) = target_platform { + if platform != target_platform.to_string() { + if platform != "noarch" { + continue; + } else { + // check that noarch is already indexed if it is not the target platform + if output_folder.join("noarch/repodata.json").exists() { + continue; + } + } + } + } + + let mut repodata = RepoData { + info: Some(ChannelInfo { + subdir: platform.clone(), + base_url: None, + }), + packages: Default::default(), + conda_packages: Default::default(), + removed: Default::default(), + version: Some(1), + }; + + for (p, t) in entries.iter().filter_map(|(p, t)| { + p.parent().and_then(|parent| { + parent.file_name().and_then(|file_name| { + if file_name == OsStr::new(&platform) { + // If the file_name is the platform we're looking for, return Some((p, t)) + Some((p, t)) + } else { + // Otherwise, we return None to filter out this item + None + } + }) + }) + }) { + let record = match t { + ArchiveType::TarBz2 => package_record_from_tar_bz2(p), + ArchiveType::Conda => package_record_from_conda(p), + }; + let (Ok(record), Some(file_name)) = (record, p.file_name()) else { + tracing::info!("Could not read package record from {:?}", p); + continue; + }; + repodata + .conda_packages + .insert(file_name.to_string_lossy().to_string(), record); + } + let out_file = output_folder.join(platform).join("repodata.json"); + File::create(&out_file)?.write_all(serde_json::to_string_pretty(&repodata)?.as_bytes())?; + } + + Ok(()) +} + +// TODO: write proper unit tests for above functions diff --git a/crates/rattler_index/tests/test_index.rs b/crates/rattler_index/tests/test_index.rs new file mode 100644 index 000000000..865e8e3ed --- /dev/null +++ b/crates/rattler_index/tests/test_index.rs @@ -0,0 +1,53 @@ +use rattler_conda_types::Platform; +use rattler_index::index; +use serde_json::Value; +use std::fs; +use std::fs::File; +use std::path::{Path, PathBuf}; + +fn test_data_dir() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data") +} + +#[test] +fn test_index() { + let temp_dir = tempfile::tempdir().unwrap(); + let subdir_path = Path::new("win-64"); + let file_path = Path::new("conda-22.11.1-py38haa244fe_1.conda"); + let index_json_path = Path::new("conda-22.11.1-py38haa244fe_1-index.json"); + fs::create_dir(temp_dir.path().join(subdir_path)).unwrap(); + fs::copy( + test_data_dir().join(file_path), + temp_dir.path().join(subdir_path).join(file_path), + ) + .unwrap(); + + let res = index(temp_dir.path(), Some(&Platform::Win64)); + assert_eq!(res.is_ok(), true); + + let repodata_path = temp_dir.path().join(subdir_path).join("repodata.json"); + let repodata_json: Value = serde_json::from_reader(File::open(repodata_path).unwrap()).unwrap(); + + let expected_repodata_entry: Value = + serde_json::from_reader(File::open(test_data_dir().join(index_json_path)).unwrap()) + .unwrap(); + + assert_eq!( + repodata_json + .get("info") + .unwrap() + .get("subdir") + .unwrap() + .as_str(), + Some("win-64") + ); + assert_eq!(repodata_json.get("packages").is_some(), true); + assert_eq!( + repodata_json + .get("packages.conda") + .unwrap() + .get("conda-22.11.1-py38haa244fe_1.conda") + .unwrap(), + &expected_repodata_entry + ); +} diff --git a/py-rattler/Cargo.lock b/py-rattler/Cargo.lock index bcf10b27a..512dfa8de 100644 --- a/py-rattler/Cargo.lock +++ b/py-rattler/Cargo.lock @@ -718,6 +718,25 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs-err" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88a41f105fe1d5b6b34b2055e3dc59bb79b46b48b2040b9e6c7b4b5de097aa41" +dependencies = [ + "autocfg", +] + +[[package]] +name = "fslock" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04412b8935272e3a9bae6f48c7bfff74c2911f60525404edfdd28e49884c3bfb" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "funty" version = "2.0.0" @@ -1674,6 +1693,50 @@ dependencies = [ "indexmap 2.0.2", ] +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_macros", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro2", + "quote", + "syn 2.0.37", + "unicase", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", + "unicase", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -1771,6 +1834,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "purl" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d153044e55fb5c0a6f0f0f974c3335d15a842263ba4b208d2656120fe530a5ab" +dependencies = [ + "hex", + "percent-encoding", + "phf", + "serde", + "smartstring", + "thiserror", + "unicase", +] + [[package]] name = "py-rattler" version = "0.2.0" @@ -1783,6 +1861,7 @@ dependencies = [ "rattler", "rattler_conda_types", "rattler_digest", + "rattler_index", "rattler_networking", "rattler_repodata_gateway", "rattler_shell", @@ -1923,7 +2002,7 @@ dependencies = [ [[package]] name = "rattler" -version = "0.12.2" +version = "0.14.0" dependencies = [ "anyhow", "async-compression", @@ -1962,7 +2041,7 @@ dependencies = [ [[package]] name = "rattler_conda_types" -version = "0.12.2" +version = "0.14.0" dependencies = [ "chrono", "fxhash", @@ -1972,6 +2051,7 @@ dependencies = [ "itertools", "lazy-regex", "nom", + "purl", "rattler_digest", "rattler_macros", "regex", @@ -1989,7 +2069,7 @@ dependencies = [ [[package]] name = "rattler_digest" -version = "0.12.2" +version = "0.14.0" dependencies = [ "blake2", "digest", @@ -2001,9 +2081,22 @@ dependencies = [ "tokio", ] +[[package]] +name = "rattler_index" +version = "0.14.0" +dependencies = [ + "fs-err", + "rattler_conda_types", + "rattler_digest", + "rattler_package_streaming", + "serde_json", + "tracing", + "walkdir", +] + [[package]] name = "rattler_macros" -version = "0.12.2" +version = "0.14.0" dependencies = [ "quote", "syn 2.0.37", @@ -2011,15 +2104,17 @@ dependencies = [ [[package]] name = "rattler_networking" -version = "0.12.2" +version = "0.14.0" dependencies = [ "anyhow", "dirs", + "fslock", "getrandom", "itertools", "keyring", "lazy_static", "libc", + "once_cell", "reqwest", "retry-policies", "serde", @@ -2031,7 +2126,7 @@ dependencies = [ [[package]] name = "rattler_package_streaming" -version = "0.12.2" +version = "0.14.0" dependencies = [ "bzip2", "chrono", @@ -2053,7 +2148,7 @@ dependencies = [ [[package]] name = "rattler_repodata_gateway" -version = "0.12.2" +version = "0.14.0" dependencies = [ "anyhow", "async-compression", @@ -2090,7 +2185,7 @@ dependencies = [ [[package]] name = "rattler_shell" -version = "0.12.2" +version = "0.14.0" dependencies = [ "enum_dispatch", "indexmap 2.0.2", @@ -2105,7 +2200,7 @@ dependencies = [ [[package]] name = "rattler_solve" -version = "0.12.2" +version = "0.14.0" dependencies = [ "anyhow", "chrono", @@ -2123,7 +2218,7 @@ dependencies = [ [[package]] name = "rattler_virtual_packages" -version = "0.12.2" +version = "0.14.0" dependencies = [ "cfg-if", "libloading", @@ -2362,6 +2457,15 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.22" @@ -2451,9 +2555,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.107" +version = "1.0.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" +checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b" dependencies = [ "indexmap 2.0.2", "itoa", @@ -2573,6 +2677,12 @@ dependencies = [ "libc", ] +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + [[package]] name = "slab" version = "0.4.9" @@ -2591,6 +2701,17 @@ dependencies = [ "serde", ] +[[package]] +name = "smartstring" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb72c633efbaa2dd666986505016c32c3044395ceaf881518399d2f4127ee29" +dependencies = [ + "autocfg", + "static_assertions", + "version_check", +] + [[package]] name = "socket2" version = "0.4.9" @@ -2920,11 +3041,10 @@ checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" [[package]] name = "tracing" -version = "0.1.37" +version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce8c33a8d48bd45d624a6e523445fd21ec13d3653cd51f681abf67418f54eb8" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "cfg-if", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -2932,9 +3052,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", @@ -2943,9 +3063,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", ] @@ -2981,6 +3101,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "unicase" +version = "2.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" @@ -3060,6 +3189,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d5b2c62b4012a3e1eca5a7e077d13b3bf498c4073e33ccd58626607748ceeca" +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3186,6 +3325,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" diff --git a/py-rattler/Cargo.toml b/py-rattler/Cargo.toml index 7ac1c6390..ca66d7282 100644 --- a/py-rattler/Cargo.toml +++ b/py-rattler/Cargo.toml @@ -36,6 +36,7 @@ rattler_virtual_packages = { path = "../crates/rattler_virtual_packages", defaul rattler_solve = { path = "../crates/rattler_solve", default-features = false, features = [ "resolvo", ] } +rattler_index = { path = "../crates/rattler_index" } pyo3 = { version = "0.19", features = [ "abi3-py38", diff --git a/py-rattler/rattler/__init__.py b/py-rattler/rattler/__init__.py index c25c634a2..bcd727a7c 100644 --- a/py-rattler/rattler/__init__.py +++ b/py-rattler/rattler/__init__.py @@ -16,6 +16,7 @@ from rattler.platform import Platform from rattler.utils.rattler_version import get_rattler_version as _get_rattler_version from rattler.linker import link +from rattler.index import index __version__ = _get_rattler_version() del _get_rattler_version @@ -42,4 +43,5 @@ "solve", "Platform", "link", + "index", ] diff --git a/py-rattler/rattler/index/__init__.py b/py-rattler/rattler/index/__init__.py new file mode 100644 index 000000000..9023b5dd1 --- /dev/null +++ b/py-rattler/rattler/index/__init__.py @@ -0,0 +1,3 @@ +from rattler.index.index import index + +__all__ = ["index"] diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py new file mode 100644 index 000000000..5c80c66f3 --- /dev/null +++ b/py-rattler/rattler/index/index.py @@ -0,0 +1,25 @@ +from __future__ import annotations +from typing import Optional, TYPE_CHECKING + +from rattler.rattler import py_index + +if TYPE_CHECKING: + import os + from rattler.platform import Platform + + +def index( + channel_directory: os.PathLike[str], + target_platform: Optional[Platform] = None, +) -> bool: + """ + TODO(blowry): add docstring + :param channel_directory: + :param target_platform: + :return: + """ + + return py_index( + channel_directory, + target_platform._inner if target_platform else target_platform, + ) diff --git a/py-rattler/src/index.rs b/py-rattler/src/index.rs new file mode 100644 index 000000000..fde6407d6 --- /dev/null +++ b/py-rattler/src/index.rs @@ -0,0 +1,16 @@ +use pyo3::{pyfunction, PyResult}; +use rattler_conda_types::Platform; +use rattler_index::index; + +use std::path::PathBuf; + +use crate::{error::PyRattlerError, platform::PyPlatform}; + +#[pyfunction] +pub fn py_index(channel_directory: PathBuf, target_platform: Option) -> PyResult { + let path = channel_directory.as_path(); + match index(path, target_platform.map(Platform::from).as_ref()) { + Ok(_v) => Ok(true), + Err(e) => Err(PyRattlerError::from(e).into()), + } +} diff --git a/py-rattler/src/lib.rs b/py-rattler/src/lib.rs index 4e944dfae..bddd5c878 100644 --- a/py-rattler/src/lib.rs +++ b/py-rattler/src/lib.rs @@ -1,6 +1,7 @@ mod channel; mod error; mod generic_virtual_package; +mod index; mod linker; mod match_spec; mod meta; @@ -35,6 +36,7 @@ use version::PyVersion; use pyo3::prelude::*; +use index::py_index; use linker::py_link; use meta::get_rattler_version; use platform::{PyArch, PyPlatform}; @@ -83,6 +85,8 @@ fn rattler(py: Python, m: &PyModule) -> PyResult<()> { .unwrap(); m.add_function(wrap_pyfunction!(py_link, m).unwrap()) .unwrap(); + m.add_function(wrap_pyfunction!(py_index, m).unwrap()) + .unwrap(); // Exceptions m.add( diff --git a/py-rattler/tests/unit/test_index.py b/py-rattler/tests/unit/test_index.py new file mode 100644 index 000000000..49beefd34 --- /dev/null +++ b/py-rattler/tests/unit/test_index.py @@ -0,0 +1,55 @@ +import os +from pathlib import Path +import pytest +import shutil + +from rattler import Platform, index + + +@pytest.fixture +def package_directory(tmp_path) -> Path: + data_dir = Path(os.path.join(os.path.dirname(__file__), "../../../test-data/")) + + win_filename = "ruff-0.0.171-py310h298983d_0.conda" + noarch_filename = "pytweening-1.0.4-pyhd8ed1ab_0.tar.bz2" + win_subdir = tmp_path / "win-64" + noarch_subdir = tmp_path / "noarch" + win_subdir.mkdir() + noarch_subdir.mkdir() + shutil.copy(data_dir / win_filename, win_subdir / win_filename) + shutil.copy(data_dir / noarch_filename, noarch_subdir / noarch_filename) + return tmp_path + + +def test_index(package_directory): + assert index(package_directory) == True + + assert set(os.listdir(package_directory)) == {"noarch", "win-64"} + assert "repodata.json" in os.listdir(package_directory / "win-64") + with open(package_directory / "win-64/repodata.json") as f: + assert "ruff-0.0.171-py310h298983d_0" in f.read() + assert "repodata.json" in os.listdir(package_directory / "noarch") + with open(package_directory / "noarch/repodata.json") as f: + assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() + + +def test_index_specific_subdir_non_noarch(package_directory): + assert index(package_directory, Platform("win-64")) == True + + assert "repodata.json" in os.listdir(package_directory / "win-64") + with open(package_directory / "win-64/repodata.json") as f: + assert "ruff-0.0.171-py310h298983d_0" in f.read() + assert "repodata.json" in os.listdir(package_directory / "noarch") + with open(package_directory / "noarch/repodata.json") as f: + assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() + + +def test_index_specific_subdir_noarch(package_directory): + assert index(package_directory, Platform("noarch")) == True + + win_files = os.listdir(package_directory / "win-64") + assert "repodata.json" not in win_files + assert "ruff-0.0.171-py310h298983d_0.conda" in win_files + assert "repodata.json" in os.listdir(package_directory / "noarch") + with open(package_directory / "noarch/repodata.json") as f: + assert "pytweening-1.0.4-pyhd8ed1ab_0" in f.read() diff --git a/test-data/conda-22.11.1-py38haa244fe_1-index.json b/test-data/conda-22.11.1-py38haa244fe_1-index.json new file mode 100644 index 000000000..7768a8ab2 --- /dev/null +++ b/test-data/conda-22.11.1-py38haa244fe_1-index.json @@ -0,0 +1,35 @@ +{ + "arch": "x86_64", + "build": "py38haa244fe_1", + "build_number": 1, + "constrains": [ + "conda-content-trust >=0.1.1", + "cytoolz >=0.8.1", + "conda-libmamba-solver >=22.12.0", + "conda-env >=2.6", + "conda-build >=3" + ], + "depends": [ + "conda-package-handling >=1.3.0", + "menuinst >=1.4.11,<2", + "pluggy >=1.0.0", + "pycosat >=0.6.3", + "pyopenssl >=16.2.0", + "python >=3.8,<3.9.0a0", + "python_abi 3.8.* *_cp38", + "requests >=2.20.1,<3", + "ruamel.yaml >=0.11.14,<0.18", + "setuptools >=31.0.1", + "toolz >=0.8.1", + "tqdm >=4" + ], + "license": "BSD-3-Clause", + "md5": "9987c96161034575f5a9c2be848960c5", + "name": "conda", + "platform": "win", + "sha256": "a8a44c5ff2b2f423546d49721ba2e3e632233c74a813c944adf8e5742834930e", + "size": 928900, + "subdir": "win-64", + "timestamp": 1670456742050, + "version": "22.11.1" +} From f7dba071ca651cc8bd19796defcd9168c4182a89 Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 14:01:42 -0500 Subject: [PATCH 2/9] docstring --- py-rattler/rattler/index/index.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index 5c80c66f3..fd2666ad1 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -1,24 +1,30 @@ from __future__ import annotations -from typing import Optional, TYPE_CHECKING +import os +from typing import Optional +from rattler.platform import Platform from rattler.rattler import py_index -if TYPE_CHECKING: - import os - from rattler.platform import Platform - def index( channel_directory: os.PathLike[str], target_platform: Optional[Platform] = None, ) -> bool: """ - TODO(blowry): add docstring - :param channel_directory: - :param target_platform: - :return: - """ + Indexes dependencies in the `channel_directory` for one or more subdirectories within said directory. + Will generate repodata.json files in each subdirectory containing metadata about each present package, + or if `target_platform` is specified will only consider the subdirectory corresponding to this platform. + Will always index the "noarch" subdirectory, and thus this subdirectory should always be present, because + conda channels at a minimum must include this subdirectory. + Arguments: + channel_directory: A `os.PathLike[str]` that is the directory containing subdirectories + of dependencies to index. + target_platform(optional): A `Platform` to index dependencies for + + Returns: + True iff indexing was successful + """ return py_index( channel_directory, target_platform._inner if target_platform else target_platform, From 66651dad92760da782d329923dd56649e7a90dcf Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 14:18:05 -0500 Subject: [PATCH 3/9] another test --- crates/rattler_index/tests/test_index.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/rattler_index/tests/test_index.rs b/crates/rattler_index/tests/test_index.rs index 865e8e3ed..005b707fc 100644 --- a/crates/rattler_index/tests/test_index.rs +++ b/crates/rattler_index/tests/test_index.rs @@ -51,3 +51,11 @@ fn test_index() { &expected_repodata_entry ); } + +#[test] +fn test_index_empty_directory() { + let temp_dir = tempfile::tempdir().unwrap(); + let res = index(temp_dir.path(), None); + assert_eq!(res.is_ok(), true); + assert_eq!(fs::read_dir(temp_dir).unwrap().count(), 0); +} From 54ec53a72468f9a92a3b484850c08fbfb6f55aad Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 14:28:39 -0500 Subject: [PATCH 4/9] lint --- py-rattler/tests/unit/test_index.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/py-rattler/tests/unit/test_index.py b/py-rattler/tests/unit/test_index.py index 49beefd34..d24c1a2aa 100644 --- a/py-rattler/tests/unit/test_index.py +++ b/py-rattler/tests/unit/test_index.py @@ -1,3 +1,4 @@ +# type: ignore import os from pathlib import Path import pytest @@ -22,7 +23,7 @@ def package_directory(tmp_path) -> Path: def test_index(package_directory): - assert index(package_directory) == True + assert index(package_directory) is True assert set(os.listdir(package_directory)) == {"noarch", "win-64"} assert "repodata.json" in os.listdir(package_directory / "win-64") @@ -34,7 +35,7 @@ def test_index(package_directory): def test_index_specific_subdir_non_noarch(package_directory): - assert index(package_directory, Platform("win-64")) == True + assert index(package_directory, Platform("win-64")) is True assert "repodata.json" in os.listdir(package_directory / "win-64") with open(package_directory / "win-64/repodata.json") as f: @@ -45,7 +46,7 @@ def test_index_specific_subdir_non_noarch(package_directory): def test_index_specific_subdir_noarch(package_directory): - assert index(package_directory, Platform("noarch")) == True + assert index(package_directory, Platform("noarch")) is True win_files = os.listdir(package_directory / "win-64") assert "repodata.json" not in win_files From ec600cb44edd23415ea3c19feed23946654c5771 Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 15:49:32 -0500 Subject: [PATCH 5/9] vendored openssl --- py-rattler/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-rattler/Cargo.toml b/py-rattler/Cargo.toml index ca66d7282..0301e32b2 100644 --- a/py-rattler/Cargo.toml +++ b/py-rattler/Cargo.toml @@ -49,7 +49,7 @@ tokio = { version = "1.32" } thiserror = "1.0.44" url = "2.4.1" -openssl = { version = "0.10", optional = true } +openssl = { version = "0.10", optional = true, features = ["vendored"] } # Prevent package from thinking it's in the workspace [workspace] From dd890c9ef15d620f867925877e881634b7f8f99a Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Tue, 12 Dec 2023 17:10:18 -0500 Subject: [PATCH 6/9] feedback --- crates/rattler_index/Cargo.toml | 6 +++--- crates/rattler_index/src/lib.rs | 2 ++ py-rattler/Cargo.toml | 2 +- py-rattler/rattler/index/index.py | 7 ++----- py-rattler/src/index.rs | 20 +++++++++++++------- py-rattler/tests/unit/test_index.py | 6 +++--- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/crates/rattler_index/Cargo.toml b/crates/rattler_index/Cargo.toml index 74f2e249c..558ee00c6 100644 --- a/crates/rattler_index/Cargo.toml +++ b/crates/rattler_index/Cargo.toml @@ -12,9 +12,9 @@ readme.workspace = true [dependencies] fs-err = "2.11.0" -rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types" } -rattler_digest = { version = "0.14.0", path = "../rattler_digest" } -rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming" } +rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types", default-features = false } +rattler_digest = { version = "0.14.0", path = "../rattler_digest", default-features = false } +rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming", default-features = false } serde_json = "1.0.108" tracing = "0.1.40" walkdir = "2.4.0" diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 42a1816ee..b054bc547 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -1,4 +1,6 @@ //! Indexing of packages in a output folder to create up to date repodata.json files +#![deny(missing_docs)] + use rattler_conda_types::package::ArchiveType; use rattler_conda_types::package::IndexJson; use rattler_conda_types::package::PackageFile; diff --git a/py-rattler/Cargo.toml b/py-rattler/Cargo.toml index 0301e32b2..ca66d7282 100644 --- a/py-rattler/Cargo.toml +++ b/py-rattler/Cargo.toml @@ -49,7 +49,7 @@ tokio = { version = "1.32" } thiserror = "1.0.44" url = "2.4.1" -openssl = { version = "0.10", optional = true, features = ["vendored"] } +openssl = { version = "0.10", optional = true } # Prevent package from thinking it's in the workspace [workspace] diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index fd2666ad1..bd077c307 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -9,7 +9,7 @@ def index( channel_directory: os.PathLike[str], target_platform: Optional[Platform] = None, -) -> bool: +) -> None: """ Indexes dependencies in the `channel_directory` for one or more subdirectories within said directory. Will generate repodata.json files in each subdirectory containing metadata about each present package, @@ -21,11 +21,8 @@ def index( channel_directory: A `os.PathLike[str]` that is the directory containing subdirectories of dependencies to index. target_platform(optional): A `Platform` to index dependencies for - - Returns: - True iff indexing was successful """ - return py_index( + py_index( channel_directory, target_platform._inner if target_platform else target_platform, ) diff --git a/py-rattler/src/index.rs b/py-rattler/src/index.rs index fde6407d6..a551ce69c 100644 --- a/py-rattler/src/index.rs +++ b/py-rattler/src/index.rs @@ -1,4 +1,4 @@ -use pyo3::{pyfunction, PyResult}; +use pyo3::{pyfunction, PyResult, Python}; use rattler_conda_types::Platform; use rattler_index::index; @@ -7,10 +7,16 @@ use std::path::PathBuf; use crate::{error::PyRattlerError, platform::PyPlatform}; #[pyfunction] -pub fn py_index(channel_directory: PathBuf, target_platform: Option) -> PyResult { - let path = channel_directory.as_path(); - match index(path, target_platform.map(Platform::from).as_ref()) { - Ok(_v) => Ok(true), - Err(e) => Err(PyRattlerError::from(e).into()), - } +pub fn py_index( + py: Python<'_>, + channel_directory: PathBuf, + target_platform: Option, +) -> PyResult<()> { + py.allow_threads(move || { + let path = channel_directory.as_path(); + match index(path, target_platform.map(Platform::from).as_ref()) { + Ok(_v) => Ok(()), + Err(e) => Err(PyRattlerError::from(e).into()), + } + }) } diff --git a/py-rattler/tests/unit/test_index.py b/py-rattler/tests/unit/test_index.py index d24c1a2aa..4f4f0752a 100644 --- a/py-rattler/tests/unit/test_index.py +++ b/py-rattler/tests/unit/test_index.py @@ -23,7 +23,7 @@ def package_directory(tmp_path) -> Path: def test_index(package_directory): - assert index(package_directory) is True + index(package_directory) assert set(os.listdir(package_directory)) == {"noarch", "win-64"} assert "repodata.json" in os.listdir(package_directory / "win-64") @@ -35,7 +35,7 @@ def test_index(package_directory): def test_index_specific_subdir_non_noarch(package_directory): - assert index(package_directory, Platform("win-64")) is True + index(package_directory, Platform("win-64")) assert "repodata.json" in os.listdir(package_directory / "win-64") with open(package_directory / "win-64/repodata.json") as f: @@ -46,7 +46,7 @@ def test_index_specific_subdir_non_noarch(package_directory): def test_index_specific_subdir_noarch(package_directory): - assert index(package_directory, Platform("noarch")) is True + index(package_directory, Platform("noarch")) win_files = os.listdir(package_directory / "win-64") assert "repodata.json" not in win_files From b106af292842ea7a8192d16c73e6951ff029116c Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Wed, 13 Dec 2023 11:11:14 -0500 Subject: [PATCH 7/9] trivial --- py-rattler/rattler/index/index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index bd077c307..a227186a7 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -14,13 +14,13 @@ def index( Indexes dependencies in the `channel_directory` for one or more subdirectories within said directory. Will generate repodata.json files in each subdirectory containing metadata about each present package, or if `target_platform` is specified will only consider the subdirectory corresponding to this platform. - Will always index the "noarch" subdirectory, and thus this subdirectory should always be present, because + Will always index the "noarch" subdirectory, and thus this subdirectory should always be present because conda channels at a minimum must include this subdirectory. Arguments: channel_directory: A `os.PathLike[str]` that is the directory containing subdirectories of dependencies to index. - target_platform(optional): A `Platform` to index dependencies for + target_platform(optional): A `Platform` to index dependencies for. """ py_index( channel_directory, From c371b39aee1fe89fa429f7bd886426e38317c9d9 Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Wed, 13 Dec 2023 11:19:51 -0500 Subject: [PATCH 8/9] again --- py-rattler/rattler/index/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index a227186a7..a55cce8e8 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -14,7 +14,7 @@ def index( Indexes dependencies in the `channel_directory` for one or more subdirectories within said directory. Will generate repodata.json files in each subdirectory containing metadata about each present package, or if `target_platform` is specified will only consider the subdirectory corresponding to this platform. - Will always index the "noarch" subdirectory, and thus this subdirectory should always be present because + Will always index the "noarch" subdirectory, and thus this subdirectory should always be present, because conda channels at a minimum must include this subdirectory. Arguments: From b17719398b5e4f91344abda2f6ae9acb8856781f Mon Sep 17 00:00:00 2001 From: Benjamin Lowry Date: Fri, 15 Dec 2023 09:34:30 -0500 Subject: [PATCH 9/9] 2 --- crates/rattler_index/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index b054bc547..0c3d2bfe2 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -159,7 +159,7 @@ pub fn index( packages: Default::default(), conda_packages: Default::default(), removed: Default::default(), - version: Some(1), + version: Some(2), }; for (p, t) in entries.iter().filter_map(|(p, t)| {