From ed563c4c081204dcb7bb8b0742250752f645da69 Mon Sep 17 00:00:00 2001 From: Wolf Vollprecht Date: Thu, 11 Jan 2024 14:36:10 +0100 Subject: [PATCH] properly recursively delete empty directories and add tests --- .../rattler/src/install/clobber_registry.rs | 15 +- crates/rattler/src/install/driver.rs | 2 +- crates/rattler/src/install/unlink.rs | 237 +++++++++++++++--- crates/rattler/src/lib.rs | 33 +++ .../rattler_conda_types/src/prefix_record.rs | 23 ++ 5 files changed, 262 insertions(+), 48 deletions(-) diff --git a/crates/rattler/src/install/clobber_registry.rs b/crates/rattler/src/install/clobber_registry.rs index 586075b96..4fa4144f4 100644 --- a/crates/rattler/src/install/clobber_registry.rs +++ b/crates/rattler/src/install/clobber_registry.rs @@ -152,7 +152,7 @@ impl ClobberRegistry { .iter() .cloned() .enumerate() - .filter(|(_, n)| clobbered_by_names.contains(&n)) + .filter(|(_, n)| clobbered_by_names.contains(n)) .collect::>(); let winner = sorted_clobbered_by.last().expect("No winner found"); @@ -272,7 +272,7 @@ mod tests { package_cache::PackageCache, }; - fn repodata_record(filename: &str) -> RepoDataRecord { + fn get_repodata_record(filename: &str) -> RepoDataRecord { let path = fs::canonicalize(get_test_data_dir().join(filename)).unwrap(); let index_json = read_package_file::(&path).unwrap(); @@ -304,8 +304,7 @@ mod tests { install_driver: &InstallDriver, install_options: &InstallOptions, ) -> anyhow::Result<()> { - // Link the contents of the package into our environment. This returns all the paths that were - // linked. + // Link the contents of the package into our environment. This returns all the paths that were linked. let paths = crate::install::link_package( &package_dir, target_prefix, @@ -324,9 +323,7 @@ mod tests { .map(|entry| entry.relative_path.clone()) .collect(), paths_data: paths.into(), - // TODO: Retrieve the requested spec for this package from the request requested_spec: None, - // TODO: What to do with this? link: None, }; @@ -436,9 +433,9 @@ mod tests { } fn test_operations() -> Vec> { - let repodata_record_1 = repodata_record("clobber/clobber-1-0.1.0-h4616a5c_0.tar.bz2"); - let repodata_record_2 = repodata_record("clobber/clobber-2-0.1.0-h4616a5c_0.tar.bz2"); - let repodata_record_3 = repodata_record("clobber/clobber-3-0.1.0-h4616a5c_0.tar.bz2"); + let repodata_record_1 = get_repodata_record("clobber/clobber-1-0.1.0-h4616a5c_0.tar.bz2"); + let repodata_record_2 = get_repodata_record("clobber/clobber-2-0.1.0-h4616a5c_0.tar.bz2"); + let repodata_record_3 = get_repodata_record("clobber/clobber-3-0.1.0-h4616a5c_0.tar.bz2"); vec![ TransactionOperation::Install(repodata_record_1), diff --git a/crates/rattler/src/install/driver.rs b/crates/rattler/src/install/driver.rs index 1e920186b..b6230ee68 100644 --- a/crates/rattler/src/install/driver.rs +++ b/crates/rattler/src/install/driver.rs @@ -162,7 +162,7 @@ impl InstallDriver { self.clobber_registry() .post_process(&required_packages, target_prefix) - .map_err(|e| InstallError::PostProcessFailed(e))?; + .map_err(InstallError::PostProcessFailed)?; Ok(()) } diff --git a/crates/rattler/src/install/unlink.rs b/crates/rattler/src/install/unlink.rs index ccd24505e..3eb84f61a 100644 --- a/crates/rattler/src/install/unlink.rs +++ b/crates/rattler/src/install/unlink.rs @@ -1,6 +1,10 @@ //! Unlinking packages from an environment. -use std::{collections::HashSet, io::ErrorKind, path::Path}; +use std::{ + collections::HashSet, + io::ErrorKind, + path::{Path, PathBuf}, +}; use indexmap::IndexSet; use itertools::Itertools; @@ -22,6 +26,66 @@ pub enum UnlinkError { FailedToReadDirectory(String, std::io::Error), } +fn recursively_remove_empty_directories( + directory_path: &Path, + target_prefix: &Path, + is_python_noarch: bool, +) -> Result { + // Never delete the target prefix + if directory_path == target_prefix || !directory_path.exists() { + return Ok(directory_path.to_path_buf()); + } + + // Should we make this stronger to protect the user? + assert!(directory_path.starts_with(target_prefix)); + + let mut read_dir = directory_path.read_dir().map_err(|e| { + UnlinkError::FailedToReadDirectory(directory_path.to_string_lossy().to_string(), e) + })?; + + match read_dir.next().transpose() { + Ok(None) => { + // The directory is empty, delete it + std::fs::remove_dir(directory_path).map_err(|e| { + UnlinkError::FailedToDeleteDirectory( + directory_path.to_string_lossy().to_string(), + e, + ) + })?; + + // Recursively remove the parent directory + if let Some(parent) = directory_path.parent() { + recursively_remove_empty_directories(parent, target_prefix, is_python_noarch) + } else { + Ok(directory_path.into()) + } + } + + // Check if the only entry is a `__pycache__` directory + Ok(Some(entry)) + if is_python_noarch + && entry.file_name() == "__pycache__" + && read_dir.next().is_none() => + { + // The directory is empty, delete it + std::fs::remove_dir_all(directory_path).map_err(|e| { + UnlinkError::FailedToDeleteDirectory( + directory_path.to_string_lossy().to_string(), + e, + ) + })?; + + // Recursively remove the parent directory + if let Some(parent) = directory_path.parent() { + recursively_remove_empty_directories(parent, target_prefix, is_python_noarch) + } else { + Ok(directory_path.into()) + } + } + _ => Ok(directory_path.into()), + } +} + /// Completely remove the specified package from the environment. pub async fn unlink_package( target_prefix: &Path, @@ -60,44 +124,14 @@ pub async fn unlink_package( let mut directories: IndexSet<_> = directories.into_iter().sorted().collect(); while let Some(directory) = directories.pop() { let directory_path = target_prefix.join(&directory); + let removed_until = + recursively_remove_empty_directories(&directory_path, target_prefix, is_python_noarch)?; - let mut read_dir = directory_path.read_dir().map_err(|e| { - UnlinkError::FailedToReadDirectory(directory_path.to_string_lossy().to_string(), e) - })?; - - match read_dir.next().transpose() { - Ok(None) => { - // The directory is empty, delete it - std::fs::remove_dir(&directory_path).map_err(|e| { - UnlinkError::FailedToDeleteDirectory( - directory_path.to_string_lossy().to_string(), - e, - ) - })?; - } - - // Check if the only entry is a `__pycache__` directory - Ok(Some(entry)) - if is_python_noarch - && entry.file_name() == "__pycache__" - && read_dir.next().is_none() => - { - // The directory is empty, delete it - std::fs::remove_dir_all(&directory_path).map_err(|e| { - UnlinkError::FailedToDeleteDirectory( - directory_path.to_string_lossy().to_string(), - e, - ) - })?; - } - _ => { - // The directory is not empty which means our parent directory is also not empty, - // recursively remove the parent directory from the set as well. - while let Some(parent) = directory.parent() { - if !directories.shift_remove(parent) { - break; - } - } + // The directory is not empty which means our parent directory is also not empty, + // recursively remove the parent directory from the set as well. + while let Some(parent) = removed_until.parent() { + if !directories.shift_remove(parent) { + break; } } } @@ -115,3 +149,130 @@ pub async fn unlink_package( Ok(()) } + +#[cfg(test)] +mod tests { + use std::{ + fs::{self, File}, + io::Write, + path::Path, + str::FromStr, + }; + + use rattler_conda_types::{Platform, PrefixRecord, Version}; + + use crate::{ + get_repodata_record, get_test_data_dir, + install::{link_package, unlink_package, InstallDriver, InstallOptions, PythonInfo}, + }; + + async fn link_ruff(target_prefix: &Path, package: &str) -> PrefixRecord { + let package_dir = tempfile::TempDir::new().unwrap(); + + // Create package cache + rattler_package_streaming::fs::extract( + &get_test_data_dir().join(package), + package_dir.path(), + ) + .unwrap(); + + let py_info = + PythonInfo::from_version(&Version::from_str("3.10").unwrap(), Platform::Linux64) + .unwrap(); + let install_options = InstallOptions { + python_info: Some(py_info), + ..InstallOptions::default() + }; + + // Link the package + let paths = link_package( + package_dir.path(), + target_prefix, + &InstallDriver::default(), + install_options, + ) + .await + .unwrap(); + + let repodata_record = get_repodata_record(package); + // Construct a PrefixRecord for the package + let prefix_record = + PrefixRecord::from_repodata_record(repodata_record, None, None, paths, None, None); + + return prefix_record; + } + + #[tokio::test] + async fn test_unlink_package() { + let environment_dir = tempfile::TempDir::new().unwrap(); + let prefix_record = + link_ruff(environment_dir.path(), "ruff-0.0.171-py310h298983d_0.conda").await; + let conda_meta_path = environment_dir.path().join("conda-meta"); + std::fs::create_dir_all(&conda_meta_path).unwrap(); + + // Write the conda-meta information + let pkg_meta_path = conda_meta_path.join(prefix_record.file_name()); + prefix_record.write_to_path(&pkg_meta_path, true).unwrap(); + + // Unlink the package + unlink_package(environment_dir.path(), &prefix_record) + .await + .unwrap(); + + // Check if the conda-meta file is gone + assert!(!pkg_meta_path.exists()); + + // check that the environment is completely empty except for the conda-meta folder + let entries = std::fs::read_dir(environment_dir.path()) + .unwrap() + .collect::>(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].as_ref().unwrap().file_name(), "conda-meta"); + } + + #[tokio::test] + async fn test_unlink_package_python_noarch() { + let target_prefix = tempfile::TempDir::new().unwrap(); + let prefix_record = link_ruff( + target_prefix.path(), + "pytweening-1.0.4-pyhd8ed1ab_0.tar.bz2", + ) + .await; + + let conda_meta_path = target_prefix.path().join("conda-meta"); + std::fs::create_dir_all(&conda_meta_path).unwrap(); + + // Write the conda-meta information + let pkg_meta_path = conda_meta_path.join(prefix_record.file_name()); + prefix_record.write_to_path(&pkg_meta_path, true).unwrap(); + + fs::create_dir( + target_prefix + .path() + .join("lib/python3.10/site-packages/pytweening/__pycache__"), + ) + .unwrap(); + let mut file = + File::create(target_prefix.path().join( + "lib/python3.10/site-packages/pytweening/__pycache__/__init__.cpython-310.pyc", + )) + .unwrap(); + file.write_all("some funny bytes".as_bytes()).unwrap(); + file.sync_all().unwrap(); + + // Unlink the package + unlink_package(target_prefix.path(), &prefix_record) + .await + .unwrap(); + + // Check if the conda-meta file is gone + assert!(!pkg_meta_path.exists()); + + // check that the environment is completely empty except for the conda-meta folder + let entries = std::fs::read_dir(target_prefix.path()) + .unwrap() + .collect::>(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].as_ref().unwrap().file_name(), "conda-meta"); + } +} diff --git a/crates/rattler/src/lib.rs b/crates/rattler/src/lib.rs index e437713c8..9d32f312e 100644 --- a/crates/rattler/src/lib.rs +++ b/crates/rattler/src/lib.rs @@ -40,3 +40,36 @@ pub fn default_cache_dir() -> anyhow::Result { .ok_or_else(|| anyhow::anyhow!("could not determine cache directory for current platform"))? .join("rattler/cache")) } + +#[cfg(test)] +use rattler_conda_types::RepoDataRecord; + +#[cfg(test)] +pub(crate) fn get_repodata_record(filename: &str) -> RepoDataRecord { + use std::fs; + + use rattler_conda_types::{package::IndexJson, PackageRecord}; + use rattler_digest::{Md5, Sha256}; + use rattler_package_streaming::seek::read_package_file; + + let path = fs::canonicalize(get_test_data_dir().join(filename)).unwrap(); + let index_json = read_package_file::(&path).unwrap(); + + // find size and hash + let size = fs::metadata(&path).unwrap().len(); + let sha256 = rattler_digest::compute_file_digest::(&path).unwrap(); + let md5 = rattler_digest::compute_file_digest::(&path).unwrap(); + + RepoDataRecord { + package_record: PackageRecord::from_index_json( + index_json, + Some(size), + Some(sha256), + Some(md5), + ) + .unwrap(), + file_name: filename.to_string(), + url: url::Url::from_file_path(&path).unwrap(), + channel: "test".to_string(), + } +} diff --git a/crates/rattler_conda_types/src/prefix_record.rs b/crates/rattler_conda_types/src/prefix_record.rs index afe9fa9de..359aee4d7 100644 --- a/crates/rattler_conda_types/src/prefix_record.rs +++ b/crates/rattler_conda_types/src/prefix_record.rs @@ -163,6 +163,29 @@ impl PrefixRecord { Self::from_str(&str) } + /// Creates a `PrefixRecord` from a `RepoDataRecord`. + pub fn from_repodata_record( + repodata_record: RepoDataRecord, + package_tarball_full_path: Option, + extracted_package_dir: Option, + paths: Vec, + requested_spec: Option, + link: Option, + ) -> Self { + Self { + repodata_record, + package_tarball_full_path, + extracted_package_dir, + files: paths + .iter() + .map(|entry| entry.relative_path.clone()) + .collect(), + paths_data: paths.into(), + link, + requested_spec, + } + } + /// Parses a `paths.json` file from a file. pub fn from_path(path: impl AsRef) -> Result { Self::from_reader(File::open(path.as_ref())?)