Skip to content

Commit

Permalink
Merge pull request #133 from SpyrosRoum/Remove-tar-combinations-from-…
Browse files Browse the repository at this point in the history
…CompressionFormat

Remove tar combinations from compression format
  • Loading branch information
marcospb19 authored Nov 2, 2021
2 parents fa07b55 + 15e922b commit ebe3918
Show file tree
Hide file tree
Showing 2 changed files with 110 additions and 100 deletions.
90 changes: 34 additions & 56 deletions src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use crate::{
extension::{
self,
CompressionFormat::{self, *},
Extension,
},
info,
utils::{self, dir_is_empty, nice_directory_display, to_utf},
Expand Down Expand Up @@ -55,9 +56,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
return Err(Error::with_reason(reason));
}

if !formats.get(0).map(CompressionFormat::is_archive_format).unwrap_or(false)
&& represents_several_files(&files)
{
if !formats.get(0).map(Extension::is_archive).unwrap_or(false) && represents_several_files(&files) {
// This piece of code creates a suggestion for compressing multiple files
// It says:
// Change from file.bz.xz
Expand Down Expand Up @@ -85,7 +84,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
return Err(Error::with_reason(reason));
}

if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive_format()) {
if let Some(format) = formats.iter().skip(1).find(|format| format.is_archive()) {
let reason = FinalError::with_title(format!("Cannot compress to '{}'.", to_utf(&output_path)))
.detail(format!("Found the format '{}' in an incorrect position.", format))
.detail(format!("'{}' can only be used at the start of the file extension.", format))
Expand All @@ -107,12 +106,28 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
// `ouch compress file.tar.gz file.tar.gz.xz` should produce `file.tar.gz.xz` and not `file.tar.gz.tar.gz.xz`
let input_extensions = extension::extensions_from_path(&files[0]);

// We calculate the formats that are left if we filter out a sublist at the start of what we have that's the same as the input formats
let mut new_formats = Vec::with_capacity(formats.len());
for (inp_ext, out_ext) in input_extensions.iter().zip(&formats) {
if inp_ext.compression_formats == out_ext.compression_formats {
new_formats.push(out_ext.clone());
} else if inp_ext
.compression_formats
.iter()
.zip(&out_ext.compression_formats)
.all(|(inp, out)| inp == out)
{
let new_ext = Extension::new(
&out_ext.compression_formats[..inp_ext.compression_formats.len()],
&out_ext.display_text,
);
new_formats.push(new_ext);
break;
}
}
// If the input is a sublist at the start of `formats` then remove the extensions
// Note: If input_extensions is empty this counts as true
if !input_extensions.is_empty()
&& input_extensions.len() < formats.len()
&& input_extensions.iter().zip(&formats).all(|(inp, out)| inp == out)
{
// Note: If input_extensions is empty then it will make `formats` empty too, which we don't want
if !input_extensions.is_empty() && new_formats != formats {
// Safety:
// We checked above that input_extensions isn't empty, so files[0] has an extension.
//
Expand All @@ -123,8 +138,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
to_utf(files[0].as_path().file_name().unwrap()),
to_utf(&output_path)
);
let drain_iter = formats.drain(..input_extensions.len());
drop(drain_iter); // Remove the extensions from `formats`
formats = new_formats;
}
}
let compress_result = compress_files(files, formats, output_file);
Expand Down Expand Up @@ -189,7 +203,7 @@ pub fn run(args: Opts, question_policy: QuestionPolicy) -> crate::Result<()> {
// files are the list of paths to be compressed: ["dir/file1.txt", "dir/file2.txt"]
// formats contains each format necessary for compression, example: [Tar, Gz] (in compression order)
// output_file is the resulting compressed file name, example: "compressed.tar.gz"
fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_file: fs::File) -> crate::Result<()> {
fn compress_files(files: Vec<PathBuf>, formats: Vec<Extension>, output_file: fs::File) -> crate::Result<()> {
let file_writer = BufWriter::with_capacity(BUFFER_CAPACITY, output_file);

let mut writer: Box<dyn Write> = Box::new(file_writer);
Expand All @@ -212,40 +226,20 @@ fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_f
encoder
};

for format in formats.iter().skip(1).rev() {
for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
writer = chain_writer_encoder(format, writer);
}

match formats[0] {
match formats[0].compression_formats[0] {
Gzip | Bzip | Lzma | Zstd => {
writer = chain_writer_encoder(&formats[0], writer);
writer = chain_writer_encoder(&formats[0].compression_formats[0], writer);
let mut reader = fs::File::open(&files[0]).unwrap();
io::copy(&mut reader, &mut writer)?;
}
Tar => {
let mut writer = archive::tar::build_archive_from_paths(&files, writer)?;
writer.flush()?;
}
Tgz => {
let encoder = flate2::write::GzEncoder::new(writer, Default::default());
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tbz => {
let encoder = bzip2::write::BzEncoder::new(writer, Default::default());
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tlzma => {
let encoder = xz2::write::XzEncoder::new(writer, 6);
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Tzst => {
let encoder = zstd::stream::write::Encoder::new(writer, Default::default())?;
let writer = archive::tar::build_archive_from_paths(&files, encoder)?;
writer.finish()?.flush()?;
}
Zip => {
eprintln!("{yellow}Warning:{reset}", yellow = *colors::YELLOW, reset = *colors::RESET);
eprintln!("\tCompressing .zip entirely in memory.");
Expand Down Expand Up @@ -274,7 +268,7 @@ fn compress_files(files: Vec<PathBuf>, formats: Vec<CompressionFormat>, output_f
// file_name is only used when extracting single file formats, no archive formats like .tar or .zip
fn decompress_file(
input_file_path: &Path,
formats: Vec<extension::CompressionFormat>,
formats: Vec<Extension>,
output_dir: Option<&Path>,
file_name: &Path,
question_policy: QuestionPolicy,
Expand All @@ -296,7 +290,7 @@ fn decompress_file(
// in-memory decompression/copying first.
//
// Any other Zip decompression done can take up the whole RAM and freeze ouch.
if let [Zip] = *formats.as_slice() {
if formats.len() == 1 && *formats[0].compression_formats.as_slice() == [Zip] {
utils::create_dir_if_non_existent(output_dir)?;
let zip_archive = zip::ZipArchive::new(reader)?;
let _files = crate::archive::zip::unpack_archive(zip_archive, output_dir, question_policy)?;
Expand All @@ -320,17 +314,17 @@ fn decompress_file(
Ok(decoder)
};

for format in formats.iter().skip(1).rev() {
for format in formats.iter().flat_map(Extension::iter).skip(1).collect::<Vec<_>>().iter().rev() {
reader = chain_reader_decoder(format, reader)?;
}

utils::create_dir_if_non_existent(output_dir)?;

let files_unpacked;

match formats[0] {
match formats[0].compression_formats[0] {
Gzip | Bzip | Lzma | Zstd => {
reader = chain_reader_decoder(&formats[0], reader)?;
reader = chain_reader_decoder(&formats[0].compression_formats[0], reader)?;

// TODO: improve error treatment
let mut writer = fs::File::create(&output_path)?;
Expand All @@ -341,22 +335,6 @@ fn decompress_file(
Tar => {
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tgz => {
let reader = chain_reader_decoder(&Gzip, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tbz => {
let reader = chain_reader_decoder(&Bzip, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tlzma => {
let reader = chain_reader_decoder(&Lzma, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Tzst => {
let reader = chain_reader_decoder(&Zstd, reader)?;
files_unpacked = crate::archive::tar::unpack_archive(reader, output_dir, question_policy)?;
}
Zip => {
eprintln!("Compressing first into .zip.");
eprintln!("Warning: .zip archives with extra extensions have a downside.");
Expand Down
120 changes: 76 additions & 44 deletions src/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,62 @@ use std::{ffi::OsStr, fmt, path::Path};

use self::CompressionFormat::*;

/// A wrapper around `CompressionFormat` that allows combinations like `tgz`
///
/// A single file extension may stand for more than one format (for example
/// `tgz` is built from `[Tar, Gzip]`), so the formats are kept as a list
/// alongside the text used when the extension is displayed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Extension {
/// The formats this extension stands for. `Extension::new` rejects an empty
/// list, and `Extension::is_archive` inspects the first entry.
pub compression_formats: Vec<CompressionFormat>,
/// The text written out by the `Display` implementation of this type.
pub display_text: String,
}

impl Extension {
/// # Panics:
/// Will panic if `formats` is empty
pub fn new(formats: impl Into<Vec<CompressionFormat>>, text: impl Into<String>) -> Self {
let formats = formats.into();
assert!(!formats.is_empty());
Self { compression_formats: formats, display_text: text.into() }
}

/// Checks if the first format in `compression_formats` is an archive
pub fn is_archive(&self) -> bool {
// Safety: we check that `compression_formats` is not empty in `Self::new`
self.compression_formats[0].is_archive_format()
}

pub fn iter(&self) -> impl Iterator<Item = &CompressionFormat> {
self.compression_formats.iter()
}
}

impl fmt::Display for Extension {
    /// Writes the stored `display_text` verbatim to the formatter.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { display_text, .. } = self;
        formatter.write_str(display_text.as_str())
    }
}

#[allow(missing_docs)]
#[derive(Clone, PartialEq, Eq, Debug)]
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Accepted extensions for input and output
pub enum CompressionFormat {
Gzip, // .gz
Bzip, // .bz
Lzma, // .lzma
Tar, // .tar (technically not a compression extension, but will do for now)
Tgz, // .tgz
Tbz, // .tbz
Tlzma, // .tlzma
Tzst, // .tzst
Zstd, // .zst
Zip, // .zip
Gzip, // .gz
Bzip, // .bz
Lzma, // .lzma
Tar, // .tar (technically not a compression extension, but will do for now)
Zstd, // .zst
Zip, // .zip
}

impl CompressionFormat {
/// Currently supported archive formats are .tar (and aliases to it) and .zip
pub fn is_archive_format(&self) -> bool {
matches!(self, Tar | Tgz | Tbz | Tlzma | Tzst | Zip)
// Keep this match like that without a wildcard `_` so we don't forget to update it
match self {
Tar | Zip => true,
Gzip => false,
Bzip => false,
Lzma => false,
Zstd => false,
}
}
}

Expand All @@ -38,10 +74,6 @@ impl fmt::Display for CompressionFormat {
Zstd => ".zst",
Lzma => ".lz",
Tar => ".tar",
Tgz => ".tgz",
Tbz => ".tbz",
Tlzma => ".tlz",
Tzst => ".tzst",
Zip => ".zip",
}
)
Expand All @@ -53,15 +85,7 @@ impl fmt::Display for CompressionFormat {

/// Extracts extensions from a path,
/// returning both the remaining path and the list of extension objects
///
/// ```rust
/// use ouch::extension::{separate_known_extensions_from_name, CompressionFormat};
/// use std::path::Path;
///
/// let mut path = Path::new("bolovo.tar.gz");
/// assert_eq!(separate_known_extensions_from_name(&path), (Path::new("bolovo"), vec![CompressionFormat::Tar, CompressionFormat::Gzip]));
/// ```
pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<CompressionFormat>) {
pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Extension>) {
// // TODO: check for file names with the name of an extension
// // TODO2: warn the user that currently .tar.gz is a .gz file named .tar
//
Expand All @@ -75,16 +99,16 @@ pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Compr
// While there are known extensions at the tail, grab them
while let Some(extension) = path.extension().and_then(OsStr::to_str) {
extensions.push(match extension {
"tar" => Tar,
"tgz" => Tgz,
"tbz" | "tbz2" => Tbz,
"txz" | "tlz" | "tlzma" => Tlzma,
"tzst" => Tzst,
"zip" => Zip,
"bz" | "bz2" => Bzip,
"gz" => Gzip,
"xz" | "lzma" | "lz" => Lzma,
"zst" => Zstd,
"tar" => Extension::new([Tar], extension),
"tgz" => Extension::new([Tar, Gzip], extension),
"tbz" | "tbz2" => Extension::new([Tar, Bzip], extension),
"txz" | "tlz" | "tlzma" => Extension::new([Tar, Lzma], extension),
"tzst" => Extension::new([Tar, Zstd], ".tzst"),
"zip" => Extension::new([Zip], extension),
"bz" | "bz2" => Extension::new([Bzip], extension),
"gz" => Extension::new([Gzip], extension),
"xz" | "lzma" | "lz" => Extension::new([Lzma], extension),
"zst" => Extension::new([Zstd], extension),
_ => break,
});

Expand All @@ -98,15 +122,23 @@ pub fn separate_known_extensions_from_name(mut path: &Path) -> (&Path, Vec<Compr
}

/// Extracts extensions from a path, returning only the list of extension objects
///
/// ```rust
/// use ouch::extension::{extensions_from_path, CompressionFormat};
/// use std::path::Path;
///
/// let mut path = Path::new("bolovo.tar.gz");
/// assert_eq!(extensions_from_path(&path), vec![CompressionFormat::Tar, CompressionFormat::Gzip]);
/// ```
pub fn extensions_from_path(path: &Path) -> Vec<CompressionFormat> {
pub fn extensions_from_path(path: &Path) -> Vec<Extension> {
let (_, extensions) = separate_known_extensions_from_name(path);
extensions
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A `.tar.gz` path must flatten to the `Tar` format followed by `Gzip`.
    #[test]
    fn test_extensions_from_path() {
        use CompressionFormat::*;

        let parsed = extensions_from_path(Path::new("bolovo.tar.gz"));

        // Flatten every extension into the individual formats it stands for.
        let mut flattened: Vec<&CompressionFormat> = Vec::new();
        for extension in &parsed {
            flattened.extend(extension.iter());
        }

        assert_eq!(flattened, vec![&Tar, &Gzip]);
    }
}

0 comments on commit ebe3918

Please sign in to comment.