From 7a559457431f3418bcc0bcb9ba68559b3b57de99 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 3 May 2024 14:31:10 -0400 Subject: [PATCH 01/48] add benchmarks --- benches/read_metadata.rs | 72 +++++++++++++++++++++++++++++++++++----- src/read.rs | 1 + src/spec.rs | 2 +- 3 files changed, 66 insertions(+), 9 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index b419836f5..2b4b64c8b 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -3,30 +3,32 @@ use bencher::{benchmark_group, benchmark_main}; use std::io::{Cursor, Write}; use bencher::Bencher; +use getrandom::getrandom; use zip::write::SimpleFileOptions; -use zip::{CompressionMethod, ZipArchive, ZipWriter}; +use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter}; const FILE_COUNT: usize = 15_000; const FILE_SIZE: usize = 1024; -fn generate_random_archive(count_files: usize, file_size: usize) -> Vec { +fn generate_random_archive(count_files: usize, file_size: usize) -> ZipResult> { let data = Vec::new(); let mut writer = ZipWriter::new(Cursor::new(data)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); - let bytes = vec![0u8; file_size]; + let mut bytes = vec![0u8; file_size]; for i in 0..count_files { let name = format!("file_deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef_{i}.dat"); - writer.start_file(name, options).unwrap(); - writer.write_all(&bytes).unwrap(); + writer.start_file(name, options)?; + getrandom(&mut bytes).unwrap(); + writer.write_all(&bytes)?; } - writer.finish().unwrap().into_inner() + Ok(writer.finish()?.into_inner()) } fn read_metadata(bench: &mut Bencher) { - let bytes = generate_random_archive(FILE_COUNT, FILE_SIZE); + let bytes = generate_random_archive(FILE_COUNT, FILE_SIZE).unwrap(); bench.iter(|| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); @@ -34,5 +36,59 @@ fn read_metadata(bench: &mut Bencher) { }); } -benchmark_group!(benches, read_metadata); +const COMMENT_SIZE: usize = 50_000; + +fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResult> { + let data = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(data)); + let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); + + let mut bytes = vec![0u8; comment_length]; + getrandom(&mut bytes).unwrap(); + writer.set_raw_comment(bytes); + + writer.start_file("asdf.txt", options)?; + writer.write_all(b"asdf")?; + + Ok(writer.finish()?.into_inner()) +} + +fn parse_comment(bench: &mut Bencher) { + let bytes = generate_random_zip32_archive_with_comment(COMMENT_SIZE).unwrap(); + + bench.iter(|| { + let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + archive.len() + }); +} + +const COMMENT_SIZE_64: usize = 500_000; + +fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResult> { + let data = Vec::new(); + let mut writer = ZipWriter::new(Cursor::new(data)); + let options = SimpleFileOptions::default() + .compression_method(CompressionMethod::Stored) + .large_file(true); + + let mut bytes = vec![0u8; comment_length]; + getrandom(&mut bytes).unwrap(); + writer.set_raw_comment(bytes); + + writer.start_file("asdf.txt", options)?; + writer.write_all(b"asdf")?; + + Ok(writer.finish()?.into_inner()) +} + +fn parse_zip64_comment(bench: &mut Bencher) { + let bytes = generate_random_zip64_archive_with_comment(COMMENT_SIZE_64).unwrap(); + + bench.iter(|| { + let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); + archive.len() + }); +} + +benchmark_group!(benches, read_metadata, parse_comment, parse_zip64_comment); benchmark_main!(benches); diff --git a/src/read.rs b/src/read.rs index 0b62c9233..e39dc57c1 100644 --- a/src/read.rs +++ b/src/read.rs @@ -349,6 +349,7 @@ pub(crate) fn make_reader( } } +#[derive(Debug)] pub(crate) struct CentralDirectoryInfo { pub(crate) archive_offset: u64, pub(crate) directory_start: u64, diff --git a/src/spec.rs b/src/spec.rs index 893228e4e..63885a977 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -58,7 +58,7 @@ impl CentralDirectoryEnd { const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6; let file_length = reader.seek(io::SeekFrom::End(0))?; - let search_upper_bound = file_length.saturating_sub(MAX_HEADER_AND_COMMENT_SIZE); + let search_upper_bound = 0; if file_length < HEADER_SIZE { return Err(ZipError::InvalidArchive("Invalid zip header")); From 0a573d3747db7278c4978afb1bc3647a4e40fe39 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 08:53:56 -0400 Subject: [PATCH 02/48] make benchmarks report bytes/second --- benches/read_metadata.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 2b4b64c8b..6c4756bc3 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -34,6 +34,7 @@ fn read_metadata(bench: &mut Bencher) { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); archive.len() }); + bench.bytes = bytes.len() as u64; } const COMMENT_SIZE: usize = 50_000; @@ -60,6 +61,7 @@ fn parse_comment(bench: &mut Bencher) { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); archive.len() }); + bench.bytes = bytes.len() as u64; } const COMMENT_SIZE_64: usize = 500_000; @@ -88,6 +90,7 @@ fn parse_zip64_comment(bench: &mut Bencher) { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); archive.len() }); + bench.bytes = bytes.len() as u64; } benchmark_group!(benches, read_metadata, parse_comment, parse_zip64_comment); From 3d1728d79648ab442d78726d8ef2f8e2651cac22 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 09:13:52 -0400 Subject: [PATCH 03/48] add stream benchmark --- benches/read_metadata.rs | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 6c4756bc3..add550c44 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -1,9 +1,11 @@ use bencher::{benchmark_group, benchmark_main}; -use std::io::{Cursor, Write}; +use std::fs; +use std::io::{prelude::*, Cursor}; use bencher::Bencher; use getrandom::getrandom; +use tempdir::TempDir; use zip::write::SimpleFileOptions; use zip::{result::ZipResult, CompressionMethod, ZipArchive, ZipWriter}; @@ -46,7 +48,7 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes); + writer.set_raw_comment(bytes.into()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; @@ -75,7 +77,7 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes); + writer.set_raw_comment(bytes.into()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; @@ -93,5 +95,35 @@ fn parse_zip64_comment(bench: &mut Bencher) { bench.bytes = bytes.len() as u64; } -benchmark_group!(benches, read_metadata, parse_comment, parse_zip64_comment); +#[allow(dead_code)] +fn parse_stream_archive(bench: &mut Bencher) { + const STREAM_ZIP_ENTRIES: usize = 5; + const STREAM_FILE_SIZE: usize = 5; + + let bytes = generate_random_archive(STREAM_ZIP_ENTRIES, STREAM_FILE_SIZE).unwrap(); + dbg!(&bytes); + + /* Write to a temporary file path to incur some filesystem overhead from repeated reads */ + let dir = TempDir::new("stream-bench").unwrap(); + let out = dir.path().join("bench-out.zip"); + fs::write(&out, &bytes).unwrap(); + let mut f = fs::File::open(out).unwrap(); + + bench.iter(|| loop { + while zip::read::read_zipfile_from_stream(&mut f) + .unwrap() + .is_some() + {} + }); + bench.bytes = bytes.len() as u64; +} + +benchmark_group!( + benches, + read_metadata, + parse_comment, + parse_zip64_comment, + /* FIXME: this currently errors */ + /* parse_stream_archive */ +); benchmark_main!(benches); From 011e5afe7be719d62d1779f1e7f6a374ae1d6f00 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 3 May 2024 16:24:58 -0400 Subject: [PATCH 04/48] add test that breaks without the fix --- tests/data/misaligned_comment.zip | Bin 0 -> 513 bytes tests/zip_comment_garbage.rs | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/data/misaligned_comment.zip diff --git a/tests/data/misaligned_comment.zip b/tests/data/misaligned_comment.zip new file mode 100644 index 0000000000000000000000000000000000000000..6f4256b5ef77ac1b86ad00dd51896da5ed27e777 GIT binary patch literal 513 ocmWIWW@TeQ18fY%8TmyedilAzsd*&|NjZryjHYmuJM=>U0QJiWhyVZp literal 0 HcmV?d00001 diff --git a/tests/zip_comment_garbage.rs b/tests/zip_comment_garbage.rs index ef4d97507..9c1246175 100644 --- a/tests/zip_comment_garbage.rs +++ b/tests/zip_comment_garbage.rs @@ -28,3 +28,21 @@ fn correctly_handle_zip_with_garbage_after_comment() { assert_eq!(archive.comment(), "short.".as_bytes()); } + +/// Ensure that a file which has the signature misaligned with the window size is still +/// successfully located. +#[test] +fn correctly_handle_cde_on_window() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("../tests/data/misaligned_comment.zip")); + assert_eq!(v.len(), 512 + 1); + let sig: [u8; 4] = v[..4].try_into().unwrap(); + let sig = u32::from_le_bytes(sig); + + const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50; + assert_eq!(sig, CENTRAL_DIRECTORY_END_SIGNATURE); + + let archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip"); + + assert_eq!(archive.comment(), "short.".as_bytes()); +} From ea308499af60b283fc8a4eb21bcefe75e04089d3 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 3 May 2024 14:28:28 -0400 Subject: [PATCH 05/48] bulk parsing and bulk writing - use blocks for reading individual file headers - remove unnecessary option wrapping for stream entries - create Block trait - add coerce method to reduce some boilerplate - add serialize method to reduce more boilerplate - use to_le! and from_le! - add test case - add some docs - rename a few structs to clarify zip32-only --- Cargo.toml | 1 + benches/read_metadata.rs | 4 +- src/read.rs | 224 ++++++------- src/read/stream.rs | 26 +- src/spec.rs | 695 ++++++++++++++++++++++++++++++++------- src/types.rs | 417 ++++++++++++++++++++++- src/write.rs | 172 +++------- 7 files changed, 1134 insertions(+), 405 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c7230454c..ecd0a9db4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,6 +35,7 @@ displaydoc = { version = "0.2.4", default-features = false } flate2 = { version = "1.0.28", default-features = false, optional = true } indexmap = "2" hmac = { version = "0.12.1", optional = true, features = ["reset"] } +memchr = "2.7.2" pbkdf2 = { version = "0.12.2", optional = true } rand = { version = "0.8.5", optional = true } sha1 = { version = "0.10.6", optional = true } diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index add550c44..f42793b51 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -48,7 +48,7 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes.into()); + writer.set_raw_comment(bytes.into_boxed_slice()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; @@ -77,7 +77,7 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes.into()); + writer.set_raw_comment(bytes.into_boxed_slice()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; diff --git a/src/read.rs b/src/read.rs index e39dc57c1..584e021e7 100644 --- a/src/read.rs +++ b/src/read.rs @@ -8,14 +8,17 @@ use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::Shared; use crate::result::{ZipError, ZipResult}; -use crate::spec; -use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData}; +use crate::spec::{self, Block}; +use crate::types::{ + AesMode, AesVendorVersion, DateTime, System, ZipEntryBlock, ZipFileData, ZipLocalEntryBlock, +}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; use std::io::{self, copy, prelude::*, sink}; +use std::mem; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::{Arc, OnceLock}; @@ -458,7 +461,7 @@ impl ZipArchive { } fn get_directory_info_zip32( - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { // Some zip files have data prepended to them, resulting in the @@ -485,7 +488,7 @@ impl ZipArchive { fn get_directory_info_zip64( reader: &mut R, - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult>> { // See if there's a ZIP64 footer. The ZIP64 locator if present will @@ -502,56 +505,59 @@ impl ZipArchive { // don't know how to precisely relate that location to our current // actual offset in the file, since there may be junk at its // beginning. Therefore we need to perform another search, as in - // read::CentralDirectoryEnd::find_and_parse, except now we search + // read::Zip32CentralDirectoryEnd::find_and_parse, except now we search // forward. There may be multiple results because of Zip64 central-directory signatures in // ZIP comment data. - let mut results = Vec::new(); - let search_upper_bound = cde_start_pos - .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + .checked_sub(60) .ok_or(ZipError::InvalidArchive( "File cannot contain ZIP64 central directory end", ))?; - let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse( - reader, - locator64.end_of_central_directory_offset, - search_upper_bound, - )?; - search_results.into_iter().for_each(|(footer64, archive_offset)| { - results.push({ - let directory_start_result = footer64 + let (lower, upper) = if locator64.end_of_central_directory_offset > search_upper_bound { + ( + search_upper_bound, + locator64.end_of_central_directory_offset, + ) + } else { + ( + locator64.end_of_central_directory_offset, + search_upper_bound, + ) + }; + let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; + let results: Vec> = + search_results.into_iter().map(|(footer64, archive_offset)| { + let directory_start = footer64 .central_directory_offset .checked_add(archive_offset) .ok_or(ZipError::InvalidArchive( "Invalid central directory size or offset", - )); - directory_start_result.and_then(|directory_start| { - if directory_start > search_upper_bound { - Err(ZipError::InvalidArchive( - "Invalid central directory size or offset", - )) - } else if footer64.number_of_files_on_this_disk > footer64.number_of_files { - Err(ZipError::InvalidArchive( - "ZIP64 footer indicates more files on this disk than in the whole archive", - )) - } else if footer64.version_needed_to_extract > footer64.version_made_by { - Err(ZipError::InvalidArchive( - "ZIP64 footer indicates a new version is needed to extract this archive than the \ - version that wrote it", - )) - } else { - Ok(CentralDirectoryInfo { - archive_offset, - directory_start, - number_of_files: footer64.number_of_files as usize, - disk_number: footer64.disk_number, - disk_with_central_directory: footer64.disk_with_central_directory, - }) - } - }) - }); - }); + ))?; + if directory_start > search_upper_bound { + Err(ZipError::InvalidArchive( + "Invalid central directory size or offset", + )) + } else if footer64.number_of_files_on_this_disk > footer64.number_of_files { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates more files on this disk than in the whole archive", + )) + } else if footer64.version_needed_to_extract > footer64.version_made_by { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates a new version is needed to extract this archive than the \ + version that wrote it", + )) + } else { + Ok(CentralDirectoryInfo { + archive_offset, + directory_start, + number_of_files: footer64.number_of_files as usize, + disk_number: footer64.disk_number, + disk_with_central_directory: footer64.disk_with_central_directory, + }) + } + }).collect(); Ok(results) } @@ -559,7 +565,7 @@ impl ZipArchive { /// separate function to ease the control flow design. pub(crate) fn get_metadata( reader: &mut R, - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { // Check if file has a zip64 footer @@ -690,7 +696,7 @@ impl ZipArchive { /// /// This uses the central directory record of the ZIP file, and ignores local file headers pub fn new(mut reader: R) -> ZipResult> { - let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?; + let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?; let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; Ok(ZipArchive { reader, @@ -1002,12 +1008,8 @@ pub(crate) fn central_header_to_zip_file( let central_header_start = reader.stream_position()?; // Parse central header - let signature = reader.read_u32_le()?; - if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - Err(ZipError::InvalidArchive("Invalid Central Directory header")) - } else { - central_header_to_zip_file_inner(reader, archive_offset, central_header_start) - } + let block = ZipEntryBlock::parse(reader)?; + central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block) } /// Parse a central directory entry to collect the information for the file. @@ -1015,31 +1017,38 @@ fn central_header_to_zip_file_inner( reader: &mut R, archive_offset: u64, central_header_start: u64, + block: ZipEntryBlock, ) -> ZipResult { - let version_made_by = reader.read_u16_le()?; - let _version_to_extract = reader.read_u16_le()?; - let flags = reader.read_u16_le()?; + let ZipEntryBlock { + // magic, + version_made_by, + // version_to_extract, + flags, + compression_method, + last_mod_time, + last_mod_date, + crc32, + compressed_size, + uncompressed_size, + file_name_length, + extra_field_length, + file_comment_length, + // disk_number, + // internal_file_attributes, + external_file_attributes, + offset, + .. + } = block; + let encrypted = flags & 1 == 1; let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; - let compression_method = reader.read_u16_le()?; - let last_mod_time = reader.read_u16_le()?; - let last_mod_date = reader.read_u16_le()?; - let crc32 = reader.read_u32_le()?; - let compressed_size = reader.read_u32_le()?; - let uncompressed_size = reader.read_u32_le()?; - let file_name_length = reader.read_u16_le()? as usize; - let extra_field_length = reader.read_u16_le()? as usize; - let file_comment_length = reader.read_u16_le()? as usize; - let _disk_number = reader.read_u16_le()?; - let _internal_file_attributes = reader.read_u16_le()?; - let external_file_attributes = reader.read_u32_le()?; - let offset = reader.read_u32_le()? as u64; - let mut file_name_raw = vec![0; file_name_length]; + + let mut file_name_raw = vec![0; file_name_length as usize]; reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length]; + let mut extra_field = vec![0; extra_field_length as usize]; reader.read_exact(&mut extra_field)?; - let mut file_comment_raw = vec![0; file_comment_length]; + let mut file_comment_raw = vec![0; file_comment_length as usize]; reader.read_exact(&mut file_comment_raw)?; let file_name: Box = match is_utf8 { @@ -1054,6 +1063,7 @@ fn central_header_to_zip_file_inner( // Construct the result let mut result = ZipFileData { system: System::from((version_made_by >> 8) as u8), + /* NB: this strips the top 8 bits! */ version_made_by: version_made_by as u8, encrypted, using_data_descriptor, @@ -1071,7 +1081,7 @@ fn central_header_to_zip_file_inner( extra_field: Some(Arc::new(extra_field)), central_extra_field: None, file_comment, - header_start: offset, + header_start: offset.into(), extra_data_start: None, central_header_start, data_start: OnceLock::new(), @@ -1404,7 +1414,15 @@ impl<'a> Drop for ZipFile<'a> { /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { - let signature = reader.read_u32_le()?; + let mut block = [0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + let block: Box<[u8]> = block.into(); + + let signature = spec::Magic::from_le_bytes( + block[..mem::size_of_val(&spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)] + .try_into() + .unwrap(), + ); match signature { spec::LOCAL_FILE_HEADER_SIGNATURE => (), @@ -1412,67 +1430,9 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(ZipError::InvalidArchive("Invalid local file header")), } - let version_made_by = reader.read_u16_le()?; - let flags = reader.read_u16_le()?; - if flags & 1 == 1 { - return unsupported_zip_error("Encrypted files are not supported"); - } - if flags & (1 << 3) == 1 << 3 { - // using_data_descriptor flag is set - return unsupported_zip_error("The file length is not available in the local header"); - } - let is_utf8 = flags & (1 << 11) != 0; - #[allow(deprecated)] - let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?); - let last_mod_time = reader.read_u16_le()?; - let last_mod_date = reader.read_u16_le()?; - let crc32 = reader.read_u32_le()?; - let compressed_size = reader.read_u32_le()?; - let uncompressed_size = reader.read_u32_le()?; - let file_name_length = reader.read_u16_le()? as usize; - let extra_field_length = reader.read_u16_le()? as usize; - - let mut file_name_raw = vec![0; file_name_length]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length]; - reader.read_exact(&mut extra_field)?; + let block = ZipLocalEntryBlock::interpret(block)?; - let file_name: Box = match is_utf8 { - true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.clone().from_cp437().into(), - }; - - let mut result = ZipFileData { - system: System::from((version_made_by >> 8) as u8), - version_made_by: version_made_by as u8, - encrypted: flags & 1 == 1, - using_data_descriptor: false, - compression_method, - compression_level: None, - last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), - crc32, - compressed_size: compressed_size as u64, - uncompressed_size: uncompressed_size as u64, - file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), - central_extra_field: None, - file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory - // header_start and data start are not available, but also don't matter, since seeking is - // not available. - header_start: 0, - extra_data_start: None, - data_start: OnceLock::new(), - central_header_start: 0, - // The external_attributes field is only available in the central directory. - // We set this to zero, which should be valid as the docs state 'If input came - // from standard input, this field is set to zero.' - external_attributes: 0, - large_file: false, - aes_mode: None, - aes_extra_data_start: 0, - extra_fields: Vec::new(), - }; + let mut result = ZipFileData::from_local_block(block, reader)?; match parse_extra_field(&mut result) { Ok(..) | Err(ZipError::Io(..)) => {} diff --git a/src/read/stream.rs b/src/read/stream.rs index 40cb9efc8..9673f2e50 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,12 +1,12 @@ -use crate::unstable::LittleEndianReadExt; use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; use super::{ - central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, + central_header_to_zip_file_inner, read_zipfile_from_stream, ZipEntryBlock, ZipError, ZipFile, ZipFileData, ZipResult, }; +use crate::spec::Block; /// Stream decoder for zip. #[derive(Debug)] @@ -20,31 +20,31 @@ impl ZipStreamReader { } impl ZipStreamReader { - fn parse_central_directory(&mut self) -> ZipResult> { + fn parse_central_directory(&mut self) -> ZipResult { // Give archive_offset and central_header_start dummy value 0, since // they are not used in the output. let archive_offset = 0; let central_header_start = 0; // Parse central header - let signature = self.0.read_u32_le()?; - if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - Ok(None) - } else { - central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) - .map(ZipStreamFileMetadata) - .map(Some) - } + let block = ZipEntryBlock::parse(&mut self.0)?; + let file = central_header_to_zip_file_inner( + &mut self.0, + archive_offset, + central_header_start, + block, + )?; + Ok(ZipStreamFileMetadata(file)) } - /// Iteraate over the stream and extract all file and their + /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { visitor.visit_file(&mut file)?; } - while let Some(metadata) = self.parse_central_directory()? { + while let Ok(metadata) = self.parse_central_directory() { visitor.visit_additional_metadata(&metadata)?; } diff --git a/src/spec.rs b/src/spec.rs index 63885a977..cc42395ff 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -1,47 +1,169 @@ +#![macro_use] + use crate::result::{ZipError, ZipResult}; -use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt}; -use core::mem::size_of_val; +use memchr::memmem::FinderRev; use std::borrow::Cow; use std::io; use std::io::prelude::*; +use std::mem; use std::path::{Component, Path, MAIN_SEPARATOR}; -pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50; -pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50; -pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50; -pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06064b50; -pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: u32 = 0x07064b50; +pub type Magic = u32; + +pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = 0x04034b50; +pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = 0x02014b50; +pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06054b50; +pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06064b50; +pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = 0x07064b50; pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; -pub struct CentralDirectoryEnd { +pub trait Block: Sized + Copy { + /* TODO: use smallvec? */ + fn interpret(bytes: Box<[u8]>) -> ZipResult; + + fn deserialize(block: &[u8]) -> Self { + assert_eq!(block.len(), mem::size_of::()); + let block_ptr: *const Self = block.as_ptr().cast(); + unsafe { block_ptr.read() } + } + + fn parse(reader: &mut T) -> ZipResult { + let mut block = vec![0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + Self::interpret(block.into_boxed_slice()) + } + + fn encode(self) -> Box<[u8]>; + + fn serialize(self) -> Box<[u8]> { + let mut out_block = vec![0u8; mem::size_of::()]; + let out_view: &mut [u8] = out_block.as_mut(); + let out_ptr: *mut Self = out_view.as_mut_ptr().cast(); + unsafe { + out_ptr.write(self); + } + out_block.into_boxed_slice() + } + + fn write(self, writer: &mut T) -> ZipResult<()> { + let block = self.encode(); + writer.write_all(&block)?; + Ok(()) + } +} + +/// Convert all the fields of a struct *from* little-endian representations. +macro_rules! from_le { + ($obj:ident, $field:ident, $type:ty) => { + $obj.$field = <$type>::from_le($obj.$field); + }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + from_le![$obj, $field, $type]; + }; + ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { + from_le![$obj, $field, $type]; + from_le!($obj, [$($rest),+]); + }; +} + +/// Convert all the fields of a struct *into* little-endian representations. +macro_rules! to_le { + ($obj:ident, $field:ident, $type:ty) => { + $obj.$field = <$type>::to_le($obj.$field); + }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + to_le![$obj, $field, $type]; + }; + ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { + to_le![$obj, $field, $type]; + to_le!($obj, [$($rest),+]); + }; +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub struct Zip32CDEBlock { + magic: Magic, pub disk_number: u16, pub disk_with_central_directory: u16, pub number_of_files_on_this_disk: u16, pub number_of_files: u16, pub central_directory_size: u32, pub central_directory_offset: u32, - pub zip_file_comment: Box<[u8]>, + pub zip_file_comment_length: u16, +} + +impl Zip32CDEBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (disk_number, u16), + (disk_with_central_directory, u16), + (number_of_files_on_this_disk, u16), + (number_of_files, u16), + (central_directory_size, u32), + (central_directory_offset, u32), + (zip_file_comment_length, u16) + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, Magic), + (disk_number, u16), + (disk_with_central_directory, u16), + (number_of_files_on_this_disk, u16), + (number_of_files, u16), + (central_directory_size, u32), + (central_directory_offset, u32), + (zip_file_comment_length, u16) + ] + ]; + self + } } -impl CentralDirectoryEnd { - pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32_le()?; - if magic != CENTRAL_DIRECTORY_END_SIGNATURE { +impl Block for Zip32CDEBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != CENTRAL_DIRECTORY_END_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid digital signature header")); } - let disk_number = reader.read_u16_le()?; - let disk_with_central_directory = reader.read_u16_le()?; - let number_of_files_on_this_disk = reader.read_u16_le()?; - let number_of_files = reader.read_u16_le()?; - let central_directory_size = reader.read_u32_le()?; - let central_directory_offset = reader.read_u32_le()?; - let zip_file_comment_length = reader.read_u16_le()? as usize; - let mut zip_file_comment = vec![0; zip_file_comment_length].into_boxed_slice(); - reader.read_exact(&mut zip_file_comment)?; - Ok(CentralDirectoryEnd { + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +#[derive(Debug)] +pub struct Zip32CentralDirectoryEnd { + pub disk_number: u16, + pub disk_with_central_directory: u16, + pub number_of_files_on_this_disk: u16, + pub number_of_files: u16, + pub central_directory_size: u32, + pub central_directory_offset: u32, + pub zip_file_comment: Box<[u8]>, +} + +impl Zip32CentralDirectoryEnd { + fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> { + let Self { disk_number, disk_with_central_directory, number_of_files_on_this_disk, @@ -49,80 +171,202 @@ impl CentralDirectoryEnd { central_directory_size, central_directory_offset, zip_file_comment, + } = self; + let block = Zip32CDEBlock { + magic: CENTRAL_DIRECTORY_END_SIGNATURE, + + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment_length: zip_file_comment.len().try_into().unwrap_or(u16::MAX), + }; + Ok((block, zip_file_comment)) + } + + pub fn parse(reader: &mut T) -> ZipResult { + let Zip32CDEBlock { + // magic, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment_length, + .. + } = Zip32CDEBlock::parse(reader)?; + + let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize]; + reader.read_exact(&mut zip_file_comment)?; + + Ok(Zip32CentralDirectoryEnd { + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment: zip_file_comment.into_boxed_slice(), }) } - pub fn find_and_parse(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)> { - const HEADER_SIZE: u64 = 22; - const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000; - const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6; + pub fn find_and_parse( + reader: &mut T, + ) -> ZipResult<(Zip32CentralDirectoryEnd, u64)> { let file_length = reader.seek(io::SeekFrom::End(0))?; - let search_upper_bound = 0; - - if file_length < HEADER_SIZE { + if file_length < mem::size_of::() as u64 { return Err(ZipError::InvalidArchive("Invalid zip header")); } - let mut pos = file_length - HEADER_SIZE; - while pos >= search_upper_bound { - let mut have_signature = false; - reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32_le()? == CENTRAL_DIRECTORY_END_SIGNATURE { - have_signature = true; - reader.seek(io::SeekFrom::Current( - BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64, - ))?; - let cde_start_pos = reader.seek(io::SeekFrom::Start(pos))?; - if let Ok(end_header) = CentralDirectoryEnd::parse(reader) { - return Ok((end_header, cde_start_pos)); + let search_upper_bound = 0; + + const END_WINDOW_SIZE: usize = 512; + + let sig_bytes = CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&sig_bytes); + + let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64); + let mut window = [0u8; END_WINDOW_SIZE]; + while window_start >= search_upper_bound { + /* Go to the start of the window in the file. */ + reader.seek(io::SeekFrom::Start(window_start))?; + + /* Identify how many bytes to read (this may be less than the window size for files + * smaller than END_WINDOW_SIZE). */ + let end = (window_start + END_WINDOW_SIZE as u64).min(file_length); + let cur_len = (end - window_start) as usize; + debug_assert!(cur_len > 0); + debug_assert!(cur_len <= END_WINDOW_SIZE); + let cur_window: &mut [u8] = &mut window[..cur_len]; + /* Read the window into the bytes! */ + reader.read_exact(cur_window)?; + + /* Find instances of the magic signature. */ + for offset in finder.rfind_iter(cur_window) { + let cde_start_pos = window_start + offset as u64; + reader.seek(io::SeekFrom::Start(cde_start_pos))?; + /* Drop any headers that don't parse. */ + if let Ok(cde) = Self::parse(reader) { + return Ok((cde, cde_start_pos)); } } - pos = match pos.checked_sub(if have_signature { - size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64 - } else { - 1 - }) { - Some(p) => p, - None => break, - }; + + /* We always want to make sure we go allllll the way back to the start of the file if + * we can't find it elsewhere. However, our `while` condition doesn't check that. So we + * avoid infinite looping by checking at the end of the loop. */ + if window_start == search_upper_bound { + break; + } + debug_assert!(END_WINDOW_SIZE > mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE)); + /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that + * overlap our nice neat window boundaries! */ + window_start = (window_start + /* NB: To catch matches across window boundaries, we need to make our blocks overlap + * by the width of the pattern to match. */ + + mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + /* This should never happen, but make sure we don't go past the end of the file. */ + .min(file_length); + window_start = window_start + .saturating_sub( + /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at + * once (unless limited by file_length). */ + END_WINDOW_SIZE as u64, + ) + /* This will never go below the value of `search_upper_bound`, so we have a special + * `if window_start == search_upper_bound` check above. */ + .max(search_upper_bound); } + Err(ZipError::InvalidArchive( "Could not find central directory end", )) } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u16_le(self.disk_number)?; - writer.write_u16_le(self.disk_with_central_directory)?; - writer.write_u16_le(self.number_of_files_on_this_disk)?; - writer.write_u16_le(self.number_of_files)?; - writer.write_u32_le(self.central_directory_size)?; - writer.write_u32_le(self.central_directory_offset)?; - writer.write_u16_le(self.zip_file_comment.len() as u16)?; - writer.write_all(&self.zip_file_comment)?; + pub fn write(self, writer: &mut T) -> ZipResult<()> { + let (block, comment) = self.block_and_comment()?; + block.write(writer)?; + writer.write_all(&comment)?; Ok(()) } } -pub struct Zip64CentralDirectoryEndLocator { +#[derive(Copy, Clone)] +#[repr(packed)] +pub struct Zip64CDELocatorBlock { + magic: Magic, pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, pub number_of_disks: u32, } -impl Zip64CentralDirectoryEndLocator { - pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32_le()?; - if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { +impl Zip64CDELocatorBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (disk_with_central_directory, u32), + (end_of_central_directory_offset, u64), + (number_of_disks, u32), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, Magic), + (disk_with_central_directory, u32), + (end_of_central_directory_offset, u64), + (number_of_disks, u32), + ] + ]; + self + } +} + +impl Block for Zip64CDELocatorBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { return Err(ZipError::InvalidArchive( "Invalid zip64 locator digital signature header", )); } - let disk_with_central_directory = reader.read_u32_le()?; - let end_of_central_directory_offset = reader.read_u64_le()?; - let number_of_disks = reader.read_u32_le()?; + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +pub struct Zip64CentralDirectoryEndLocator { + pub disk_with_central_directory: u32, + pub end_of_central_directory_offset: u64, + pub number_of_disks: u32, +} + +impl Zip64CentralDirectoryEndLocator { + pub fn parse(reader: &mut T) -> ZipResult { + let Zip64CDELocatorBlock { + // magic, + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + .. + } = Zip64CDELocatorBlock::parse(reader)?; Ok(Zip64CentralDirectoryEndLocator { disk_with_central_directory, @@ -131,12 +375,96 @@ impl Zip64CentralDirectoryEndLocator { }) } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?; - writer.write_u32_le(self.disk_with_central_directory)?; - writer.write_u64_le(self.end_of_central_directory_offset)?; - writer.write_u32_le(self.number_of_disks)?; - Ok(()) + pub fn block(self) -> Zip64CDELocatorBlock { + let Self { + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + } = self; + Zip64CDELocatorBlock { + magic: ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE, + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + } + } + + pub fn write(self, writer: &mut T) -> ZipResult<()> { + self.block().write(writer) + } +} + +#[derive(Copy, Clone)] +#[repr(packed)] +pub struct Zip64CDEBlock { + magic: Magic, + pub record_size: u64, + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub disk_number: u32, + pub disk_with_central_directory: u32, + pub number_of_files_on_this_disk: u64, + pub number_of_files: u64, + pub central_directory_size: u64, + pub central_directory_offset: u64, +} + +impl Zip64CDEBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (record_size, u64), + (version_made_by, u16), + (version_needed_to_extract, u16), + (disk_number, u32), + (disk_with_central_directory, u32), + (number_of_files_on_this_disk, u64), + (number_of_files, u64), + (central_directory_size, u64), + (central_directory_offset, u64), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, Magic), + (record_size, u64), + (version_made_by, u16), + (version_needed_to_extract, u16), + (disk_number, u32), + (disk_with_central_directory, u32), + (number_of_files_on_this_disk, u64), + (number_of_files, u64), + (central_directory_size, u64), + (central_directory_offset, u64), + ] + ]; + self + } +} + +impl Block for Zip64CDEBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid digital signature header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() } } @@ -153,56 +481,105 @@ pub struct Zip64CentralDirectoryEnd { } impl Zip64CentralDirectoryEnd { + pub fn parse(reader: &mut T) -> ZipResult { + let Zip64CDEBlock { + // record_size, + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + .. + } = Zip64CDEBlock::parse(reader)?; + Ok(Self { + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + }) + } + pub fn find_and_parse( reader: &mut T, nominal_offset: u64, search_upper_bound: u64, ) -> ZipResult> { let mut results = Vec::new(); - let mut pos = search_upper_bound; - - while pos >= nominal_offset { - let mut have_signature = false; - reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32_le()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { - have_signature = true; - let archive_offset = pos - nominal_offset; - - let _record_size = reader.read_u64_le()?; - // We would use this value if we did anything with the "zip64 extensible data sector". - - let version_made_by = reader.read_u16_le()?; - let version_needed_to_extract = reader.read_u16_le()?; - let disk_number = reader.read_u32_le()?; - let disk_with_central_directory = reader.read_u32_le()?; - let number_of_files_on_this_disk = reader.read_u64_le()?; - let number_of_files = reader.read_u64_le()?; - let central_directory_size = reader.read_u64_le()?; - let central_directory_offset = reader.read_u64_le()?; - - results.push(( - Zip64CentralDirectoryEnd { - version_made_by, - version_needed_to_extract, - disk_number, - disk_with_central_directory, - number_of_files_on_this_disk, - number_of_files, - central_directory_size, - central_directory_offset, - }, - archive_offset, - )); + + const END_WINDOW_SIZE: usize = 2048; + + let sig_bytes = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&sig_bytes); + + let mut window_start: u64 = search_upper_bound + .saturating_sub(END_WINDOW_SIZE as u64) + .max(nominal_offset); + let mut window = [0u8; END_WINDOW_SIZE]; + while window_start >= nominal_offset { + reader.seek(io::SeekFrom::Start(window_start))?; + + /* Identify how many bytes to read (this may be less than the window size for files + * smaller than END_WINDOW_SIZE). */ + let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound); + + debug_assert!(end >= window_start); + let cur_len = (end - window_start) as usize; + if cur_len == 0 { + break; } - pos = match pos.checked_sub(if have_signature { - size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64 - } else { - 1 - }) { - None => break, - Some(p) => p, + debug_assert!(cur_len <= END_WINDOW_SIZE); + let cur_window: &mut [u8] = &mut window[..cur_len]; + /* Read the window into the bytes! */ + reader.read_exact(cur_window)?; + + /* Find instances of the magic signature. */ + for offset in finder.rfind_iter(cur_window) { + let cde_start_pos = window_start + offset as u64; + reader.seek(io::SeekFrom::Start(cde_start_pos))?; + + debug_assert!(cde_start_pos >= nominal_offset); + let archive_offset = cde_start_pos - nominal_offset; + let cde = Self::parse(reader)?; + + results.push((cde, archive_offset)); } + + /* We always want to make sure we go allllll the way back to the start of the file if + * we can't find it elsewhere. However, our `while` condition doesn't check that. So we + * avoid infinite looping by checking at the end of the loop. */ + if window_start == nominal_offset { + break; + } + debug_assert!( + END_WINDOW_SIZE > mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) + ); + /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that + * overlap our nice neat window boundaries! */ + window_start = (window_start + /* NB: To catch matches across window boundaries, we need to make our blocks overlap + * by the width of the pattern to match. */ + + mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + /* This may never happen, but make sure we don't go past the end of the specified + * range. */ + .min(search_upper_bound); + window_start = window_start + .saturating_sub( + /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at + * once (unless limited by search_upper_bound). */ + END_WINDOW_SIZE as u64, + ) + /* This will never go below the value of `nominal_offset`, so we have a special + * `if window_start == nominal_offset` check above. */ + .max(nominal_offset); } + if results.is_empty() { Err(ZipError::InvalidArchive( "Could not find ZIP64 central directory end", @@ -212,18 +589,34 @@ impl Zip64CentralDirectoryEnd { } } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u64_le(44)?; // record size - writer.write_u16_le(self.version_made_by)?; - writer.write_u16_le(self.version_needed_to_extract)?; - writer.write_u32_le(self.disk_number)?; - writer.write_u32_le(self.disk_with_central_directory)?; - writer.write_u64_le(self.number_of_files_on_this_disk)?; - writer.write_u64_le(self.number_of_files)?; - writer.write_u64_le(self.central_directory_size)?; - writer.write_u64_le(self.central_directory_offset)?; - Ok(()) + pub fn block(self) -> Zip64CDEBlock { + let Self { + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + } = self; + Zip64CDEBlock { + magic: ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE, + /* currently unused */ + record_size: 44, + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + } + } + + pub fn write(self, writer: &mut T) -> ZipResult<()> { + self.block().write(writer) } } @@ -280,3 +673,51 @@ pub(crate) fn path_to_string>(path: T) -> Box { maybe_original.unwrap().into() } } + +#[cfg(test)] +mod test { + use super::*; + use std::io::Cursor; + + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] + #[repr(packed)] + pub struct TestBlock { + magic: Magic, + pub file_name_length: u16, + } + + impl TestBlock { + #[allow(clippy::wrong_self_convention)] + fn from_le(mut self) -> Self { + from_le![self, [(magic, Magic), (file_name_length, u16)]]; + self + } + fn to_le(mut self) -> Self { + to_le![self, [(magic, Magic), (file_name_length, u16)]]; + self + } + } + + impl Block for TestBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + Ok(Self::deserialize(&bytes).from_le()) + } + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } + } + + /// Demonstrate that a block object can be safely written to memory and deserialized back out. + #[test] + fn block_serde() { + let block = TestBlock { + magic: 0x01111, + file_name_length: 3, + }; + let mut c = Cursor::new(Vec::new()); + block.write(&mut c).unwrap(); + c.set_position(0); + let block2 = TestBlock::parse(&mut c).unwrap(); + assert_eq!(block, block2); + } +} diff --git a/src/types.rs b/src/types.rs index 217088fd8..ddd2f79a4 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,13 +1,18 @@ //! Types that specify what is contained in a ZIP. +use crate::cp437::FromCp437; +use crate::write::{FileOptionExtension, FileOptions}; use path::{Component, Path, PathBuf}; use std::fmt; use std::path; use std::sync::{Arc, OnceLock}; +#[cfg(doc)] +use crate::read::ZipFile; #[cfg(feature = "chrono")] use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; -#[cfg(doc)] -use {crate::read::ZipFile, crate::write::FileOptions}; + +use crate::result::{ZipError, ZipResult}; +use crate::spec::{self, Block}; pub(crate) mod ffi { pub const S_IFDIR: u32 = 0o0040000; @@ -23,6 +28,12 @@ use crate::CompressionMethod; #[cfg(feature = "time")] use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time}; +pub(crate) struct ZipRawValues { + pub(crate) crc32: u32, + pub(crate) compressed_size: u64, + pub(crate) uncompressed_size: u64, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u8)] pub enum System { @@ -546,6 +557,408 @@ impl ZipFileData { .map(|v| v.len()) .unwrap_or_default() } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn initialize_local_block( + name: S, + options: &FileOptions, + raw_values: ZipRawValues, + header_start: u64, + extra_data_start: Option, + aes_extra_data_start: u64, + compression_method: crate::compression::CompressionMethod, + aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>, + extra_field: Option>>, + ) -> Self + where + S: Into>, + { + let permissions = options.permissions.unwrap_or(0o100644); + let file_name: Box = name.into(); + let file_name_raw: Box<[u8]> = file_name.bytes().collect(); + ZipFileData { + system: System::Unix, + version_made_by: DEFAULT_VERSION, + encrypted: options.encrypt_with.is_some(), + using_data_descriptor: false, + compression_method, + compression_level: options.compression_level, + last_modified_time: Some(options.last_modified_time), + crc32: raw_values.crc32, + compressed_size: raw_values.compressed_size, + uncompressed_size: raw_values.uncompressed_size, + file_name, // Never used for saving, but used as map key in insert_file_data() + file_name_raw, + extra_field, + central_extra_field: options.extended_options.central_extra_data().cloned(), + file_comment: String::with_capacity(0).into_boxed_str(), + header_start, + data_start: OnceLock::new(), + central_header_start: 0, + external_attributes: permissions << 16, + large_file: options.large_file, + aes_mode, + extra_fields: Vec::new(), + extra_data_start, + aes_extra_data_start, + } + } + + pub(crate) fn from_local_block( + block: ZipLocalEntryBlock, + reader: &mut R, + ) -> ZipResult { + let ZipLocalEntryBlock { + // magic, + version_made_by, + flags, + compression_method, + last_mod_time, + last_mod_date, + crc32, + compressed_size, + uncompressed_size, + file_name_length, + extra_field_length, + .. + } = block; + + let encrypted: bool = flags & 1 == 1; + /* FIXME: these were previously incorrect: add testing! */ + /* flags & (1 << 1) != 0 */ + let is_utf8: bool = flags & (1 << 11) != 0; + /* flags & (1 << 3) != 0 */ + let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; + #[allow(deprecated)] + let compression_method = crate::CompressionMethod::from_u16(compression_method); + let file_name_length: usize = file_name_length.into(); + let extra_field_length: usize = extra_field_length.into(); + + if encrypted { + return Err(ZipError::UnsupportedArchive( + "Encrypted files are not supported", + )); + } + if using_data_descriptor { + return Err(ZipError::UnsupportedArchive( + "The file length is not available in the local header", + )); + } + + let mut file_name_raw = vec![0u8; file_name_length]; + reader.read_exact(&mut file_name_raw)?; + let mut extra_field = vec![0u8; extra_field_length]; + reader.read_exact(&mut extra_field)?; + + let file_name: Box = match is_utf8 { + true => String::from_utf8_lossy(&file_name_raw).into(), + false => file_name_raw.clone().from_cp437().into(), + }; + + let system: u8 = (version_made_by >> 8).try_into().unwrap(); + Ok(ZipFileData { + system: System::from(system), + /* NB: this strips the top 8 bits! */ + version_made_by: version_made_by as u8, + encrypted, + using_data_descriptor, + compression_method, + compression_level: None, + last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), + file_name, + file_name_raw: file_name_raw.into(), + extra_field: Some(Arc::new(extra_field)), + central_extra_field: None, + file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory + // header_start and data start are not available, but also don't matter, since seeking is + // not available. + header_start: 0, + data_start: OnceLock::new(), + central_header_start: 0, + // The external_attributes field is only available in the central directory. + // We set this to zero, which should be valid as the docs state 'If input came + // from standard input, this field is set to zero.' + external_attributes: 0, + large_file: false, + aes_mode: None, + extra_fields: Vec::new(), + extra_data_start: None, + aes_extra_data_start: 0, + }) + } + + fn is_utf8(&self) -> bool { + std::str::from_utf8(&self.file_name_raw).is_ok() + } + + fn is_ascii(&self) -> bool { + self.file_name_raw.is_ascii() + } + + fn flags(&self) -> u16 { + (if self.is_utf8() && !self.is_ascii() { + 1u16 << 11 + } else { + 0 + }) | if self.encrypted { 1u16 << 0 } else { 0 } + } + + pub(crate) fn local_block(&self) -> ZipResult { + let (compressed_size, uncompressed_size) = if self.large_file { + (spec::ZIP64_BYTES_THR as u32, spec::ZIP64_BYTES_THR as u32) + } else { + ( + self.compressed_size.try_into().unwrap(), + self.uncompressed_size.try_into().unwrap(), + ) + }; + + let mut extra_field_length = self.extra_field_len(); + if self.large_file { + /* TODO: magic number */ + extra_field_length += 20; + } + if extra_field_length + self.central_extra_field_len() > u16::MAX as usize { + return Err(ZipError::InvalidArchive("Extra data field is too large")); + } + let extra_field_length: u16 = extra_field_length.try_into().unwrap(); + + let last_modified_time = self + .last_modified_time + .unwrap_or_else(DateTime::default_for_write); + Ok(ZipLocalEntryBlock { + magic: spec::LOCAL_FILE_HEADER_SIGNATURE, + version_made_by: self.version_needed(), + flags: self.flags(), + #[allow(deprecated)] + compression_method: self.compression_method.to_u16(), + last_mod_time: last_modified_time.timepart(), + last_mod_date: last_modified_time.datepart(), + crc32: self.crc32, + compressed_size, + uncompressed_size, + file_name_length: self.file_name_raw.len().try_into().unwrap(), + extra_field_length, + }) + } + + pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipEntryBlock { + let extra_field_len: u16 = self.extra_field_len().try_into().unwrap(); + let central_extra_field_len: u16 = self.central_extra_field_len().try_into().unwrap(); + let last_modified_time = self + .last_modified_time + .unwrap_or_else(DateTime::default_for_write); + ZipEntryBlock { + magic: spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE, + version_made_by: (self.system as u16) << 8 | (self.version_made_by as u16), + version_to_extract: self.version_needed(), + flags: self.flags(), + #[allow(deprecated)] + compression_method: self.compression_method.to_u16(), + last_mod_time: last_modified_time.timepart(), + last_mod_date: last_modified_time.datepart(), + crc32: self.crc32, + compressed_size: self + .compressed_size + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + uncompressed_size: self + .uncompressed_size + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + file_name_length: self.file_name_raw.len().try_into().unwrap(), + extra_field_length: zip64_extra_field_length + + extra_field_len + + central_extra_field_len, + /* FIXME: this appears to be set to 0 in write_central_directory_header() on master? */ + file_comment_length: self.file_comment.as_bytes().len().try_into().unwrap(), + disk_number: 0, + internal_file_attributes: 0, + external_file_attributes: self.external_attributes, + offset: self + .header_start + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + } + } +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipEntryBlock { + pub magic: spec::Magic, + pub version_made_by: u16, + pub version_to_extract: u16, + pub flags: u16, + pub compression_method: u16, + pub last_mod_time: u16, + pub last_mod_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + pub file_name_length: u16, + pub extra_field_length: u16, + pub file_comment_length: u16, + pub disk_number: u16, + pub internal_file_attributes: u16, + pub external_file_attributes: u32, + pub offset: u32, +} + +impl ZipEntryBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (version_to_extract, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + (file_comment_length, u16), + (disk_number, u16), + (internal_file_attributes, u16), + (external_file_attributes, u32), + (offset, u32), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (version_to_extract, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + (file_comment_length, u16), + (disk_number, u16), + (internal_file_attributes, u16), + (external_file_attributes, u32), + (offset, u32), + ] + ]; + self + } +} + +impl Block for ZipEntryBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid Central Directory header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipLocalEntryBlock { + magic: spec::Magic, + pub version_made_by: u16, + pub flags: u16, + pub compression_method: u16, + pub last_mod_time: u16, + pub last_mod_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + pub file_name_length: u16, + pub extra_field_length: u16, +} + +impl ZipLocalEntryBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + ] + ]; + self + } +} + +impl Block for ZipLocalEntryBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != spec::LOCAL_FILE_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid local file header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } } /// The encryption specification used to encrypt a file with AES. diff --git a/src/write.rs b/src/write.rs index 63635b91c..86f1719d5 100644 --- a/src/write.rs +++ b/src/write.rs @@ -5,10 +5,10 @@ use crate::aes::AesWriter; use crate::compression::CompressionMethod; use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; -use crate::spec; +use crate::spec::{self, Block}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; -use crate::types::{ffi, AesVendorVersion, DateTime, System, ZipFileData, DEFAULT_VERSION}; +use crate::types::{ffi, AesVendorVersion, DateTime, ZipFileData, ZipRawValues, DEFAULT_VERSION}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -22,7 +22,7 @@ use std::io::{BufReader, SeekFrom}; use std::marker::PhantomData; use std::mem; use std::str::{from_utf8, Utf8Error}; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; #[cfg(any( feature = "deflate", @@ -147,11 +147,6 @@ struct ZipWriterStats { bytes_written: u64, } -struct ZipRawValues { - crc32: u32, - compressed_size: u64, - uncompressed_size: u64, -} mod sealed { use std::sync::Arc; @@ -188,7 +183,7 @@ mod sealed { } #[derive(Copy, Clone, Debug)] -enum EncryptWith<'k> { +pub(crate) enum EncryptWith<'k> { #[cfg(feature = "aes-crypto")] Aes { mode: AesMode, @@ -223,9 +218,9 @@ pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) last_modified_time: DateTime, pub(crate) permissions: Option, pub(crate) large_file: bool, - encrypt_with: Option>, - extended_options: T, - alignment: u16, + pub(crate) encrypt_with: Option>, + pub(crate) extended_options: T, + pub(crate) alignment: u16, #[cfg(feature = "deflate-zopfli")] pub(super) zopfli_buffer_size: Option, } @@ -509,7 +504,8 @@ impl ZipWriterStats { impl ZipWriter { /// Initializes the archive from an existing ZIP archive, making it ready for append. pub fn new_append(mut readwriter: A) -> ZipResult> { - let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?; + let (footer, cde_start_pos) = + spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?; let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; Ok(ZipWriter { @@ -777,7 +773,6 @@ impl ZipWriter { { let header_start = self.inner.get_plain().stream_position()?; - let permissions = options.permissions.unwrap_or(0o100644); let (compression_method, aes_mode) = match options.encrypt_with { #[cfg(feature = "aes-crypto")] Some(EncryptWith::Aes { mode, .. }) => ( @@ -786,76 +781,38 @@ impl ZipWriter { ), _ => (options.compression_method, None), }; - let last_modified_time = options.last_modified_time; - let file = ZipFileData { - system: System::Unix, - version_made_by: DEFAULT_VERSION, - encrypted: options.encrypt_with.is_some(), - using_data_descriptor: false, - compression_method, - compression_level: options.compression_level, - last_modified_time: Some(options.last_modified_time), - crc32: raw_values.crc32, - compressed_size: raw_values.compressed_size, - uncompressed_size: raw_values.uncompressed_size, - file_name: name.to_owned().into(), // Never used for saving, but used as map key in insert_file_data() - file_name_raw: name.into().bytes().collect(), - extra_field, - central_extra_field: options.extended_options.central_extra_data().cloned(), - file_comment: String::with_capacity(0).into_boxed_str(), + + let file = ZipFileData::initialize_local_block( + name, + &options, + raw_values, header_start, - extra_data_start: None, - data_start: OnceLock::new(), - central_header_start: 0, - external_attributes: permissions << 16, - large_file: options.large_file, - aes_mode, + None, aes_extra_data_start, + compression_method, + aes_mode, + extra_field, + ); - extra_fields: Vec::new(), - }; let index = self.insert_file_data(file)?; let file = &mut self.files[index]; let writer = self.inner.get_plain(); - // local file header signature - writer.write_u32_le(spec::LOCAL_FILE_HEADER_SIGNATURE)?; - // version needed to extract - writer.write_u16_le(file.version_needed())?; - // general purpose bit flag - let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok(); - let is_ascii = file.file_name_raw.is_ascii(); - let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 } - | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16_le(flag)?; - // Compression method - #[allow(deprecated)] - writer.write_u16_le(file.compression_method.to_u16())?; - // last mod file time and last mod file date - writer.write_u16_le(last_modified_time.timepart())?; - writer.write_u16_le(last_modified_time.datepart())?; - // crc-32 - writer.write_u32_le(file.crc32)?; - // compressed size and uncompressed size - if file.large_file { - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; - } else { - writer.write_u32_le(file.compressed_size as u32)?; - writer.write_u32_le(file.uncompressed_size as u32)?; - } - // file name length - writer.write_u16_le(file.file_name_raw.len() as u16)?; - // extra field length - let mut extra_field_length = file.extra_field_len(); - if file.large_file { - extra_field_length += 20; - } - if extra_field_length + file.central_extra_field_len() > u16::MAX as usize { - let _ = self.abort_file(); - return Err(InvalidArchive("Extra data field is too large")); + + let block = match file.local_block() { + Ok(block) => block, + Err(e) => { + let _ = self.abort_file(); + return Err(e); + } + }; + match block.write(writer) { + Ok(()) => (), + Err(e) => { + let _ = self.abort_file(); + return Err(e); + } } - let extra_field_length = extra_field_length as u16; - writer.write_u16_le(extra_field_length)?; + // file name writer.write_all(&file.file_name_raw)?; // zip64 extra field @@ -873,7 +830,7 @@ impl ZipWriter { if unaligned_header_bytes != 0 { let pad_length = (align - unaligned_header_bytes) as usize; let Some(new_extra_field_length) = - (pad_length as u16).checked_add(extra_field_length) + (pad_length as u16).checked_add(block.extra_field_length) else { let _ = self.abort_file(); return Err(InvalidArchive( @@ -1431,7 +1388,7 @@ impl ZipWriter { } let number_of_files = self.files.len().min(spec::ZIP64_ENTRY_THR) as u16; - let footer = spec::CentralDirectoryEnd { + let footer = spec::Zip32CentralDirectoryEnd { disk_number: 0, disk_with_central_directory: 0, zip_file_comment: self.comment.clone(), @@ -1805,52 +1762,9 @@ fn write_central_directory_header(writer: &mut T, file: &ZipFileData) let zip64_extra_field_length = write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?; - // central file header signature - writer.write_u32_le(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?; - // version made by - let version_made_by = (file.system as u16) << 8 | (file.version_made_by as u16); - writer.write_u16_le(version_made_by)?; - // version needed to extract - writer.write_u16_le(file.version_needed())?; - // general puprose bit flag - let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok(); - let is_ascii = file.file_name_raw.is_ascii(); - let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 } - | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16_le(flag)?; - // compression method - #[allow(deprecated)] - writer.write_u16_le(file.compression_method.to_u16())?; - let last_modified_time = file - .last_modified_time - .unwrap_or_else(DateTime::default_for_write); - // last mod file time + date - writer.write_u16_le(last_modified_time.timepart())?; - writer.write_u16_le(last_modified_time.datepart())?; - // crc-32 - writer.write_u32_le(file.crc32)?; - // compressed size - writer.write_u32_le(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; - // uncompressed size - writer.write_u32_le(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; - // file name length - writer.write_u16_le(file.file_name_raw.len() as u16)?; - // extra field length - writer.write_u16_le( - zip64_extra_field_length - + file.extra_field_len() as u16 - + file.central_extra_field_len() as u16, - )?; - // file comment length - writer.write_u16_le(0)?; - // disk number start - writer.write_u16_le(0)?; - // internal file attributes - writer.write_u16_le(0)?; - // external file attributes - writer.write_u32_le(file.external_attributes)?; - // relative offset of local header - writer.write_u32_le(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?; + let block = file.block(zip64_extra_field_length); + block.write(writer)?; + // file name writer.write_all(&file.file_name_raw)?; // zip64 extra field @@ -1926,6 +1840,7 @@ fn update_local_zip64_extra_field( Ok(()) } +/* TODO: make this use the Block trait somehow! */ fn write_central_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult { // The order of the fields in the zip64 extended // information record is fixed, but the fields MUST @@ -2074,7 +1989,7 @@ mod test { writer .start_file_from_path(path, SimpleFileOptions::default()) .unwrap(); - let archive = ZipArchive::new(writer.finish().unwrap()).unwrap(); + let archive = writer.finish_into_readable().unwrap(); assert_eq!(Some("foo/example.txt"), archive.name_for_index(0)); } @@ -2227,8 +2142,7 @@ mod test { writer .shallow_copy_file(SECOND_FILENAME, SECOND_FILENAME) .expect_err("Duplicate filename"); - let zip = writer.finish().unwrap(); - let mut reader = ZipArchive::new(zip).unwrap(); + let mut reader = writer.finish_into_readable().unwrap(); let mut file_names: Vec<&str> = reader.file_names().collect(); file_names.sort(); let mut expected_file_names = vec![RT_TEST_FILENAME, SECOND_FILENAME]; @@ -2512,7 +2426,7 @@ mod test { let contents = b"sleeping"; let () = zip.start_file("sleep", options).unwrap(); let _count = zip.write(&contents[..]).unwrap(); - let mut zip = ZipArchive::new(zip.finish().unwrap()).unwrap(); + let mut zip = zip.finish_into_readable().unwrap(); let file = zip.by_index(0).unwrap(); assert_eq!(file.name(), "sleep"); assert_eq!(file.data_start(), page_size.into()); From ad1d51d0993aa407365f20b97e4004390ab5f937 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 13 May 2024 00:40:53 -0400 Subject: [PATCH 06/48] write file comment to central directory header --- src/types.rs | 1 - src/write.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/types.rs b/src/types.rs index ddd2f79a4..937afabbe 100644 --- a/src/types.rs +++ b/src/types.rs @@ -775,7 +775,6 @@ impl ZipFileData { extra_field_length: zip64_extra_field_length + extra_field_len + central_extra_field_len, - /* FIXME: this appears to be set to 0 in write_central_directory_header() on master? */ file_comment_length: self.file_comment.as_bytes().len().try_into().unwrap(), disk_number: 0, internal_file_attributes: 0, diff --git a/src/write.rs b/src/write.rs index 86f1719d5..58e43396e 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1777,7 +1777,7 @@ fn write_central_directory_header(writer: &mut T, file: &ZipFileData) writer.write_all(central_extra_field)?; } // file comment - // + writer.write_all(file.file_comment.as_bytes())?; Ok(()) } From 46c42c7f820ccd0b29fca8f2b52f0a1904ac8b4d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 02:04:07 -0400 Subject: [PATCH 07/48] review comments 1 --- benches/read_metadata.rs | 20 +++++++------- src/read.rs | 58 ++++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 33 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index f42793b51..f54f7c2d4 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -41,7 +41,7 @@ fn read_metadata(bench: &mut Bencher) { const COMMENT_SIZE: usize = 50_000; -fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResult> { +fn generate_zip32_archive_with_random_comment(comment_length: usize) -> ZipResult> { let data = Vec::new(); let mut writer = ZipWriter::new(Cursor::new(data)); let options = SimpleFileOptions::default().compression_method(CompressionMethod::Stored); @@ -56,19 +56,19 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul Ok(writer.finish()?.into_inner()) } -fn parse_comment(bench: &mut Bencher) { - let bytes = generate_random_zip32_archive_with_comment(COMMENT_SIZE).unwrap(); +fn parse_archive_with_comment(bench: &mut Bencher) { + let bytes = generate_zip32_archive_with_random_comment(COMMENT_SIZE).unwrap(); bench.iter(|| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.len() + archive.comment().len() }); bench.bytes = bytes.len() as u64; } const COMMENT_SIZE_64: usize = 500_000; -fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResult> { +fn generate_zip64_archive_with_random_comment(comment_length: usize) -> ZipResult> { let data = Vec::new(); let mut writer = ZipWriter::new(Cursor::new(data)); let options = SimpleFileOptions::default() @@ -85,12 +85,12 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul Ok(writer.finish()?.into_inner()) } -fn parse_zip64_comment(bench: &mut Bencher) { - let bytes = generate_random_zip64_archive_with_comment(COMMENT_SIZE_64).unwrap(); +fn parse_zip64_archive_with_comment(bench: &mut Bencher) { + let bytes = generate_zip64_archive_with_random_comment(COMMENT_SIZE_64).unwrap(); bench.iter(|| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.len() + archive.comment().len() }); bench.bytes = bytes.len() as u64; } @@ -121,8 +121,8 @@ fn parse_stream_archive(bench: &mut Bencher) { benchmark_group!( benches, read_metadata, - parse_comment, - parse_zip64_comment, + parse_archive_with_comment, + parse_zip64_archive_with_comment, /* FIXME: this currently errors */ /* parse_stream_archive */ ); diff --git a/src/read.rs b/src/read.rs index 584e021e7..430475ffd 100644 --- a/src/read.rs +++ b/src/read.rs @@ -486,6 +486,19 @@ impl ZipArchive { }) } + const fn zip64_cde_len() -> usize { + mem::size_of::() + + mem::size_of::() + } + + const fn order_lower_upper_bounds(a: u64, b: u64) -> (u64, u64) { + if a > b { + (b, a) + } else { + (a, b) + } + } + fn get_directory_info_zip64( reader: &mut R, footer: &spec::Zip32CentralDirectoryEnd, @@ -495,6 +508,7 @@ impl ZipArchive { // have its signature 20 bytes in front of the standard footer. The // standard footer, in turn, is 22+N bytes large, where N is the // comment length. Therefore: + /* TODO: compute this from constant sizes and offsets! */ reader.seek(io::SeekFrom::End( -(20 + 22 + footer.zip_file_comment.len() as i64), ))?; @@ -510,22 +524,16 @@ impl ZipArchive { // ZIP comment data. let search_upper_bound = cde_start_pos - // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator - .checked_sub(60) + .checked_sub(Self::zip64_cde_len() as u64) .ok_or(ZipError::InvalidArchive( "File cannot contain ZIP64 central directory end", ))?; - let (lower, upper) = if locator64.end_of_central_directory_offset > search_upper_bound { - ( - search_upper_bound, - locator64.end_of_central_directory_offset, - ) - } else { - ( - locator64.end_of_central_directory_offset, - search_upper_bound, - ) - }; + + let (lower, upper) = Self::order_lower_upper_bounds( + locator64.end_of_central_directory_offset, + search_upper_bound, + ); + let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; let results: Vec> = search_results.into_iter().map(|(footer64, archive_offset)| { @@ -1012,6 +1020,13 @@ pub(crate) fn central_header_to_zip_file( central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block) } +#[inline] +fn read_variable_length_byte_field(reader: &mut R, len: usize) -> io::Result> { + let mut data = vec![0; len]; + reader.read_exact(&mut data)?; + Ok(data.into_boxed_slice()) +} + /// Parse a central directory entry to collect the information for the file. fn central_header_to_zip_file_inner( reader: &mut R, @@ -1044,20 +1059,17 @@ fn central_header_to_zip_file_inner( let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; - let mut file_name_raw = vec![0; file_name_length as usize]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length as usize]; - reader.read_exact(&mut extra_field)?; - let mut file_comment_raw = vec![0; file_comment_length as usize]; - reader.read_exact(&mut file_comment_raw)?; + let file_name_raw = read_variable_length_byte_field(reader, file_name_length as usize)?; + let extra_field = read_variable_length_byte_field(reader, extra_field_length as usize)?; + let file_comment_raw = read_variable_length_byte_field(reader, file_comment_length as usize)?; let file_name: Box = match is_utf8 { true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.from_cp437().into(), + false => file_name_raw.clone().from_cp437(), }; let file_comment: Box = match is_utf8 { true => String::from_utf8_lossy(&file_comment_raw).into(), - false => file_comment_raw.from_cp437().into(), + false => file_comment_raw.from_cp437(), }; // Construct the result @@ -1077,8 +1089,8 @@ fn central_header_to_zip_file_inner( compressed_size: compressed_size as u64, uncompressed_size: uncompressed_size as u64, file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), + file_name_raw, + extra_field: Some(Arc::new(extra_field.to_vec())), central_extra_field: None, file_comment, header_start: offset.into(), From 3fa0d845547587a1ff315a4f1e2c61d15b01ce98 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 02:38:57 -0400 Subject: [PATCH 08/48] make Magic into a wrapper struct --- src/read.rs | 9 ++---- src/spec.rs | 79 ++++++++++++++++++++++++++++++++++++++++------------ src/types.rs | 6 ++-- 3 files changed, 68 insertions(+), 26 deletions(-) diff --git a/src/read.rs b/src/read.rs index 430475ffd..1b099e444 100644 --- a/src/read.rs +++ b/src/read.rs @@ -221,7 +221,8 @@ pub(crate) fn find_content<'a>( ) -> ZipResult> { // Parse local header reader.seek(io::SeekFrom::Start(data.header_start))?; - let signature = reader.read_u32_le()?; + /* FIXME: read this in blocks too! ::literal is not for general use! */ + let signature = spec::Magic::literal(reader.read_u32_le()?); if signature != spec::LOCAL_FILE_HEADER_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid local file header")); } @@ -1430,11 +1431,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult = block.into(); - let signature = spec::Magic::from_le_bytes( - block[..mem::size_of_val(&spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)] - .try_into() - .unwrap(), - ); + let signature = spec::Magic::from_first_le_bytes(&block); match signature { spec::LOCAL_FILE_HEADER_SIGNATURE => (), diff --git a/src/spec.rs b/src/spec.rs index cc42395ff..0ce7c765c 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -8,13 +8,53 @@ use std::io::prelude::*; use std::mem; use std::path::{Component, Path, MAIN_SEPARATOR}; -pub type Magic = u32; +/// "Magic" header values used in the zip spec to locate metadata records. +/// +/// These values currently always take up a fixed four bytes, so we can parse and wrap them in this +/// struct to enforce some small amount of type safety. +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct Magic(u32); -pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = 0x04034b50; -pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = 0x02014b50; -pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06054b50; -pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06064b50; -pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = 0x07064b50; +impl Magic { + pub(crate) const fn literal(x: u32) -> Self { + Self(x) + } + + #[inline(always)] + pub(crate) const fn from_le_bytes(bytes: [u8; 4]) -> Self { + Self(u32::from_le_bytes(bytes)) + } + + #[inline(always)] + pub(crate) fn from_first_le_bytes(data: &[u8]) -> Self { + let first_bytes: [u8; 4] = data[..mem::size_of::()].try_into().unwrap(); + Self::from_le_bytes(first_bytes) + } + + #[inline(always)] + pub(crate) const fn to_le_bytes(self) -> [u8; 4] { + self.0.to_le_bytes() + } + + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + pub(crate) fn from_le(self) -> Self { + Self(u32::from_le(self.0)) + } + + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + pub(crate) fn to_le(self) -> Self { + Self(u32::to_le(self.0)) + } +} + +pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = Magic::literal(0x04034b50); +pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = Magic::literal(0x02014b50); +pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06054b50); +pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06064b50); +pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = Magic::literal(0x07064b50); pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; @@ -98,7 +138,7 @@ pub struct Zip32CDEBlock { impl Zip32CDEBlock { #[allow(clippy::wrong_self_convention)] #[inline(always)] - fn from_le(mut self) -> Self { + pub(crate) fn from_le(mut self) -> Self { from_le![ self, [ @@ -116,7 +156,7 @@ impl Zip32CDEBlock { } #[inline(always)] - fn to_le(mut self) -> Self { + pub(crate) fn to_le(mut self) -> Self { to_le![ self, [ @@ -138,7 +178,8 @@ impl Block for Zip32CDEBlock { fn interpret(bytes: Box<[u8]>) -> ZipResult { let block = Self::deserialize(&bytes).from_le(); - if block.magic != CENTRAL_DIRECTORY_END_SIGNATURE { + let magic = block.magic; + if magic != CENTRAL_DIRECTORY_END_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid digital signature header")); } @@ -306,7 +347,7 @@ pub struct Zip64CDELocatorBlock { impl Zip64CDELocatorBlock { #[allow(clippy::wrong_self_convention)] #[inline(always)] - fn from_le(mut self) -> Self { + pub(crate) fn from_le(mut self) -> Self { from_le![ self, [ @@ -320,7 +361,7 @@ impl Zip64CDELocatorBlock { } #[inline(always)] - fn to_le(mut self) -> Self { + pub(crate) fn to_le(mut self) -> Self { to_le![ self, [ @@ -338,7 +379,8 @@ impl Block for Zip64CDELocatorBlock { fn interpret(bytes: Box<[u8]>) -> ZipResult { let block = Self::deserialize(&bytes).from_le(); - if block.magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { + let magic = block.magic; + if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { return Err(ZipError::InvalidArchive( "Invalid zip64 locator digital signature header", )); @@ -412,7 +454,7 @@ pub struct Zip64CDEBlock { impl Zip64CDEBlock { #[allow(clippy::wrong_self_convention)] #[inline(always)] - fn from_le(mut self) -> Self { + pub(crate) fn from_le(mut self) -> Self { from_le![ self, [ @@ -432,7 +474,7 @@ impl Zip64CDEBlock { } #[inline(always)] - fn to_le(mut self) -> Self { + pub(crate) fn to_le(mut self) -> Self { to_le![ self, [ @@ -456,7 +498,8 @@ impl Block for Zip64CDEBlock { fn interpret(bytes: Box<[u8]>) -> ZipResult { let block = Self::deserialize(&bytes).from_le(); - if block.magic != ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { + let magic = block.magic; + if magic != ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid digital signature header")); } @@ -688,11 +731,11 @@ mod test { impl TestBlock { #[allow(clippy::wrong_self_convention)] - fn from_le(mut self) -> Self { + pub(crate) fn from_le(mut self) -> Self { from_le![self, [(magic, Magic), (file_name_length, u16)]]; self } - fn to_le(mut self) -> Self { + pub(crate) fn to_le(mut self) -> Self { to_le![self, [(magic, Magic), (file_name_length, u16)]]; self } @@ -711,7 +754,7 @@ mod test { #[test] fn block_serde() { let block = TestBlock { - magic: 0x01111, + magic: Magic::literal(0x01111), file_name_length: 3, }; let mut c = Cursor::new(Vec::new()); diff --git a/src/types.rs b/src/types.rs index 937afabbe..7cd909993 100644 --- a/src/types.rs +++ b/src/types.rs @@ -871,7 +871,8 @@ impl Block for ZipEntryBlock { fn interpret(bytes: Box<[u8]>) -> ZipResult { let block = Self::deserialize(&bytes).from_le(); - if block.magic != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + let magic = block.magic; + if magic != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid Central Directory header")); } @@ -948,7 +949,8 @@ impl Block for ZipLocalEntryBlock { fn interpret(bytes: Box<[u8]>) -> ZipResult { let block = Self::deserialize(&bytes).from_le(); - if block.magic != spec::LOCAL_FILE_HEADER_SIGNATURE { + let magic = block.magic; + if magic != spec::LOCAL_FILE_HEADER_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid local file header")); } From 08385d52e196fb498f152d9882f7fa96522ebc9a Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 03:08:17 -0400 Subject: [PATCH 09/48] implement find_content() by parsing with blocks --- src/read.rs | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/src/read.rs b/src/read.rs index 1b099e444..a87dab67c 100644 --- a/src/read.rs +++ b/src/read.rs @@ -219,25 +219,30 @@ pub(crate) fn find_content<'a>( data: &ZipFileData, reader: &'a mut (impl Read + Seek), ) -> ZipResult> { - // Parse local header - reader.seek(io::SeekFrom::Start(data.header_start))?; - /* FIXME: read this in blocks too! ::literal is not for general use! */ - let signature = spec::Magic::literal(reader.read_u32_le()?); - if signature != spec::LOCAL_FILE_HEADER_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid local file header")); - } + // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! let data_start = match data.data_start.get() { + Some(data_start) => *data_start, None => { - reader.seek(io::SeekFrom::Current(22))?; - let file_name_length = reader.read_u16_le()? as u64; - let extra_field_length = reader.read_u16_le()? as u64; - let magic_and_header = 4 + 22 + 2 + 2; - let data_start = - data.header_start + magic_and_header + file_name_length + extra_field_length; - data.data_start.get_or_init(|| data_start); + // Go to start of data. + reader.seek(io::SeekFrom::Start(data.header_start))?; + // Parse static-sized fields and check the magic value. + let block = ZipLocalEntryBlock::parse(reader)?; + // Calculate the end of the local header from the fields we just parsed. + let variable_fields_len = (block.file_name_length + block.extra_field_length) as u64; + let data_start = data.header_start + + mem::size_of::() as u64 + + variable_fields_len; + // Set the value so we don't have to read it again. + match data.data_start.set(data_start) { + Ok(()) => (), + // If the value was already set in the meantime, ensure it matches (this is probably + // unnecessary). + Err(_) => { + assert_eq!(*data.data_start.get().unwrap(), data_start); + } + } data_start } - Some(start) => *start, }; reader.seek(io::SeekFrom::Start(data_start))?; @@ -1427,6 +1432,9 @@ impl<'a> Drop for ZipFile<'a> { /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { + // We can't use the typical ::parse() method, as we follow separate code paths depending on the + // "magic" value (since the magic value will be from the central directory header if we've + // finished iterating over all the actual files). let mut block = [0u8; mem::size_of::()]; reader.read_exact(&mut block)?; let block: Box<[u8]> = block.into(); From 7eb5907622b33df249c0aa2fd6be02206391f61e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 03:56:10 -0400 Subject: [PATCH 10/48] remove a lot of boilerplate for Block impls --- src/spec.rs | 154 ++++++++++++++++++++++++--------------------------- src/types.rs | 58 +++++++------------ 2 files changed, 92 insertions(+), 120 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 0ce7c765c..afe01df2c 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -60,8 +60,20 @@ pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; pub trait Block: Sized + Copy { + const MAGIC: Magic; + + fn magic(self) -> Magic; + + const ERROR: ZipError; + /* TODO: use smallvec? */ - fn interpret(bytes: Box<[u8]>) -> ZipResult; + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + if block.magic() != Self::MAGIC { + return Err(Self::ERROR); + } + Ok(block) + } fn deserialize(block: &[u8]) -> Self { assert_eq!(block.len(), mem::size_of::()); @@ -69,18 +81,27 @@ pub trait Block: Sized + Copy { unsafe { block_ptr.read() } } + #[allow(clippy::wrong_self_convention)] + fn from_le(self) -> Self; + fn parse(reader: &mut T) -> ZipResult { let mut block = vec![0u8; mem::size_of::()]; reader.read_exact(&mut block)?; Self::interpret(block.into_boxed_slice()) } - fn encode(self) -> Box<[u8]>; + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } + + fn to_le(self) -> Self; + /* TODO: use Box<[u8; mem::size_of::()]> when generic_const_exprs are stabilized! */ fn serialize(self) -> Box<[u8]> { + /* TODO: use Box::new_zeroed() when stabilized! */ + /* TODO: also consider using smallvec! */ let mut out_block = vec![0u8; mem::size_of::()]; - let out_view: &mut [u8] = out_block.as_mut(); - let out_ptr: *mut Self = out_view.as_mut_ptr().cast(); + let out_ptr: *mut Self = out_block.as_mut_ptr().cast(); unsafe { out_ptr.write(self); } @@ -135,10 +156,18 @@ pub struct Zip32CDEBlock { pub zip_file_comment_length: u16, } -impl Zip32CDEBlock { - #[allow(clippy::wrong_self_convention)] +impl Block for Zip32CDEBlock { + const MAGIC: Magic = CENTRAL_DIRECTORY_END_SIGNATURE; + #[inline(always)] - pub(crate) fn from_le(mut self) -> Self { + fn magic(self) -> Magic { + self.magic + } + + const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); + + #[inline(always)] + fn from_le(mut self) -> Self { from_le![ self, [ @@ -156,7 +185,7 @@ impl Zip32CDEBlock { } #[inline(always)] - pub(crate) fn to_le(mut self) -> Self { + fn to_le(mut self) -> Self { to_le![ self, [ @@ -174,23 +203,6 @@ impl Zip32CDEBlock { } } -impl Block for Zip32CDEBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); - - let magic = block.magic; - if magic != CENTRAL_DIRECTORY_END_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid digital signature header")); - } - - Ok(block) - } - - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } -} - #[derive(Debug)] pub struct Zip32CentralDirectoryEnd { pub disk_number: u16, @@ -344,10 +356,19 @@ pub struct Zip64CDELocatorBlock { pub number_of_disks: u32, } -impl Zip64CDELocatorBlock { - #[allow(clippy::wrong_self_convention)] +impl Block for Zip64CDELocatorBlock { + const MAGIC: Magic = ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE; + #[inline(always)] - pub(crate) fn from_le(mut self) -> Self { + fn magic(self) -> Magic { + self.magic + } + + const ERROR: ZipError = + ZipError::InvalidArchive("Invalid zip64 locator digital signature header"); + + #[inline(always)] + fn from_le(mut self) -> Self { from_le![ self, [ @@ -361,7 +382,7 @@ impl Zip64CDELocatorBlock { } #[inline(always)] - pub(crate) fn to_le(mut self) -> Self { + fn to_le(mut self) -> Self { to_le![ self, [ @@ -375,25 +396,6 @@ impl Zip64CDELocatorBlock { } } -impl Block for Zip64CDELocatorBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); - - let magic = block.magic; - if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { - return Err(ZipError::InvalidArchive( - "Invalid zip64 locator digital signature header", - )); - } - - Ok(block) - } - - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } -} - pub struct Zip64CentralDirectoryEndLocator { pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, @@ -451,10 +453,17 @@ pub struct Zip64CDEBlock { pub central_directory_offset: u64, } -impl Zip64CDEBlock { - #[allow(clippy::wrong_self_convention)] +impl Block for Zip64CDEBlock { + const MAGIC: Magic = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE; + + fn magic(self) -> Magic { + self.magic + } + + const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); + #[inline(always)] - pub(crate) fn from_le(mut self) -> Self { + fn from_le(mut self) -> Self { from_le![ self, [ @@ -474,7 +483,7 @@ impl Zip64CDEBlock { } #[inline(always)] - pub(crate) fn to_le(mut self) -> Self { + fn to_le(mut self) -> Self { to_le![ self, [ @@ -494,23 +503,6 @@ impl Zip64CDEBlock { } } -impl Block for Zip64CDEBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); - - let magic = block.magic; - if magic != ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid digital signature header")); - } - - Ok(block) - } - - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } -} - pub struct Zip64CentralDirectoryEnd { pub version_made_by: u16, pub version_needed_to_extract: u16, @@ -729,27 +721,25 @@ mod test { pub file_name_length: u16, } - impl TestBlock { - #[allow(clippy::wrong_self_convention)] - pub(crate) fn from_le(mut self) -> Self { + impl Block for TestBlock { + const MAGIC: Magic = Magic::literal(0x01111); + + fn magic(self) -> Magic { + self.magic + } + + const ERROR: ZipError = ZipError::InvalidArchive("unreachable"); + + fn from_le(mut self) -> Self { from_le![self, [(magic, Magic), (file_name_length, u16)]]; self } - pub(crate) fn to_le(mut self) -> Self { + fn to_le(mut self) -> Self { to_le![self, [(magic, Magic), (file_name_length, u16)]]; self } } - impl Block for TestBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - Ok(Self::deserialize(&bytes).from_le()) - } - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } - } - /// Demonstrate that a block object can be safely written to memory and deserialized back out. #[test] fn block_serde() { diff --git a/src/types.rs b/src/types.rs index 7cd909993..8452fef15 100644 --- a/src/types.rs +++ b/src/types.rs @@ -810,8 +810,16 @@ pub(crate) struct ZipEntryBlock { pub offset: u32, } -impl ZipEntryBlock { - #[allow(clippy::wrong_self_convention)] +impl Block for ZipEntryBlock { + const MAGIC: spec::Magic = spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE; + + #[inline(always)] + fn magic(self) -> spec::Magic { + self.magic + } + + const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header"); + #[inline(always)] fn from_le(mut self) -> Self { from_le![ @@ -867,23 +875,6 @@ impl ZipEntryBlock { } } -impl Block for ZipEntryBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); - - let magic = block.magic; - if magic != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid Central Directory header")); - } - - Ok(block) - } - - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } -} - #[derive(Copy, Clone, Debug)] #[repr(packed)] pub(crate) struct ZipLocalEntryBlock { @@ -900,8 +891,16 @@ pub(crate) struct ZipLocalEntryBlock { pub extra_field_length: u16, } -impl ZipLocalEntryBlock { - #[allow(clippy::wrong_self_convention)] +impl Block for ZipLocalEntryBlock { + const MAGIC: spec::Magic = spec::LOCAL_FILE_HEADER_SIGNATURE; + + #[inline(always)] + fn magic(self) -> spec::Magic { + self.magic + } + + const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header"); + #[inline(always)] fn from_le(mut self) -> Self { from_le![ @@ -945,23 +944,6 @@ impl ZipLocalEntryBlock { } } -impl Block for ZipLocalEntryBlock { - fn interpret(bytes: Box<[u8]>) -> ZipResult { - let block = Self::deserialize(&bytes).from_le(); - - let magic = block.magic; - if magic != spec::LOCAL_FILE_HEADER_SIGNATURE { - return Err(ZipError::InvalidArchive("Invalid local file header")); - } - - Ok(block) - } - - fn encode(self) -> Box<[u8]> { - self.to_le().serialize() - } -} - /// The encryption specification used to encrypt a file with AES. /// /// According to the [specification](https://www.winzip.com/win/en/aes_info.html#winzip11) AE-2 From 83cdbadae884fc4f9f220c89d6785c2aef32ce82 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:04:32 -0400 Subject: [PATCH 11/48] make window size assertions much less complex with Magic --- src/spec.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index afe01df2c..fee4d2dc5 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -278,6 +278,8 @@ impl Zip32CentralDirectoryEnd { let search_upper_bound = 0; const END_WINDOW_SIZE: usize = 512; + /* TODO: use static_assertions!() */ + assert!(END_WINDOW_SIZE > mem::size_of::()); let sig_bytes = CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); let finder = FinderRev::new(&sig_bytes); @@ -314,13 +316,12 @@ impl Zip32CentralDirectoryEnd { if window_start == search_upper_bound { break; } - debug_assert!(END_WINDOW_SIZE > mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE)); /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that * overlap our nice neat window boundaries! */ window_start = (window_start /* NB: To catch matches across window boundaries, we need to make our blocks overlap * by the width of the pattern to match. */ - + mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + + mem::size_of::() as u64) /* This should never happen, but make sure we don't go past the end of the file. */ .min(file_length); window_start = window_start @@ -549,6 +550,8 @@ impl Zip64CentralDirectoryEnd { let mut results = Vec::new(); const END_WINDOW_SIZE: usize = 2048; + /* TODO: use static_assertions!() */ + assert!(END_WINDOW_SIZE > mem::size_of::()); let sig_bytes = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); let finder = FinderRev::new(&sig_bytes); @@ -592,15 +595,12 @@ impl Zip64CentralDirectoryEnd { if window_start == nominal_offset { break; } - debug_assert!( - END_WINDOW_SIZE > mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) - ); /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that * overlap our nice neat window boundaries! */ window_start = (window_start /* NB: To catch matches across window boundaries, we need to make our blocks overlap * by the width of the pattern to match. */ - + mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + + mem::size_of::() as u64) /* This may never happen, but make sure we don't go past the end of the specified * range. */ .min(search_upper_bound); From 03c92a1184ba9a13cab074c72ad190c66585de56 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:13:11 -0400 Subject: [PATCH 12/48] add to_and_from_le! macro --- src/spec.rs | 156 ++++++++++++++++----------------------------------- src/types.rs | 126 +++++++++++------------------------------ 2 files changed, 79 insertions(+), 203 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index fee4d2dc5..26c9d4a2c 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -143,6 +143,24 @@ macro_rules! to_le { }; } +/* TODO: derive macro to generate these fields? */ +/// Implement `from_le()` and `to_le()`, providing the field specification to both macros +/// and methods. +macro_rules! to_and_from_le { + ($($args:tt),+ $(,)?) => { + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![self, [$($args),+]]; + self + } + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![self, [$($args),+]]; + self + } + }; +} + #[derive(Copy, Clone, Debug)] #[repr(packed)] pub struct Zip32CDEBlock { @@ -166,41 +184,16 @@ impl Block for Zip32CDEBlock { const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); - #[inline(always)] - fn from_le(mut self) -> Self { - from_le![ - self, - [ - (magic, Magic), - (disk_number, u16), - (disk_with_central_directory, u16), - (number_of_files_on_this_disk, u16), - (number_of_files, u16), - (central_directory_size, u32), - (central_directory_offset, u32), - (zip_file_comment_length, u16) - ] - ]; - self - } - - #[inline(always)] - fn to_le(mut self) -> Self { - to_le![ - self, - [ - (magic, Magic), - (disk_number, u16), - (disk_with_central_directory, u16), - (number_of_files_on_this_disk, u16), - (number_of_files, u16), - (central_directory_size, u32), - (central_directory_offset, u32), - (zip_file_comment_length, u16) - ] - ]; - self - } + to_and_from_le![ + (magic, Magic), + (disk_number, u16), + (disk_with_central_directory, u16), + (number_of_files_on_this_disk, u16), + (number_of_files, u16), + (central_directory_size, u32), + (central_directory_offset, u32), + (zip_file_comment_length, u16) + ]; } #[derive(Debug)] @@ -368,33 +361,12 @@ impl Block for Zip64CDELocatorBlock { const ERROR: ZipError = ZipError::InvalidArchive("Invalid zip64 locator digital signature header"); - #[inline(always)] - fn from_le(mut self) -> Self { - from_le![ - self, - [ - (magic, Magic), - (disk_with_central_directory, u32), - (end_of_central_directory_offset, u64), - (number_of_disks, u32), - ] - ]; - self - } - - #[inline(always)] - fn to_le(mut self) -> Self { - to_le![ - self, - [ - (magic, Magic), - (disk_with_central_directory, u32), - (end_of_central_directory_offset, u64), - (number_of_disks, u32), - ] - ]; - self - } + to_and_from_le![ + (magic, Magic), + (disk_with_central_directory, u32), + (end_of_central_directory_offset, u64), + (number_of_disks, u32), + ]; } pub struct Zip64CentralDirectoryEndLocator { @@ -463,45 +435,18 @@ impl Block for Zip64CDEBlock { const ERROR: ZipError = ZipError::InvalidArchive("Invalid digital signature header"); - #[inline(always)] - fn from_le(mut self) -> Self { - from_le![ - self, - [ - (magic, Magic), - (record_size, u64), - (version_made_by, u16), - (version_needed_to_extract, u16), - (disk_number, u32), - (disk_with_central_directory, u32), - (number_of_files_on_this_disk, u64), - (number_of_files, u64), - (central_directory_size, u64), - (central_directory_offset, u64), - ] - ]; - self - } - - #[inline(always)] - fn to_le(mut self) -> Self { - to_le![ - self, - [ - (magic, Magic), - (record_size, u64), - (version_made_by, u16), - (version_needed_to_extract, u16), - (disk_number, u32), - (disk_with_central_directory, u32), - (number_of_files_on_this_disk, u64), - (number_of_files, u64), - (central_directory_size, u64), - (central_directory_offset, u64), - ] - ]; - self - } + to_and_from_le![ + (magic, Magic), + (record_size, u64), + (version_made_by, u16), + (version_needed_to_extract, u16), + (disk_number, u32), + (disk_with_central_directory, u32), + (number_of_files_on_this_disk, u64), + (number_of_files, u64), + (central_directory_size, u64), + (central_directory_offset, u64), + ]; } pub struct Zip64CentralDirectoryEnd { @@ -730,14 +675,7 @@ mod test { const ERROR: ZipError = ZipError::InvalidArchive("unreachable"); - fn from_le(mut self) -> Self { - from_le![self, [(magic, Magic), (file_name_length, u16)]]; - self - } - fn to_le(mut self) -> Self { - to_le![self, [(magic, Magic), (file_name_length, u16)]]; - self - } + to_and_from_le![(magic, Magic), (file_name_length, u16)]; } /// Demonstrate that a block object can be safely written to memory and deserialized back out. diff --git a/src/types.rs b/src/types.rs index 8452fef15..74d60d748 100644 --- a/src/types.rs +++ b/src/types.rs @@ -820,59 +820,25 @@ impl Block for ZipEntryBlock { const ERROR: ZipError = ZipError::InvalidArchive("Invalid Central Directory header"); - #[inline(always)] - fn from_le(mut self) -> Self { - from_le![ - self, - [ - (magic, spec::Magic), - (version_made_by, u16), - (version_to_extract, u16), - (flags, u16), - (compression_method, u16), - (last_mod_time, u16), - (last_mod_date, u16), - (crc32, u32), - (compressed_size, u32), - (uncompressed_size, u32), - (file_name_length, u16), - (extra_field_length, u16), - (file_comment_length, u16), - (disk_number, u16), - (internal_file_attributes, u16), - (external_file_attributes, u32), - (offset, u32), - ] - ]; - self - } - - #[inline(always)] - fn to_le(mut self) -> Self { - to_le![ - self, - [ - (magic, spec::Magic), - (version_made_by, u16), - (version_to_extract, u16), - (flags, u16), - (compression_method, u16), - (last_mod_time, u16), - (last_mod_date, u16), - (crc32, u32), - (compressed_size, u32), - (uncompressed_size, u32), - (file_name_length, u16), - (extra_field_length, u16), - (file_comment_length, u16), - (disk_number, u16), - (internal_file_attributes, u16), - (external_file_attributes, u32), - (offset, u32), - ] - ]; - self - } + to_and_from_le![ + (magic, spec::Magic), + (version_made_by, u16), + (version_to_extract, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + (file_comment_length, u16), + (disk_number, u16), + (internal_file_attributes, u16), + (external_file_attributes, u32), + (offset, u32), + ]; } #[derive(Copy, Clone, Debug)] @@ -901,47 +867,19 @@ impl Block for ZipLocalEntryBlock { const ERROR: ZipError = ZipError::InvalidArchive("Invalid local file header"); - #[inline(always)] - fn from_le(mut self) -> Self { - from_le![ - self, - [ - (magic, spec::Magic), - (version_made_by, u16), - (flags, u16), - (compression_method, u16), - (last_mod_time, u16), - (last_mod_date, u16), - (crc32, u32), - (compressed_size, u32), - (uncompressed_size, u32), - (file_name_length, u16), - (extra_field_length, u16), - ] - ]; - self - } - - #[inline(always)] - fn to_le(mut self) -> Self { - to_le![ - self, - [ - (magic, spec::Magic), - (version_made_by, u16), - (flags, u16), - (compression_method, u16), - (last_mod_time, u16), - (last_mod_date, u16), - (crc32, u32), - (compressed_size, u32), - (uncompressed_size, u32), - (file_name_length, u16), - (extra_field_length, u16), - ] - ]; - self - } + to_and_from_le![ + (magic, spec::Magic), + (version_made_by, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + ]; } /// The encryption specification used to encrypt a file with AES. From e1c92e2f219629b3a99d6f0f3fe6aeb50ff85d59 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:21:13 -0400 Subject: [PATCH 13/48] make SIG_BYTES const --- src/spec.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 26c9d4a2c..e80b05e85 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -274,8 +274,9 @@ impl Zip32CentralDirectoryEnd { /* TODO: use static_assertions!() */ assert!(END_WINDOW_SIZE > mem::size_of::()); - let sig_bytes = CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); - let finder = FinderRev::new(&sig_bytes); + const SIG_BYTES: [u8; mem::size_of::()] = + CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&SIG_BYTES); let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64); let mut window = [0u8; END_WINDOW_SIZE]; @@ -498,8 +499,9 @@ impl Zip64CentralDirectoryEnd { /* TODO: use static_assertions!() */ assert!(END_WINDOW_SIZE > mem::size_of::()); - let sig_bytes = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); - let finder = FinderRev::new(&sig_bytes); + const SIG_BYTES: [u8; mem::size_of::()] = + ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&SIG_BYTES); let mut window_start: u64 = search_upper_bound .saturating_sub(END_WINDOW_SIZE as u64) From cf2d980612f070357f869eb4648ba136464403c9 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:29:26 -0400 Subject: [PATCH 14/48] expose pub(crate) methods to convert compression methods --- src/compression.rs | 47 +++++++++++++++++++++++----------------------- src/read.rs | 8 ++------ src/types.rs | 9 +++------ src/write.rs | 4 ++-- 4 files changed, 31 insertions(+), 37 deletions(-) diff --git a/src/compression.rs b/src/compression.rs index 3dd6eced2..937c72baa 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -90,13 +90,7 @@ impl CompressionMethod { pub const AES: Self = CompressionMethod::Unsupported(99); } impl CompressionMethod { - /// Converts an u16 to its corresponding CompressionMethod - #[deprecated( - since = "0.5.7", - note = "use a constant to construct a compression method" - )] - pub const fn from_u16(val: u16) -> CompressionMethod { - #[allow(deprecated)] + pub(crate) const fn parse_from_u16(val: u16) -> Self { match val { 0 => CompressionMethod::Stored, #[cfg(feature = "_deflate-any")] @@ -111,18 +105,21 @@ impl CompressionMethod { 93 => CompressionMethod::Zstd, #[cfg(feature = "aes-crypto")] 99 => CompressionMethod::Aes, - + #[allow(deprecated)] v => CompressionMethod::Unsupported(v), } } - /// Converts a CompressionMethod to a u16 + /// Converts an u16 to its corresponding CompressionMethod #[deprecated( since = "0.5.7", - note = "to match on other compression methods, use a constant" + note = "use a constant to construct a compression method" )] - pub const fn to_u16(self) -> u16 { - #[allow(deprecated)] + pub const fn from_u16(val: u16) -> CompressionMethod { + Self::parse_from_u16(val) + } + + pub(crate) const fn serialize_to_u16(self) -> u16 { match self { CompressionMethod::Stored => 0, #[cfg(feature = "_deflate-any")] @@ -137,10 +134,19 @@ impl CompressionMethod { CompressionMethod::Zstd => 93, #[cfg(feature = "lzma")] CompressionMethod::Lzma => 14, - + #[allow(deprecated)] CompressionMethod::Unsupported(v) => v, } } + + /// Converts a CompressionMethod to a u16 + #[deprecated( + since = "0.5.7", + note = "to match on other compression methods, use a constant" + )] + pub const fn to_u16(self) -> u16 { + self.serialize_to_u16() + } } impl Default for CompressionMethod { @@ -180,10 +186,8 @@ mod test { #[test] fn from_eq_to() { for v in 0..(u16::MAX as u32 + 1) { - #[allow(deprecated)] - let from = CompressionMethod::from_u16(v as u16); - #[allow(deprecated)] - let to = from.to_u16() as u32; + let from = CompressionMethod::parse_from_u16(v as u16); + let to = from.serialize_to_u16() as u32; assert_eq!(v, to); } } @@ -191,12 +195,9 @@ mod test { #[test] fn to_eq_from() { fn check_match(method: CompressionMethod) { - #[allow(deprecated)] - let to = method.to_u16(); - #[allow(deprecated)] - let from = CompressionMethod::from_u16(to); - #[allow(deprecated)] - let back = from.to_u16(); + let to = method.serialize_to_u16(); + let from = CompressionMethod::parse_from_u16(to); + let back = from.serialize_to_u16(); assert_eq!(to, back); } diff --git a/src/read.rs b/src/read.rs index a87dab67c..67d4be598 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1085,10 +1085,7 @@ fn central_header_to_zip_file_inner( version_made_by: version_made_by as u8, encrypted, using_data_descriptor, - compression_method: { - #[allow(deprecated)] - CompressionMethod::from_u16(compression_method) - }, + compression_method: CompressionMethod::parse_from_u16(compression_method), compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), crc32, @@ -1171,8 +1168,7 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { let mut out = [0u8]; reader.read_exact(&mut out)?; let aes_mode = out[0]; - #[allow(deprecated)] - let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?); + let compression_method = CompressionMethod::parse_from_u16(reader.read_u16_le()?); if vendor_id != 0x4541 { return Err(ZipError::InvalidArchive("Invalid AES vendor")); diff --git a/src/types.rs b/src/types.rs index 74d60d748..6293c532a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -629,8 +629,7 @@ impl ZipFileData { let is_utf8: bool = flags & (1 << 11) != 0; /* flags & (1 << 3) != 0 */ let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; - #[allow(deprecated)] - let compression_method = crate::CompressionMethod::from_u16(compression_method); + let compression_method = crate::CompressionMethod::parse_from_u16(compression_method); let file_name_length: usize = file_name_length.into(); let extra_field_length: usize = extra_field_length.into(); @@ -733,8 +732,7 @@ impl ZipFileData { magic: spec::LOCAL_FILE_HEADER_SIGNATURE, version_made_by: self.version_needed(), flags: self.flags(), - #[allow(deprecated)] - compression_method: self.compression_method.to_u16(), + compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), crc32: self.crc32, @@ -756,8 +754,7 @@ impl ZipFileData { version_made_by: (self.system as u16) << 8 | (self.version_made_by as u16), version_to_extract: self.version_needed(), flags: self.flags(), - #[allow(deprecated)] - compression_method: self.compression_method.to_u16(), + compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), crc32: self.crc32, diff --git a/src/write.rs b/src/write.rs index 58e43396e..acaf4d8c9 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1707,6 +1707,7 @@ fn update_aes_extra_data( let mut buf = Vec::new(); + /* TODO: implement this using the Block trait! */ // Extra field header ID. buf.write_u16_le(0x9901)?; // Data size. @@ -1718,8 +1719,7 @@ fn update_aes_extra_data( // AES encryption strength. buf.write_all(&[aes_mode as u8])?; // Real compression method. - #[allow(deprecated)] - buf.write_u16_le(compression_method.to_u16())?; + buf.write_u16_le(compression_method.serialize_to_u16())?; writer.write_all(&buf)?; From 41813d242cd03f6f46c1bf9f314775f923ee62c0 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:32:06 -0400 Subject: [PATCH 15/48] move encrypted and data descriptor validation up higher --- src/types.rs | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/types.rs b/src/types.rs index 6293c532a..8e3014092 100644 --- a/src/types.rs +++ b/src/types.rs @@ -623,27 +623,29 @@ impl ZipFileData { .. } = block; - let encrypted: bool = flags & 1 == 1; - /* FIXME: these were previously incorrect: add testing! */ - /* flags & (1 << 1) != 0 */ - let is_utf8: bool = flags & (1 << 11) != 0; - /* flags & (1 << 3) != 0 */ - let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; - let compression_method = crate::CompressionMethod::parse_from_u16(compression_method); - let file_name_length: usize = file_name_length.into(); - let extra_field_length: usize = extra_field_length.into(); + let encrypted: bool = flags & 1 == 1; if encrypted { return Err(ZipError::UnsupportedArchive( "Encrypted files are not supported", )); } + + /* FIXME: these were previously incorrect: add testing! */ + /* flags & (1 << 3) != 0 */ + let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; if using_data_descriptor { return Err(ZipError::UnsupportedArchive( "The file length is not available in the local header", )); } + /* flags & (1 << 1) != 0 */ + let is_utf8: bool = flags & (1 << 11) != 0; + let compression_method = crate::CompressionMethod::parse_from_u16(compression_method); + let file_name_length: usize = file_name_length.into(); + let extra_field_length: usize = extra_field_length.into(); + let mut file_name_raw = vec![0u8; file_name_length]; reader.read_exact(&mut file_name_raw)?; let mut extra_field = vec![0u8; extra_field_length]; From 8fbc4039a87fde751f6140558b871778bcbf25e4 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:36:54 -0400 Subject: [PATCH 16/48] lean more on the ::MAGIC trait constants --- src/read.rs | 9 +++++---- src/read/stream.rs | 6 +++--- src/spec.rs | 9 ++++----- src/types.rs | 13 ++++++------- 4 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/read.rs b/src/read.rs index 67d4be598..c8ff7581a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -10,7 +10,8 @@ use crate::read::zip_archive::Shared; use crate::result::{ZipError, ZipResult}; use crate::spec::{self, Block}; use crate::types::{ - AesMode, AesVendorVersion, DateTime, System, ZipEntryBlock, ZipFileData, ZipLocalEntryBlock, + AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, + ZipLocalEntryBlock, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; @@ -1022,7 +1023,7 @@ pub(crate) fn central_header_to_zip_file( let central_header_start = reader.stream_position()?; // Parse central header - let block = ZipEntryBlock::parse(reader)?; + let block = ZipCentralEntryBlock::parse(reader)?; central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block) } @@ -1038,9 +1039,9 @@ fn central_header_to_zip_file_inner( reader: &mut R, archive_offset: u64, central_header_start: u64, - block: ZipEntryBlock, + block: ZipCentralEntryBlock, ) -> ZipResult { - let ZipEntryBlock { + let ZipCentralEntryBlock { // magic, version_made_by, // version_to_extract, diff --git a/src/read/stream.rs b/src/read/stream.rs index 9673f2e50..d87e22fce 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -3,8 +3,8 @@ use std::io::{self, Read}; use std::path::{Path, PathBuf}; use super::{ - central_header_to_zip_file_inner, read_zipfile_from_stream, ZipEntryBlock, ZipError, ZipFile, - ZipFileData, ZipResult, + central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, + ZipFile, ZipFileData, ZipResult, }; use crate::spec::Block; @@ -27,7 +27,7 @@ impl ZipStreamReader { let central_header_start = 0; // Parse central header - let block = ZipEntryBlock::parse(&mut self.0)?; + let block = ZipCentralEntryBlock::parse(&mut self.0)?; let file = central_header_to_zip_file_inner( &mut self.0, archive_offset, diff --git a/src/spec.rs b/src/spec.rs index e80b05e85..941040961 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -219,8 +219,7 @@ impl Zip32CentralDirectoryEnd { zip_file_comment, } = self; let block = Zip32CDEBlock { - magic: CENTRAL_DIRECTORY_END_SIGNATURE, - + magic: Zip32CDEBlock::MAGIC, disk_number, disk_with_central_directory, number_of_files_on_this_disk, @@ -400,7 +399,7 @@ impl Zip64CentralDirectoryEndLocator { number_of_disks, } = self; Zip64CDELocatorBlock { - magic: ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE, + magic: Zip64CDELocatorBlock::MAGIC, disk_with_central_directory, end_of_central_directory_offset, number_of_disks, @@ -583,7 +582,7 @@ impl Zip64CentralDirectoryEnd { central_directory_offset, } = self; Zip64CDEBlock { - magic: ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE, + magic: Zip64CDEBlock::MAGIC, /* currently unused */ record_size: 44, version_made_by, @@ -684,7 +683,7 @@ mod test { #[test] fn block_serde() { let block = TestBlock { - magic: Magic::literal(0x01111), + magic: TestBlock::MAGIC, file_name_length: 3, }; let mut c = Cursor::new(Vec::new()); diff --git a/src/types.rs b/src/types.rs index 8e3014092..ed07a7a3a 100644 --- a/src/types.rs +++ b/src/types.rs @@ -623,7 +623,6 @@ impl ZipFileData { .. } = block; - let encrypted: bool = flags & 1 == 1; if encrypted { return Err(ZipError::UnsupportedArchive( @@ -731,7 +730,7 @@ impl ZipFileData { .last_modified_time .unwrap_or_else(DateTime::default_for_write); Ok(ZipLocalEntryBlock { - magic: spec::LOCAL_FILE_HEADER_SIGNATURE, + magic: ZipLocalEntryBlock::MAGIC, version_made_by: self.version_needed(), flags: self.flags(), compression_method: self.compression_method.serialize_to_u16(), @@ -745,14 +744,14 @@ impl ZipFileData { }) } - pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipEntryBlock { + pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipCentralEntryBlock { let extra_field_len: u16 = self.extra_field_len().try_into().unwrap(); let central_extra_field_len: u16 = self.central_extra_field_len().try_into().unwrap(); let last_modified_time = self .last_modified_time .unwrap_or_else(DateTime::default_for_write); - ZipEntryBlock { - magic: spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE, + ZipCentralEntryBlock { + magic: ZipCentralEntryBlock::MAGIC, version_made_by: (self.system as u16) << 8 | (self.version_made_by as u16), version_to_extract: self.version_needed(), flags: self.flags(), @@ -789,7 +788,7 @@ impl ZipFileData { #[derive(Copy, Clone, Debug)] #[repr(packed)] -pub(crate) struct ZipEntryBlock { +pub(crate) struct ZipCentralEntryBlock { pub magic: spec::Magic, pub version_made_by: u16, pub version_to_extract: u16, @@ -809,7 +808,7 @@ pub(crate) struct ZipEntryBlock { pub offset: u32, } -impl Block for ZipEntryBlock { +impl Block for ZipCentralEntryBlock { const MAGIC: spec::Magic = spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE; #[inline(always)] From acb0a6f0c46dba001b1c995fb681d972405e3b87 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 04:46:17 -0400 Subject: [PATCH 17/48] clarify the check being performed --- src/types.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index ed07a7a3a..079543797 100644 --- a/src/types.rs +++ b/src/types.rs @@ -722,7 +722,9 @@ impl ZipFileData { extra_field_length += 20; } if extra_field_length + self.central_extra_field_len() > u16::MAX as usize { - return Err(ZipError::InvalidArchive("Extra data field is too large")); + return Err(ZipError::InvalidArchive( + "Local + central extra data fields are too large", + )); } let extra_field_length: u16 = extra_field_length.try_into().unwrap(); From 3d6c4d1ae49b899ac298a7ce92a70a0a25a7abbb Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 05:56:27 -0400 Subject: [PATCH 18/48] fix fuzz failure --- src/read.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index c8ff7581a..8bcf561ef 100644 --- a/src/read.rs +++ b/src/read.rs @@ -226,10 +226,15 @@ pub(crate) fn find_content<'a>( None => { // Go to start of data. reader.seek(io::SeekFrom::Start(data.header_start))?; + // Parse static-sized fields and check the magic value. let block = ZipLocalEntryBlock::parse(reader)?; + // Calculate the end of the local header from the fields we just parsed. - let variable_fields_len = (block.file_name_length + block.extra_field_length) as u64; + let variable_fields_len = + // Each of these fields must be converted to u64 before adding, as the result may + // easily overflow a u16. + block.file_name_length as u64 + block.extra_field_length as u64; let data_start = data.header_start + mem::size_of::() as u64 + variable_fields_len; @@ -1432,6 +1437,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult()]; reader.read_exact(&mut block)?; let block: Box<[u8]> = block.into(); From 21d07e192c0f054e6cc1cb90df0543a0de60dc06 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 08:09:37 -0400 Subject: [PATCH 19/48] add ExtraFieldMagic and Zip64ExtraFieldBlock --- src/spec.rs | 37 +++++++++++++++ src/types.rs | 126 +++++++++++++++++++++++++++++++++++++++++++-------- src/write.rs | 68 ++++++++++----------------- 3 files changed, 169 insertions(+), 62 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 941040961..f4a516fc0 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -56,6 +56,43 @@ pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06054 pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06064b50); pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = Magic::literal(0x07064b50); +/// Similar to [`Magic`], but used for extra field tags as per section 4.5.3 of APPNOTE.TXT. +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct ExtraFieldMagic(u16); + +/* TODO: maybe try to use this for parsing extra fields as well as writing them? */ +#[allow(dead_code)] +impl ExtraFieldMagic { + pub(crate) const fn literal(x: u16) -> Self { + Self(x) + } + + #[inline(always)] + pub(crate) const fn from_le_bytes(bytes: [u8; 2]) -> Self { + Self(u16::from_le_bytes(bytes)) + } + + #[inline(always)] + pub(crate) const fn to_le_bytes(self) -> [u8; 2] { + self.0.to_le_bytes() + } + + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + pub(crate) fn from_le(self) -> Self { + Self(u16::from_le(self.0)) + } + + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + pub(crate) fn to_le(self) -> Self { + Self(u16::to_le(self.0)) + } +} + +pub const ZIP64_EXTRA_FIELD_TAG: ExtraFieldMagic = ExtraFieldMagic::literal(0x0001); + pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; diff --git a/src/types.rs b/src/types.rs index 079543797..9679968cf 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,6 +3,7 @@ use crate::cp437::FromCp437; use crate::write::{FileOptionExtension, FileOptions}; use path::{Component, Path, PathBuf}; use std::fmt; +use std::mem; use std::path; use std::sync::{Arc, OnceLock}; @@ -706,27 +707,25 @@ impl ZipFileData { }) | if self.encrypted { 1u16 << 0 } else { 0 } } - pub(crate) fn local_block(&self) -> ZipResult { - let (compressed_size, uncompressed_size) = if self.large_file { - (spec::ZIP64_BYTES_THR as u32, spec::ZIP64_BYTES_THR as u32) - } else { - ( - self.compressed_size.try_into().unwrap(), - self.uncompressed_size.try_into().unwrap(), - ) - }; - - let mut extra_field_length = self.extra_field_len(); + fn clamp_size_field(&self, field: u64) -> u32 { if self.large_file { - /* TODO: magic number */ - extra_field_length += 20; - } - if extra_field_length + self.central_extra_field_len() > u16::MAX as usize { - return Err(ZipError::InvalidArchive( - "Local + central extra data fields are too large", - )); + spec::ZIP64_BYTES_THR as u32 + } else { + field.min(spec::ZIP64_BYTES_THR).try_into().unwrap() } - let extra_field_length: u16 = extra_field_length.try_into().unwrap(); + } + + pub(crate) fn local_block(&self) -> ZipResult { + let compressed_size: u32 = self.clamp_size_field(self.compressed_size); + let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size); + + let extra_block_len: usize = self + .zip64_extra_field_block() + .map(|block| block.full_size()) + .unwrap_or(0); + let extra_field_length: u16 = (self.extra_field_len() + extra_block_len) + .try_into() + .map_err(|_| ZipError::InvalidArchive("Extra data field is too large"))?; let last_modified_time = self .last_modified_time @@ -786,6 +785,48 @@ impl ZipFileData { .unwrap(), } } + + pub fn zip64_extra_field_block(&self) -> Option { + let uncompressed_size: Option = + if self.uncompressed_size > spec::ZIP64_BYTES_THR || self.large_file { + Some(spec::ZIP64_BYTES_THR) + } else { + None + }; + let compressed_size: Option = + if self.compressed_size > spec::ZIP64_BYTES_THR || self.large_file { + Some(spec::ZIP64_BYTES_THR) + } else { + None + }; + let header_start: Option = if self.header_start > spec::ZIP64_BYTES_THR { + Some(spec::ZIP64_BYTES_THR) + } else { + None + }; + + let mut size: u16 = 0; + if uncompressed_size.is_some() { + size += mem::size_of::() as u16; + } + if compressed_size.is_some() { + size += mem::size_of::() as u16; + } + if header_start.is_some() { + size += mem::size_of::() as u16; + } + if size == 0 { + return None; + } + + Some(Zip64ExtraFieldBlock { + magic: spec::ZIP64_EXTRA_FIELD_TAG, + size, + uncompressed_size, + compressed_size, + header_start, + }) + } } #[derive(Copy, Clone, Debug)] @@ -882,6 +923,53 @@ impl Block for ZipLocalEntryBlock { ]; } +#[derive(Copy, Clone, Debug)] +pub(crate) struct Zip64ExtraFieldBlock { + magic: spec::ExtraFieldMagic, + size: u16, + uncompressed_size: Option, + compressed_size: Option, + header_start: Option, + // Excluded fields: + // u32: disk start number +} + +impl Zip64ExtraFieldBlock { + pub fn full_size(&self) -> usize { + assert!(self.size > 0); + self.size as usize + mem::size_of::() + mem::size_of::() + } + + pub fn serialize(self) -> Box<[u8]> { + let Self { + magic, + size, + uncompressed_size, + compressed_size, + header_start, + } = self; + + let full_size = self.full_size(); + + let mut ret = Vec::with_capacity(full_size); + ret.extend(magic.to_le_bytes()); + ret.extend(u16::to_le_bytes(size)); + + if let Some(uncompressed_size) = uncompressed_size { + ret.extend(u64::to_le_bytes(uncompressed_size)); + } + if let Some(compressed_size) = compressed_size { + ret.extend(u64::to_le_bytes(compressed_size)); + } + if let Some(header_start) = header_start { + ret.extend(u64::to_le_bytes(header_start)); + } + debug_assert_eq!(ret.len(), full_size); + + ret.into_boxed_slice() + } +} + /// The encryption specification used to encrypt a file with AES. /// /// According to the [specification](https://www.winzip.com/win/en/aes_info.html#winzip11) AE-2 diff --git a/src/write.rs b/src/write.rs index acaf4d8c9..4c98858ba 100644 --- a/src/write.rs +++ b/src/write.rs @@ -8,7 +8,9 @@ use crate::result::{ZipError, ZipResult}; use crate::spec::{self, Block}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; -use crate::types::{ffi, AesVendorVersion, DateTime, ZipFileData, ZipRawValues, DEFAULT_VERSION}; +use crate::types::{ + ffi, AesVendorVersion, DateTime, ZipFileData, ZipLocalEntryBlock, ZipRawValues, DEFAULT_VERSION, +}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -1818,12 +1820,10 @@ fn validate_extra_data(header_id: u16, data: &[u8]) -> ZipResult<()> { fn write_local_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { // This entry in the Local header MUST include BOTH original // and compressed file size fields. - writer.write_u16_le(0x0001)?; - writer.write_u16_le(16)?; - writer.write_u64_le(file.uncompressed_size)?; - writer.write_u64_le(file.compressed_size)?; - // Excluded fields: - // u32: disk start number + assert!(file.large_file); + let block = file.zip64_extra_field_block().unwrap(); + let block = block.serialize(); + writer.write_all(&block)?; Ok(()) } @@ -1831,52 +1831,34 @@ fn update_local_zip64_extra_field( writer: &mut T, file: &ZipFileData, ) -> ZipResult<()> { - let zip64_extra_field = file.header_start + 30 + file.file_name_raw.len() as u64; - writer.seek(SeekFrom::Start(zip64_extra_field + 4))?; - writer.write_u64_le(file.uncompressed_size)?; - writer.write_u64_le(file.compressed_size)?; - // Excluded fields: - // u32: disk start number + assert!(file.large_file); + + let zip64_extra_field = file.header_start + + mem::size_of::() as u64 + + file.file_name_raw.len() as u64; + + writer.seek(SeekFrom::Start(zip64_extra_field))?; + + let block = file.zip64_extra_field_block().unwrap(); + let block = block.serialize(); + writer.write_all(&block)?; Ok(()) } -/* TODO: make this use the Block trait somehow! */ fn write_central_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult { // The order of the fields in the zip64 extended // information record is fixed, but the fields MUST // only appear if the corresponding Local or Central // directory record field is set to 0xFFFF or 0xFFFFFFFF. - let mut size = 0; - let uncompressed_size = file.uncompressed_size > spec::ZIP64_BYTES_THR; - let compressed_size = file.compressed_size > spec::ZIP64_BYTES_THR; - let header_start = file.header_start > spec::ZIP64_BYTES_THR; - if uncompressed_size { - size += 8; - } - if compressed_size { - size += 8; - } - if header_start { - size += 8; - } - if size > 0 { - writer.write_u16_le(0x0001)?; - writer.write_u16_le(size)?; - size += 4; - - if uncompressed_size { - writer.write_u64_le(file.uncompressed_size)?; - } - if compressed_size { - writer.write_u64_le(file.compressed_size)?; - } - if header_start { - writer.write_u64_le(file.header_start)?; + match file.zip64_extra_field_block() { + None => Ok(0), + Some(block) => { + let block = block.serialize(); + writer.write_all(&block)?; + let len: u16 = block.len().try_into().unwrap(); + Ok(len) } - // Excluded fields: - // u32: disk start number } - Ok(size) } #[cfg(not(feature = "unreserved"))] From 8d454d22778ee73db77277e0c5cde02a074b5744 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 08:18:54 -0400 Subject: [PATCH 20/48] nitpick --- src/types.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/types.rs b/src/types.rs index 9679968cf..4ca0da8a9 100644 --- a/src/types.rs +++ b/src/types.rs @@ -700,11 +700,14 @@ impl ZipFileData { } fn flags(&self) -> u16 { - (if self.is_utf8() && !self.is_ascii() { + let utf8_bit: u16 = if self.is_utf8() && !self.is_ascii() { 1u16 << 11 } else { 0 - }) | if self.encrypted { 1u16 << 0 } else { 0 } + }; + let encrypted_bit: u16 = if self.encrypted { 1u16 << 0 } else { 0 }; + + utf8_bit | encrypted_bit } fn clamp_size_field(&self, field: u64) -> u32 { From a7fd5874cf87639ce353292657c99b4eab304d70 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 18 May 2024 08:43:13 -0400 Subject: [PATCH 21/48] reduce visibility for all the blocks --- src/read.rs | 4 ++-- src/spec.rs | 67 ++++++++++++++++++++++++++-------------------------- src/types.rs | 8 +++---- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/read.rs b/src/read.rs index 8bcf561ef..dd3e8fe79 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1445,8 +1445,8 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult (), - spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), + spec::Magic::LOCAL_FILE_HEADER_SIGNATURE => (), + spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE => return Ok(None), _ => return Err(ZipError::InvalidArchive("Invalid local file header")), } diff --git a/src/spec.rs b/src/spec.rs index f4a516fc0..e94e7c9ed 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -14,89 +14,90 @@ use std::path::{Component, Path, MAIN_SEPARATOR}; /// struct to enforce some small amount of type safety. #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] #[repr(transparent)] -pub struct Magic(u32); +pub(crate) struct Magic(u32); impl Magic { - pub(crate) const fn literal(x: u32) -> Self { + pub const fn literal(x: u32) -> Self { Self(x) } #[inline(always)] - pub(crate) const fn from_le_bytes(bytes: [u8; 4]) -> Self { + pub const fn from_le_bytes(bytes: [u8; 4]) -> Self { Self(u32::from_le_bytes(bytes)) } #[inline(always)] - pub(crate) fn from_first_le_bytes(data: &[u8]) -> Self { + pub fn from_first_le_bytes(data: &[u8]) -> Self { let first_bytes: [u8; 4] = data[..mem::size_of::()].try_into().unwrap(); Self::from_le_bytes(first_bytes) } #[inline(always)] - pub(crate) const fn to_le_bytes(self) -> [u8; 4] { + pub const fn to_le_bytes(self) -> [u8; 4] { self.0.to_le_bytes() } #[allow(clippy::wrong_self_convention)] #[inline(always)] - pub(crate) fn from_le(self) -> Self { + pub fn from_le(self) -> Self { Self(u32::from_le(self.0)) } #[allow(clippy::wrong_self_convention)] #[inline(always)] - pub(crate) fn to_le(self) -> Self { + pub fn to_le(self) -> Self { Self(u32::to_le(self.0)) } -} -pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = Magic::literal(0x04034b50); -pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = Magic::literal(0x02014b50); -pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06054b50); -pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = Magic::literal(0x06064b50); -pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = Magic::literal(0x07064b50); + pub const LOCAL_FILE_HEADER_SIGNATURE: Self = Self::literal(0x04034b50); + pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Self = Self::literal(0x02014b50); + pub const CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06054b50); + pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06064b50); + pub const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Self = Self::literal(0x07064b50); +} /// Similar to [`Magic`], but used for extra field tags as per section 4.5.3 of APPNOTE.TXT. #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] #[repr(transparent)] -pub struct ExtraFieldMagic(u16); +pub(crate) struct ExtraFieldMagic(u16); /* TODO: maybe try to use this for parsing extra fields as well as writing them? */ #[allow(dead_code)] impl ExtraFieldMagic { - pub(crate) const fn literal(x: u16) -> Self { + pub const fn literal(x: u16) -> Self { Self(x) } #[inline(always)] - pub(crate) const fn from_le_bytes(bytes: [u8; 2]) -> Self { + pub const fn from_le_bytes(bytes: [u8; 2]) -> Self { Self(u16::from_le_bytes(bytes)) } #[inline(always)] - pub(crate) const fn to_le_bytes(self) -> [u8; 2] { + pub const fn to_le_bytes(self) -> [u8; 2] { self.0.to_le_bytes() } #[allow(clippy::wrong_self_convention)] #[inline(always)] - pub(crate) fn from_le(self) -> Self { + pub fn from_le(self) -> Self { Self(u16::from_le(self.0)) } #[allow(clippy::wrong_self_convention)] #[inline(always)] - pub(crate) fn to_le(self) -> Self { + pub fn to_le(self) -> Self { Self(u16::to_le(self.0)) } -} -pub const ZIP64_EXTRA_FIELD_TAG: ExtraFieldMagic = ExtraFieldMagic::literal(0x0001); + pub const ZIP64_EXTRA_FIELD_TAG: Self = Self::literal(0x0001); +} +/// This should be equal to `0xFFFFFFFF`. pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; -pub trait Block: Sized + Copy { +pub(crate) trait Block: Sized + Copy { const MAGIC: Magic; fn magic(self) -> Magic; @@ -200,7 +201,7 @@ macro_rules! to_and_from_le { #[derive(Copy, Clone, Debug)] #[repr(packed)] -pub struct Zip32CDEBlock { +pub(crate) struct Zip32CDEBlock { magic: Magic, pub disk_number: u16, pub disk_with_central_directory: u16, @@ -212,7 +213,7 @@ pub struct Zip32CDEBlock { } impl Block for Zip32CDEBlock { - const MAGIC: Magic = CENTRAL_DIRECTORY_END_SIGNATURE; + const MAGIC: Magic = Magic::CENTRAL_DIRECTORY_END_SIGNATURE; #[inline(always)] fn magic(self) -> Magic { @@ -234,7 +235,7 @@ impl Block for Zip32CDEBlock { } #[derive(Debug)] -pub struct Zip32CentralDirectoryEnd { +pub(crate) struct Zip32CentralDirectoryEnd { pub disk_number: u16, pub disk_with_central_directory: u16, pub number_of_files_on_this_disk: u16, @@ -311,7 +312,7 @@ impl Zip32CentralDirectoryEnd { assert!(END_WINDOW_SIZE > mem::size_of::()); const SIG_BYTES: [u8; mem::size_of::()] = - CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); let finder = FinderRev::new(&SIG_BYTES); let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64); @@ -380,7 +381,7 @@ impl Zip32CentralDirectoryEnd { #[derive(Copy, Clone)] #[repr(packed)] -pub struct Zip64CDELocatorBlock { +pub(crate) struct Zip64CDELocatorBlock { magic: Magic, pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, @@ -388,7 +389,7 @@ pub struct Zip64CDELocatorBlock { } impl Block for Zip64CDELocatorBlock { - const MAGIC: Magic = ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE; + const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE; #[inline(always)] fn magic(self) -> Magic { @@ -406,7 +407,7 @@ impl Block for Zip64CDELocatorBlock { ]; } -pub struct Zip64CentralDirectoryEndLocator { +pub(crate) struct Zip64CentralDirectoryEndLocator { pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, pub number_of_disks: u32, @@ -450,7 +451,7 @@ impl Zip64CentralDirectoryEndLocator { #[derive(Copy, Clone)] #[repr(packed)] -pub struct Zip64CDEBlock { +pub(crate) struct Zip64CDEBlock { magic: Magic, pub record_size: u64, pub version_made_by: u16, @@ -464,7 +465,7 @@ pub struct Zip64CDEBlock { } impl Block for Zip64CDEBlock { - const MAGIC: Magic = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE; + const MAGIC: Magic = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE; fn magic(self) -> Magic { self.magic @@ -486,7 +487,7 @@ impl Block for Zip64CDEBlock { ]; } -pub struct Zip64CentralDirectoryEnd { +pub(crate) struct Zip64CentralDirectoryEnd { pub version_made_by: u16, pub version_needed_to_extract: u16, pub disk_number: u32, @@ -536,7 +537,7 @@ impl Zip64CentralDirectoryEnd { assert!(END_WINDOW_SIZE > mem::size_of::()); const SIG_BYTES: [u8; mem::size_of::()] = - ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); let finder = FinderRev::new(&SIG_BYTES); let mut window_start: u64 = search_upper_bound diff --git a/src/types.rs b/src/types.rs index 4ca0da8a9..ae2766309 100644 --- a/src/types.rs +++ b/src/types.rs @@ -789,7 +789,7 @@ impl ZipFileData { } } - pub fn zip64_extra_field_block(&self) -> Option { + pub(crate) fn zip64_extra_field_block(&self) -> Option { let uncompressed_size: Option = if self.uncompressed_size > spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) @@ -823,7 +823,7 @@ impl ZipFileData { } Some(Zip64ExtraFieldBlock { - magic: spec::ZIP64_EXTRA_FIELD_TAG, + magic: spec::ExtraFieldMagic::ZIP64_EXTRA_FIELD_TAG, size, uncompressed_size, compressed_size, @@ -855,7 +855,7 @@ pub(crate) struct ZipCentralEntryBlock { } impl Block for ZipCentralEntryBlock { - const MAGIC: spec::Magic = spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE; + const MAGIC: spec::Magic = spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE; #[inline(always)] fn magic(self) -> spec::Magic { @@ -902,7 +902,7 @@ pub(crate) struct ZipLocalEntryBlock { } impl Block for ZipLocalEntryBlock { - const MAGIC: spec::Magic = spec::LOCAL_FILE_HEADER_SIGNATURE; + const MAGIC: spec::Magic = spec::Magic::LOCAL_FILE_HEADER_SIGNATURE; #[inline(always)] fn magic(self) -> spec::Magic { From d852c222fce7bcdd7b3c9a200be5c739ec27179f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 11:59:33 -0400 Subject: [PATCH 22/48] review comments 1 --- benches/read_metadata.rs | 4 ++-- src/compression.rs | 2 +- src/read.rs | 4 ++-- src/spec.rs | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) mode change 100644 => 100755 src/spec.rs diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index f54f7c2d4..76298ef8d 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -1,7 +1,7 @@ use bencher::{benchmark_group, benchmark_main}; use std::fs; -use std::io::{prelude::*, Cursor}; +use std::io::{self, prelude::*, Cursor}; use bencher::Bencher; use getrandom::getrandom; @@ -22,7 +22,7 @@ fn generate_random_archive(count_files: usize, file_size: usize) -> ZipResult( #[inline] fn read_variable_length_byte_field(reader: &mut R, len: usize) -> io::Result> { - let mut data = vec![0; len]; + let mut data = vec![0; len].into_boxed_slice(); reader.read_exact(&mut data)?; - Ok(data.into_boxed_slice()) + Ok(data) } /// Parse a central directory entry to collect the information for the file. diff --git a/src/spec.rs b/src/spec.rs old mode 100644 new mode 100755 index e94e7c9ed..d8948a859 --- a/src/spec.rs +++ b/src/spec.rs @@ -282,7 +282,7 @@ impl Zip32CentralDirectoryEnd { .. } = Zip32CDEBlock::parse(reader)?; - let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize]; + let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize].into_boxed_slice(); reader.read_exact(&mut zip_file_comment)?; Ok(Zip32CentralDirectoryEnd { @@ -292,7 +292,7 @@ impl Zip32CentralDirectoryEnd { number_of_files, central_directory_size, central_directory_offset, - zip_file_comment: zip_file_comment.into_boxed_slice(), + zip_file_comment, }) } From 79b96bdfde33d138836728441f254ba18ee7a3aa Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 12:06:54 -0400 Subject: [PATCH 23/48] add "std" feature to getrandom for io::Error conversion --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ecd0a9db4..80b231439 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,7 @@ arbitrary = { version = "1.3.2", features = ["derive"] } [dev-dependencies] bencher = "0.1.5" -getrandom = { version = "0.2.14", features = ["js"] } +getrandom = { version = "0.2.14", features = ["js", "std"] } walkdir = "2.5.0" time = { workspace = true, features = ["formatting", "macros"] } anyhow = "1" From 7c2474f80cacc9473a26c7b1bec6adf4c14d6fee Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 12:14:19 -0400 Subject: [PATCH 24/48] go into_boxed_slice() earlier --- src/spec.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index d8948a859..d0b3af888 100755 --- a/src/spec.rs +++ b/src/spec.rs @@ -123,9 +123,9 @@ pub(crate) trait Block: Sized + Copy { fn from_le(self) -> Self; fn parse(reader: &mut T) -> ZipResult { - let mut block = vec![0u8; mem::size_of::()]; + let mut block = vec![0u8; mem::size_of::()].into_boxed_slice(); reader.read_exact(&mut block)?; - Self::interpret(block.into_boxed_slice()) + Self::interpret(block) } fn encode(self) -> Box<[u8]> { @@ -138,12 +138,12 @@ pub(crate) trait Block: Sized + Copy { fn serialize(self) -> Box<[u8]> { /* TODO: use Box::new_zeroed() when stabilized! */ /* TODO: also consider using smallvec! */ - let mut out_block = vec![0u8; mem::size_of::()]; + let mut out_block = vec![0u8; mem::size_of::()].into_boxed_slice(); let out_ptr: *mut Self = out_block.as_mut_ptr().cast(); unsafe { out_ptr.write(self); } - out_block.into_boxed_slice() + out_block } fn write(self, writer: &mut T) -> ZipResult<()> { From 0b31d9846a9427061eefc8de4eb118443bc10c1d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 12:31:36 -0400 Subject: [PATCH 25/48] review comments 2 --- src/types.rs | 4 +--- src/write.rs | 13 ++++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/types.rs b/src/types.rs index ae2766309..5a27b0b7f 100644 --- a/src/types.rs +++ b/src/types.rs @@ -7,8 +7,6 @@ use std::mem; use std::path; use std::sync::{Arc, OnceLock}; -#[cfg(doc)] -use crate::read::ZipFile; #[cfg(feature = "chrono")] use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; @@ -70,7 +68,7 @@ impl From for u8 { /// For example, it has a resolution of 2 seconds! /// /// A [`DateTime`] can be stored directly in a zipfile with [`FileOptions::last_modified_time`], -/// or read from one with [`ZipFile::last_modified`] +/// or read from one with [`ZipFile::last_modified`](crate::read::ZipFile::last_modified). /// /// # Warning /// diff --git a/src/write.rs b/src/write.rs index 4c98858ba..337b167c6 100644 --- a/src/write.rs +++ b/src/write.rs @@ -1820,8 +1820,11 @@ fn validate_extra_data(header_id: u16, data: &[u8]) -> ZipResult<()> { fn write_local_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { // This entry in the Local header MUST include BOTH original // and compressed file size fields. - assert!(file.large_file); - let block = file.zip64_extra_field_block().unwrap(); + let Some(block) = file.zip64_extra_field_block() else { + return Err(ZipError::InvalidArchive( + "Attempted to write a ZIP64 extra field for a file that's within zip32 limits", + )); + }; let block = block.serialize(); writer.write_all(&block)?; Ok(()) @@ -1831,7 +1834,11 @@ fn update_local_zip64_extra_field( writer: &mut T, file: &ZipFileData, ) -> ZipResult<()> { - assert!(file.large_file); + if !file.large_file { + return Err(ZipError::InvalidArchive( + "Attempted to update a nonexistent ZIP64 extra field", + )); + } let zip64_extra_field = file.header_start + mem::size_of::() as u64 From 4a784b563637ae104474aafa4b43214af9aa6ffb Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 13:00:49 -0400 Subject: [PATCH 26/48] interpose ZipRawValues into ZipFileData --- src/read.rs | 42 ++++++++++++++++--------------- src/spec.rs | 0 src/types.rs | 70 +++++++++++++++++++++++++++++++++------------------- src/write.rs | 22 ++++++++--------- 4 files changed, 77 insertions(+), 57 deletions(-) mode change 100755 => 100644 src/spec.rs diff --git a/src/read.rs b/src/read.rs index c7e1cf7c2..7879c4331 100644 --- a/src/read.rs +++ b/src/read.rs @@ -11,7 +11,7 @@ use crate::result::{ZipError, ZipResult}; use crate::spec::{self, Block}; use crate::types::{ AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, - ZipLocalEntryBlock, + ZipLocalEntryBlock, ZipRawValues, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; @@ -252,7 +252,7 @@ pub(crate) fn find_content<'a>( }; reader.seek(io::SeekFrom::Start(data_start))?; - Ok((reader as &mut dyn Read).take(data.compressed_size)) + Ok((reader as &mut dyn Read).take(data.compressed_size())) } #[allow(clippy::too_many_arguments)] @@ -404,7 +404,7 @@ impl ZipArchive { if file.using_data_descriptor { return None; } - total = total.checked_add(file.uncompressed_size as u128)?; + total = total.checked_add(file.uncompressed_size() as u128)?; } Some(total) } @@ -700,7 +700,7 @@ impl ZipArchive { None => Ok(None), Some((aes_mode, _, _)) => { let (verification_value, salt) = - AesReader::new(limit_reader, aes_mode, data.compressed_size) + AesReader::new(limit_reader, aes_mode, data.compressed_size()) .get_verification_value_and_salt()?; let aes_info = AesInfo { aes_mode, @@ -980,14 +980,14 @@ impl ZipArchive { let crypto_reader = make_crypto_reader( data.compression_method, - data.crc32, + data.crc32(), data.last_modified_time, data.using_data_descriptor, limit_reader, password, data.aes_mode, #[cfg(feature = "aes-crypto")] - data.compressed_size, + data.compressed_size(), )?; Ok(ZipFile { crypto_reader: Some(crypto_reader), @@ -1094,9 +1094,11 @@ fn central_header_to_zip_file_inner( compression_method: CompressionMethod::parse_from_u16(compression_method), compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), - crc32, - compressed_size: compressed_size as u64, - uncompressed_size: uncompressed_size as u64, + raw_values: ZipRawValues { + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), + }, file_name, file_name_raw, extra_field: Some(Arc::new(extra_field.to_vec())), @@ -1147,14 +1149,14 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { match kind { // Zip64 extended information extra field 0x0001 => { - if file.uncompressed_size == spec::ZIP64_BYTES_THR { + if file.uncompressed_size() == spec::ZIP64_BYTES_THR { file.large_file = true; - file.uncompressed_size = reader.read_u64_le()?; + file.raw_values.uncompressed_size = reader.read_u64_le()?; len_left -= 8; } - if file.compressed_size == spec::ZIP64_BYTES_THR { + if file.compressed_size() == spec::ZIP64_BYTES_THR { file.large_file = true; - file.compressed_size = reader.read_u64_le()?; + file.raw_values.compressed_size = reader.read_u64_le()?; len_left -= 8; } if file.header_start == spec::ZIP64_BYTES_THR { @@ -1228,7 +1230,7 @@ impl<'a> ZipFile<'a> { if let ZipFileReader::NoReader = self.reader { let data = &self.data; let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; + self.reader = make_reader(data.compression_method, data.crc32(), crypto_reader)?; } Ok(&mut self.reader) } @@ -1325,12 +1327,12 @@ impl<'a> ZipFile<'a> { /// Get the size of the file, in bytes, in the archive pub fn compressed_size(&self) -> u64 { - self.data.compressed_size + self.data.compressed_size() } /// Get the size of the file, in bytes, when uncompressed pub fn size(&self) -> u64 { - self.data.uncompressed_size + self.data.uncompressed_size() } /// Get the time the file was last modified @@ -1360,7 +1362,7 @@ impl<'a> ZipFile<'a> { /// Get the CRC32 hash of the original file pub fn crc32(&self) -> u32 { - self.data.crc32 + self.data.crc32() } /// Get the extra data of the zip header for this file @@ -1459,9 +1461,9 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(e), } - let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); + let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size()); - let result_crc32 = result.crc32; + let result_crc32 = result.crc32(); let result_compression_method = result.compression_method; let crypto_reader = make_crypto_reader( result_compression_method, @@ -1472,7 +1474,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult, /// Last modified time. This will only have a 2 second precision. pub last_modified_time: Option, - /// CRC32 checksum - pub crc32: u32, - /// Size of the file in the ZIP - pub compressed_size: u64, - /// Size of the file when extracted - pub uncompressed_size: u64, + /// Checksum and data extents + pub(crate) raw_values: ZipRawValues, /// Name of the file pub file_name: Box, /// Raw file name. To be used when file_name was incorrectly decoded. @@ -431,6 +432,21 @@ pub struct ZipFileData { } impl ZipFileData { + #[inline(always)] + pub fn crc32(&self) -> u32 { + self.raw_values.crc32 + } + + #[inline(always)] + pub fn compressed_size(&self) -> u64 { + self.raw_values.compressed_size + } + + #[inline(always)] + pub fn uncompressed_size(&self) -> u64 { + self.raw_values.uncompressed_size + } + #[allow(dead_code)] pub fn is_dir(&self) -> bool { is_dir(&self.file_name) @@ -583,9 +599,7 @@ impl ZipFileData { compression_method, compression_level: options.compression_level, last_modified_time: Some(options.last_modified_time), - crc32: raw_values.crc32, - compressed_size: raw_values.compressed_size, - uncompressed_size: raw_values.uncompressed_size, + raw_values, file_name, // Never used for saving, but used as map key in insert_file_data() file_name_raw, extra_field, @@ -664,9 +678,11 @@ impl ZipFileData { compression_method, compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), - crc32, - compressed_size: compressed_size.into(), - uncompressed_size: uncompressed_size.into(), + raw_values: ZipRawValues { + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), + }, file_name, file_name_raw: file_name_raw.into(), extra_field: Some(Arc::new(extra_field)), @@ -717,8 +733,8 @@ impl ZipFileData { } pub(crate) fn local_block(&self) -> ZipResult { - let compressed_size: u32 = self.clamp_size_field(self.compressed_size); - let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size); + let compressed_size: u32 = self.clamp_size_field(self.compressed_size()); + let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size()); let extra_block_len: usize = self .zip64_extra_field_block() @@ -738,7 +754,7 @@ impl ZipFileData { compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), - crc32: self.crc32, + crc32: self.crc32(), compressed_size, uncompressed_size, file_name_length: self.file_name_raw.len().try_into().unwrap(), @@ -760,14 +776,14 @@ impl ZipFileData { compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), - crc32: self.crc32, + crc32: self.crc32(), compressed_size: self - .compressed_size + .compressed_size() .min(spec::ZIP64_BYTES_THR) .try_into() .unwrap(), uncompressed_size: self - .uncompressed_size + .uncompressed_size() .min(spec::ZIP64_BYTES_THR) .try_into() .unwrap(), @@ -789,13 +805,13 @@ impl ZipFileData { pub(crate) fn zip64_extra_field_block(&self) -> Option { let uncompressed_size: Option = - if self.uncompressed_size > spec::ZIP64_BYTES_THR || self.large_file { + if self.uncompressed_size() > spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None }; let compressed_size: Option = - if self.compressed_size > spec::ZIP64_BYTES_THR || self.large_file { + if self.compressed_size() > spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None @@ -1039,9 +1055,11 @@ mod test { compression_method: crate::compression::CompressionMethod::Stored, compression_level: None, last_modified_time: None, - crc32: 0, - compressed_size: 0, - uncompressed_size: 0, + raw_values: ZipRawValues { + crc32: 0, + compressed_size: 0, + uncompressed_size: 0, + }, file_name: file_name.clone().into_boxed_str(), file_name_raw: file_name.into_bytes().into_boxed_slice(), extra_field: None, diff --git a/src/write.rs b/src/write.rs index 337b167c6..24379e8f6 100644 --- a/src/write.rs +++ b/src/write.rs @@ -548,12 +548,12 @@ impl ZipWriter { let src_index = self.index_by_name(src_name)?; let src_data = &self.files[src_index]; let data_start = *src_data.data_start.get().unwrap_or(&0); - let compressed_size = src_data.compressed_size; + let compressed_size = src_data.compressed_size(); debug_assert!(compressed_size <= write_position - data_start); - let uncompressed_size = src_data.uncompressed_size; + let uncompressed_size = src_data.uncompressed_size(); let raw_values = ZipRawValues { - crc32: src_data.crc32, + crc32: src_data.crc32(), compressed_size, uncompressed_size, }; @@ -925,13 +925,13 @@ impl ZipWriter { None => return Ok(()), Some((_, f)) => f, }; - file.uncompressed_size = self.stats.bytes_written; + file.raw_values.uncompressed_size = self.stats.bytes_written; let file_end = writer.stream_position()?; debug_assert!(file_end >= self.stats.start); - file.compressed_size = file_end - self.stats.start; + file.raw_values.compressed_size = file_end - self.stats.start; - file.crc32 = self.stats.hasher.clone().finalize(); + file.raw_values.crc32 = self.stats.hasher.clone().finalize(); if let Some(aes_mode) = &mut file.aes_mode { // We prefer using AE-1 which provides an extra CRC check, but for small files we // switch to AE-2 to prevent being able to use the CRC value to to reconstruct the @@ -939,7 +939,7 @@ impl ZipWriter { // // C.f. https://www.winzip.com/en/support/aes-encryption/#crc-faq aes_mode.1 = if self.stats.bytes_written < 20 { - file.crc32 = 0; + file.raw_values.crc32 = 0; AesVendorVersion::Ae2 } else { AesVendorVersion::Ae1 @@ -1740,20 +1740,20 @@ fn update_aes_extra_data( fn update_local_file_header(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { const CRC32_OFFSET: u64 = 14; writer.seek(SeekFrom::Start(file.header_start + CRC32_OFFSET))?; - writer.write_u32_le(file.crc32)?; + writer.write_u32_le(file.crc32())?; if file.large_file { update_local_zip64_extra_field(writer, file)?; } else { // check compressed size as well as it can also be slightly larger than uncompressed size - if file.compressed_size > spec::ZIP64_BYTES_THR { + if file.compressed_size() > spec::ZIP64_BYTES_THR { return Err(ZipError::Io(io::Error::new( io::ErrorKind::Other, "Large file option has not been set", ))); } - writer.write_u32_le(file.compressed_size as u32)?; + writer.write_u32_le(file.compressed_size() as u32)?; // uncompressed size is already checked on write to catch it as soon as possible - writer.write_u32_le(file.uncompressed_size as u32)?; + writer.write_u32_le(file.uncompressed_size() as u32)?; } Ok(()) } From fe663b9ee6001124a3df1a1d9209e4b8dd12a89a Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 22 May 2024 13:06:56 -0400 Subject: [PATCH 27/48] tiny fix --- src/read.rs | 1 + src/types.rs | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index 7879c4331..3889f0fc4 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1142,6 +1142,7 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { }; let mut reader = io::Cursor::new(extra_field.as_ref()); + /* TODO: codify this structure into Zip64ExtraFieldBlock fields! */ while (reader.position() as usize) < extra_field.len() { let kind = reader.read_u16_le()?; let len = reader.read_u16_le()?; diff --git a/src/types.rs b/src/types.rs index e844224df..d2195bd52 100644 --- a/src/types.rs +++ b/src/types.rs @@ -849,7 +849,7 @@ impl ZipFileData { #[derive(Copy, Clone, Debug)] #[repr(packed)] pub(crate) struct ZipCentralEntryBlock { - pub magic: spec::Magic, + magic: spec::Magic, pub version_made_by: u16, pub version_to_extract: u16, pub flags: u16, From a769e9410e5eb9f01d880130bc94df5214d141d7 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 24 May 2024 07:39:18 -0400 Subject: [PATCH 28/48] Revert "interpose ZipRawValues into ZipFileData" This reverts commit d8d4dee5cec372878259380fa347c0ffc6cca044. --- src/read.rs | 42 +++++++++++++++---------------- src/types.rs | 70 +++++++++++++++++++--------------------------------- src/write.rs | 22 ++++++++--------- 3 files changed, 57 insertions(+), 77 deletions(-) diff --git a/src/read.rs b/src/read.rs index 3889f0fc4..4bfce7e7d 100644 --- a/src/read.rs +++ b/src/read.rs @@ -11,7 +11,7 @@ use crate::result::{ZipError, ZipResult}; use crate::spec::{self, Block}; use crate::types::{ AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, - ZipLocalEntryBlock, ZipRawValues, + ZipLocalEntryBlock, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; @@ -252,7 +252,7 @@ pub(crate) fn find_content<'a>( }; reader.seek(io::SeekFrom::Start(data_start))?; - Ok((reader as &mut dyn Read).take(data.compressed_size())) + Ok((reader as &mut dyn Read).take(data.compressed_size)) } #[allow(clippy::too_many_arguments)] @@ -404,7 +404,7 @@ impl ZipArchive { if file.using_data_descriptor { return None; } - total = total.checked_add(file.uncompressed_size() as u128)?; + total = total.checked_add(file.uncompressed_size as u128)?; } Some(total) } @@ -700,7 +700,7 @@ impl ZipArchive { None => Ok(None), Some((aes_mode, _, _)) => { let (verification_value, salt) = - AesReader::new(limit_reader, aes_mode, data.compressed_size()) + AesReader::new(limit_reader, aes_mode, data.compressed_size) .get_verification_value_and_salt()?; let aes_info = AesInfo { aes_mode, @@ -980,14 +980,14 @@ impl ZipArchive { let crypto_reader = make_crypto_reader( data.compression_method, - data.crc32(), + data.crc32, data.last_modified_time, data.using_data_descriptor, limit_reader, password, data.aes_mode, #[cfg(feature = "aes-crypto")] - data.compressed_size(), + data.compressed_size, )?; Ok(ZipFile { crypto_reader: Some(crypto_reader), @@ -1094,11 +1094,9 @@ fn central_header_to_zip_file_inner( compression_method: CompressionMethod::parse_from_u16(compression_method), compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), - raw_values: ZipRawValues { - crc32, - compressed_size: compressed_size.into(), - uncompressed_size: uncompressed_size.into(), - }, + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), file_name, file_name_raw, extra_field: Some(Arc::new(extra_field.to_vec())), @@ -1150,14 +1148,14 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> { match kind { // Zip64 extended information extra field 0x0001 => { - if file.uncompressed_size() == spec::ZIP64_BYTES_THR { + if file.uncompressed_size == spec::ZIP64_BYTES_THR { file.large_file = true; - file.raw_values.uncompressed_size = reader.read_u64_le()?; + file.uncompressed_size = reader.read_u64_le()?; len_left -= 8; } - if file.compressed_size() == spec::ZIP64_BYTES_THR { + if file.compressed_size == spec::ZIP64_BYTES_THR { file.large_file = true; - file.raw_values.compressed_size = reader.read_u64_le()?; + file.compressed_size = reader.read_u64_le()?; len_left -= 8; } if file.header_start == spec::ZIP64_BYTES_THR { @@ -1231,7 +1229,7 @@ impl<'a> ZipFile<'a> { if let ZipFileReader::NoReader = self.reader { let data = &self.data; let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = make_reader(data.compression_method, data.crc32(), crypto_reader)?; + self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; } Ok(&mut self.reader) } @@ -1328,12 +1326,12 @@ impl<'a> ZipFile<'a> { /// Get the size of the file, in bytes, in the archive pub fn compressed_size(&self) -> u64 { - self.data.compressed_size() + self.data.compressed_size } /// Get the size of the file, in bytes, when uncompressed pub fn size(&self) -> u64 { - self.data.uncompressed_size() + self.data.uncompressed_size } /// Get the time the file was last modified @@ -1363,7 +1361,7 @@ impl<'a> ZipFile<'a> { /// Get the CRC32 hash of the original file pub fn crc32(&self) -> u32 { - self.data.crc32() + self.data.crc32 } /// Get the extra data of the zip header for this file @@ -1462,9 +1460,9 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(e), } - let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size()); + let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); - let result_crc32 = result.crc32(); + let result_crc32 = result.crc32; let result_compression_method = result.compression_method; let crypto_reader = make_crypto_reader( result_compression_method, @@ -1475,7 +1473,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult, /// Last modified time. This will only have a 2 second precision. pub last_modified_time: Option, - /// Checksum and data extents - pub(crate) raw_values: ZipRawValues, + /// CRC32 checksum + pub crc32: u32, + /// Size of the file in the ZIP + pub compressed_size: u64, + /// Size of the file when extracted + pub uncompressed_size: u64, /// Name of the file pub file_name: Box, /// Raw file name. To be used when file_name was incorrectly decoded. @@ -432,21 +431,6 @@ pub struct ZipFileData { } impl ZipFileData { - #[inline(always)] - pub fn crc32(&self) -> u32 { - self.raw_values.crc32 - } - - #[inline(always)] - pub fn compressed_size(&self) -> u64 { - self.raw_values.compressed_size - } - - #[inline(always)] - pub fn uncompressed_size(&self) -> u64 { - self.raw_values.uncompressed_size - } - #[allow(dead_code)] pub fn is_dir(&self) -> bool { is_dir(&self.file_name) @@ -599,7 +583,9 @@ impl ZipFileData { compression_method, compression_level: options.compression_level, last_modified_time: Some(options.last_modified_time), - raw_values, + crc32: raw_values.crc32, + compressed_size: raw_values.compressed_size, + uncompressed_size: raw_values.uncompressed_size, file_name, // Never used for saving, but used as map key in insert_file_data() file_name_raw, extra_field, @@ -678,11 +664,9 @@ impl ZipFileData { compression_method, compression_level: None, last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(), - raw_values: ZipRawValues { - crc32, - compressed_size: compressed_size.into(), - uncompressed_size: uncompressed_size.into(), - }, + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), file_name, file_name_raw: file_name_raw.into(), extra_field: Some(Arc::new(extra_field)), @@ -733,8 +717,8 @@ impl ZipFileData { } pub(crate) fn local_block(&self) -> ZipResult { - let compressed_size: u32 = self.clamp_size_field(self.compressed_size()); - let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size()); + let compressed_size: u32 = self.clamp_size_field(self.compressed_size); + let uncompressed_size: u32 = self.clamp_size_field(self.uncompressed_size); let extra_block_len: usize = self .zip64_extra_field_block() @@ -754,7 +738,7 @@ impl ZipFileData { compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), - crc32: self.crc32(), + crc32: self.crc32, compressed_size, uncompressed_size, file_name_length: self.file_name_raw.len().try_into().unwrap(), @@ -776,14 +760,14 @@ impl ZipFileData { compression_method: self.compression_method.serialize_to_u16(), last_mod_time: last_modified_time.timepart(), last_mod_date: last_modified_time.datepart(), - crc32: self.crc32(), + crc32: self.crc32, compressed_size: self - .compressed_size() + .compressed_size .min(spec::ZIP64_BYTES_THR) .try_into() .unwrap(), uncompressed_size: self - .uncompressed_size() + .uncompressed_size .min(spec::ZIP64_BYTES_THR) .try_into() .unwrap(), @@ -805,13 +789,13 @@ impl ZipFileData { pub(crate) fn zip64_extra_field_block(&self) -> Option { let uncompressed_size: Option = - if self.uncompressed_size() > spec::ZIP64_BYTES_THR || self.large_file { + if self.uncompressed_size > spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None }; let compressed_size: Option = - if self.compressed_size() > spec::ZIP64_BYTES_THR || self.large_file { + if self.compressed_size > spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None @@ -1055,11 +1039,9 @@ mod test { compression_method: crate::compression::CompressionMethod::Stored, compression_level: None, last_modified_time: None, - raw_values: ZipRawValues { - crc32: 0, - compressed_size: 0, - uncompressed_size: 0, - }, + crc32: 0, + compressed_size: 0, + uncompressed_size: 0, file_name: file_name.clone().into_boxed_str(), file_name_raw: file_name.into_bytes().into_boxed_slice(), extra_field: None, diff --git a/src/write.rs b/src/write.rs index 24379e8f6..337b167c6 100644 --- a/src/write.rs +++ b/src/write.rs @@ -548,12 +548,12 @@ impl ZipWriter { let src_index = self.index_by_name(src_name)?; let src_data = &self.files[src_index]; let data_start = *src_data.data_start.get().unwrap_or(&0); - let compressed_size = src_data.compressed_size(); + let compressed_size = src_data.compressed_size; debug_assert!(compressed_size <= write_position - data_start); - let uncompressed_size = src_data.uncompressed_size(); + let uncompressed_size = src_data.uncompressed_size; let raw_values = ZipRawValues { - crc32: src_data.crc32(), + crc32: src_data.crc32, compressed_size, uncompressed_size, }; @@ -925,13 +925,13 @@ impl ZipWriter { None => return Ok(()), Some((_, f)) => f, }; - file.raw_values.uncompressed_size = self.stats.bytes_written; + file.uncompressed_size = self.stats.bytes_written; let file_end = writer.stream_position()?; debug_assert!(file_end >= self.stats.start); - file.raw_values.compressed_size = file_end - self.stats.start; + file.compressed_size = file_end - self.stats.start; - file.raw_values.crc32 = self.stats.hasher.clone().finalize(); + file.crc32 = self.stats.hasher.clone().finalize(); if let Some(aes_mode) = &mut file.aes_mode { // We prefer using AE-1 which provides an extra CRC check, but for small files we // switch to AE-2 to prevent being able to use the CRC value to to reconstruct the @@ -939,7 +939,7 @@ impl ZipWriter { // // C.f. https://www.winzip.com/en/support/aes-encryption/#crc-faq aes_mode.1 = if self.stats.bytes_written < 20 { - file.raw_values.crc32 = 0; + file.crc32 = 0; AesVendorVersion::Ae2 } else { AesVendorVersion::Ae1 @@ -1740,20 +1740,20 @@ fn update_aes_extra_data( fn update_local_file_header(writer: &mut T, file: &ZipFileData) -> ZipResult<()> { const CRC32_OFFSET: u64 = 14; writer.seek(SeekFrom::Start(file.header_start + CRC32_OFFSET))?; - writer.write_u32_le(file.crc32())?; + writer.write_u32_le(file.crc32)?; if file.large_file { update_local_zip64_extra_field(writer, file)?; } else { // check compressed size as well as it can also be slightly larger than uncompressed size - if file.compressed_size() > spec::ZIP64_BYTES_THR { + if file.compressed_size > spec::ZIP64_BYTES_THR { return Err(ZipError::Io(io::Error::new( io::ErrorKind::Other, "Large file option has not been set", ))); } - writer.write_u32_le(file.compressed_size() as u32)?; + writer.write_u32_le(file.compressed_size as u32)?; // uncompressed size is already checked on write to catch it as soon as possible - writer.write_u32_le(file.uncompressed_size() as u32)?; + writer.write_u32_le(file.uncompressed_size as u32)?; } Ok(()) } From 80ca2545695f8c460a1180a0fa942b7190c6afa9 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 24 May 2024 08:15:16 -0400 Subject: [PATCH 29/48] fix doc comments --- src/write.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/write.rs b/src/write.rs index 337b167c6..01ebe4606 100644 --- a/src/write.rs +++ b/src/write.rs @@ -702,7 +702,7 @@ impl ZipWriter { /// Set ZIP archive comment. /// /// This sets the raw bytes of the comment. The comment - /// is typically expected to be encoded in UTF-8 + /// is typically expected to be encoded in UTF-8. pub fn set_raw_comment(&mut self, comment: Box<[u8]>) { self.comment = comment; } @@ -715,7 +715,7 @@ impl ZipWriter { /// Get ZIP archive comment. /// /// This returns the raw bytes of the comment. The comment - /// is typically expected to be encoded in UTF-8 + /// is typically expected to be encoded in UTF-8. pub const fn get_raw_comment(&self) -> &[u8] { &self.comment } From a509efc28a2e8e2208d2636ad2213650a5eed571 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 24 May 2024 08:26:38 -0400 Subject: [PATCH 30/48] review comments 3 --- src/spec.rs | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index d0b3af888..ef413a961 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -305,7 +305,9 @@ impl Zip32CentralDirectoryEnd { return Err(ZipError::InvalidArchive("Invalid zip header")); } - let search_upper_bound = 0; + // Arbitrary max length we go back to find the zip32 CDE header. + const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000; + let search_lower_bound = file_length.saturating_sub(MAX_HEADER_AND_COMMENT_SIZE); const END_WINDOW_SIZE: usize = 512; /* TODO: use static_assertions!() */ @@ -317,7 +319,7 @@ impl Zip32CentralDirectoryEnd { let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64); let mut window = [0u8; END_WINDOW_SIZE]; - while window_start >= search_upper_bound { + while window_start >= search_lower_bound { /* Go to the start of the window in the file. */ reader.seek(io::SeekFrom::Start(window_start))?; @@ -344,7 +346,7 @@ impl Zip32CentralDirectoryEnd { /* We always want to make sure we go allllll the way back to the start of the file if * we can't find it elsewhere. However, our `while` condition doesn't check that. So we * avoid infinite looping by checking at the end of the loop. */ - if window_start == search_upper_bound { + if window_start == search_lower_bound { break; } /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that @@ -361,9 +363,9 @@ impl Zip32CentralDirectoryEnd { * once (unless limited by file_length). */ END_WINDOW_SIZE as u64, ) - /* This will never go below the value of `search_upper_bound`, so we have a special - * `if window_start == search_upper_bound` check above. */ - .max(search_upper_bound); + /* This will never go below the value of `search_lower_bound`, so we have a special + * `if window_start == search_lower_bound` check above. */ + .max(search_lower_bound); } Err(ZipError::InvalidArchive( @@ -527,7 +529,7 @@ impl Zip64CentralDirectoryEnd { pub fn find_and_parse( reader: &mut T, - nominal_offset: u64, + search_lower_bound: u64, search_upper_bound: u64, ) -> ZipResult> { let mut results = Vec::new(); @@ -542,9 +544,9 @@ impl Zip64CentralDirectoryEnd { let mut window_start: u64 = search_upper_bound .saturating_sub(END_WINDOW_SIZE as u64) - .max(nominal_offset); + .max(search_lower_bound); let mut window = [0u8; END_WINDOW_SIZE]; - while window_start >= nominal_offset { + while window_start >= search_lower_bound { reader.seek(io::SeekFrom::Start(window_start))?; /* Identify how many bytes to read (this may be less than the window size for files @@ -566,8 +568,8 @@ impl Zip64CentralDirectoryEnd { let cde_start_pos = window_start + offset as u64; reader.seek(io::SeekFrom::Start(cde_start_pos))?; - debug_assert!(cde_start_pos >= nominal_offset); - let archive_offset = cde_start_pos - nominal_offset; + debug_assert!(cde_start_pos >= search_lower_bound); + let archive_offset = cde_start_pos - search_lower_bound; let cde = Self::parse(reader)?; results.push((cde, archive_offset)); @@ -576,7 +578,7 @@ impl Zip64CentralDirectoryEnd { /* We always want to make sure we go allllll the way back to the start of the file if * we can't find it elsewhere. However, our `while` condition doesn't check that. So we * avoid infinite looping by checking at the end of the loop. */ - if window_start == nominal_offset { + if window_start == search_lower_bound { break; } /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that @@ -594,9 +596,9 @@ impl Zip64CentralDirectoryEnd { * once (unless limited by search_upper_bound). */ END_WINDOW_SIZE as u64, ) - /* This will never go below the value of `nominal_offset`, so we have a special - * `if window_start == nominal_offset` check above. */ - .max(nominal_offset); + /* This will never go below the value of `search_lower_bound`, so we have a special + * `if window_start == search_lower_bound` check above. */ + .max(search_lower_bound); } if results.is_empty() { From 8e5b157853f1c0ab8ecdecc0e3862775a42b5f5c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 24 May 2024 08:58:41 -0400 Subject: [PATCH 31/48] fix stream benchmark --- benches/read_metadata.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 76298ef8d..04ace533b 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -95,21 +95,19 @@ fn parse_zip64_archive_with_comment(bench: &mut Bencher) { bench.bytes = bytes.len() as u64; } -#[allow(dead_code)] fn parse_stream_archive(bench: &mut Bencher) { const STREAM_ZIP_ENTRIES: usize = 5; const STREAM_FILE_SIZE: usize = 5; let bytes = generate_random_archive(STREAM_ZIP_ENTRIES, STREAM_FILE_SIZE).unwrap(); - dbg!(&bytes); /* Write to a temporary file path to incur some filesystem overhead from repeated reads */ let dir = TempDir::new("stream-bench").unwrap(); let out = dir.path().join("bench-out.zip"); fs::write(&out, &bytes).unwrap(); - let mut f = fs::File::open(out).unwrap(); - bench.iter(|| loop { + bench.iter(|| { + let mut f = fs::File::open(&out).unwrap(); while zip::read::read_zipfile_from_stream(&mut f) .unwrap() .is_some() @@ -123,7 +121,6 @@ benchmark_group!( read_metadata, parse_archive_with_comment, parse_zip64_archive_with_comment, - /* FIXME: this currently errors */ - /* parse_stream_archive */ + parse_stream_archive, ); benchmark_main!(benches); From d81382b29a45155705b19f5ac9cab6ec30154d1d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Fri, 24 May 2024 08:59:51 -0400 Subject: [PATCH 32/48] revert limit for search_lower_bound to fix benchmark --- src/spec.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index ef413a961..7dc8286ff 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -305,9 +305,7 @@ impl Zip32CentralDirectoryEnd { return Err(ZipError::InvalidArchive("Invalid zip header")); } - // Arbitrary max length we go back to find the zip32 CDE header. - const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000; - let search_lower_bound = file_length.saturating_sub(MAX_HEADER_AND_COMMENT_SIZE); + let search_lower_bound = 0; const END_WINDOW_SIZE: usize = 512; /* TODO: use static_assertions!() */ From ed1d38f5dac24fe6bb40f628bf6e49f09187f622 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 12:53:27 -0700 Subject: [PATCH 33/48] Run bench only once for each random input Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- benches/read_metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 04ace533b..999ca426e 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -59,7 +59,7 @@ fn generate_zip32_archive_with_random_comment(comment_length: usize) -> ZipResul fn parse_archive_with_comment(bench: &mut Bencher) { let bytes = generate_zip32_archive_with_random_comment(COMMENT_SIZE).unwrap(); - bench.iter(|| { + bench.bench_n(1, || { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); archive.comment().len() }); From 9722dd31e9f54873d11c85820d25b5ee344bc89a Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 12:57:34 -0700 Subject: [PATCH 34/48] Return error if file comment is too long Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index 7dc8286ff..d4c611137 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -264,7 +264,7 @@ impl Zip32CentralDirectoryEnd { number_of_files, central_directory_size, central_directory_offset, - zip_file_comment_length: zip_file_comment.len().try_into().unwrap_or(u16::MAX), + zip_file_comment_length: zip_file_comment.len().try_into().map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?, }; Ok((block, zip_file_comment)) } From 848309a944b424149dd9d6980f2ca94ce3997951 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 12:58:19 -0700 Subject: [PATCH 35/48] Switch to debug_assert! for an assert! involving only constants Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index d4c611137..0aaf00e9a 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -534,7 +534,7 @@ impl Zip64CentralDirectoryEnd { const END_WINDOW_SIZE: usize = 2048; /* TODO: use static_assertions!() */ - assert!(END_WINDOW_SIZE > mem::size_of::()); + debug_assert!(END_WINDOW_SIZE > mem::size_of::()); const SIG_BYTES: [u8; mem::size_of::()] = Magic::ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); From 18760e9f9de7b3880a0ca7df97e36f01833459ba Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 12:58:36 -0700 Subject: [PATCH 36/48] Switch to debug_assert! for an assert! involving only constants Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index 0aaf00e9a..a69e45fa1 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -309,7 +309,7 @@ impl Zip32CentralDirectoryEnd { const END_WINDOW_SIZE: usize = 512; /* TODO: use static_assertions!() */ - assert!(END_WINDOW_SIZE > mem::size_of::()); + debug_assert!(END_WINDOW_SIZE > mem::size_of::()); const SIG_BYTES: [u8; mem::size_of::()] = Magic::CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); From 2a39a8e0a73cf03af86d79fc75a2c4b700402d4b Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 12:59:13 -0700 Subject: [PATCH 37/48] Fix an off-by-one error in large-file detection Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index 1eb13b3bc..bcd535e40 100644 --- a/src/types.rs +++ b/src/types.rs @@ -800,7 +800,7 @@ impl ZipFileData { } else { None }; - let header_start: Option = if self.header_start > spec::ZIP64_BYTES_THR { + let header_start: Option = if self.header_start >= spec::ZIP64_BYTES_THR { Some(spec::ZIP64_BYTES_THR) } else { None From a4915fdcd7b12fe27eadd493fba5b8748effdb8a Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:01:51 -0700 Subject: [PATCH 38/48] Fix a bug in benchmark: closure needs a parameter Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- benches/read_metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 999ca426e..ed6fa853c 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -59,7 +59,7 @@ fn generate_zip32_archive_with_random_comment(comment_length: usize) -> ZipResul fn parse_archive_with_comment(bench: &mut Bencher) { let bytes = generate_zip32_archive_with_random_comment(COMMENT_SIZE).unwrap(); - bench.bench_n(1, || { + bench.bench_n(1, |_| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); archive.comment().len() }); From 1bb0b14456a77c26f171af8132c31ca3cb8c1675 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:03:00 -0700 Subject: [PATCH 39/48] style: Fix cargo fmt check Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/spec.rs b/src/spec.rs index a69e45fa1..8738a7bdb 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -264,7 +264,10 @@ impl Zip32CentralDirectoryEnd { number_of_files, central_directory_size, central_directory_offset, - zip_file_comment_length: zip_file_comment.len().try_into().map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?, + zip_file_comment_length: zip_file_comment + .len() + .try_into() + .map_err(|_| ZipError::InvalidArchive("File comment must be less than 64 KiB"))?, }; Ok((block, zip_file_comment)) } From f90bdf76b88a6218847b04d1813774f79251f41c Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:03:45 -0700 Subject: [PATCH 40/48] Fix an off-by-one error in large-file detection Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index bcd535e40..0d078296d 100644 --- a/src/types.rs +++ b/src/types.rs @@ -789,7 +789,7 @@ impl ZipFileData { pub(crate) fn zip64_extra_field_block(&self) -> Option { let uncompressed_size: Option = - if self.uncompressed_size > spec::ZIP64_BYTES_THR || self.large_file { + if self.uncompressed_size >= spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None From 3ab9f457fb8caf9cce64a2ea571cadbe1f76810d Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:05:49 -0700 Subject: [PATCH 41/48] Bug fix: `bench_n` expects empty return Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- benches/read_metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index ed6fa853c..f4a77988c 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -61,7 +61,7 @@ fn parse_archive_with_comment(bench: &mut Bencher) { bench.bench_n(1, |_| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.comment().len() + archive.comment().len(); }); bench.bytes = bytes.len() as u64; } From a462b859fa208109ef2047053d4cf031a92c4157 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:06:12 -0700 Subject: [PATCH 42/48] Fix an off-by-one error in large-file detection Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/types.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/types.rs b/src/types.rs index 346d626d7..0c2863dec 100644 --- a/src/types.rs +++ b/src/types.rs @@ -811,7 +811,7 @@ impl ZipFileData { None }; let compressed_size: Option = - if self.compressed_size > spec::ZIP64_BYTES_THR || self.large_file { + if self.compressed_size >= spec::ZIP64_BYTES_THR || self.large_file { Some(spec::ZIP64_BYTES_THR) } else { None From 5e216fe15016566bde3b4918cb5f6f9aef7017c4 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:08:05 -0700 Subject: [PATCH 43/48] Bug fix: len() is `must-use` Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- benches/read_metadata.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index f4a77988c..73f2b26ed 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -61,7 +61,7 @@ fn parse_archive_with_comment(bench: &mut Bencher) { bench.bench_n(1, |_| { let archive = ZipArchive::new(Cursor::new(bytes.as_slice())).unwrap(); - archive.comment().len(); + let _ = archive.comment().len(); }); bench.bytes = bytes.len() as u64; } From 01bb1624567812695f9cf98bfba46184b98bef14 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:10:44 -0700 Subject: [PATCH 44/48] Remove an unused macro branch Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index 8738a7bdb..cdeb04282 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -158,9 +158,6 @@ macro_rules! from_le { ($obj:ident, $field:ident, $type:ty) => { $obj.$field = <$type>::from_le($obj.$field); }; - ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { - from_le![$obj, $field, $type]; - }; ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { from_le![$obj, $field, $type]; from_le!($obj, [$($rest),+]); From 3af70176e34fd528e611c967f3c6c3bad8b00d89 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:11:07 -0700 Subject: [PATCH 45/48] Remove an unused macro branch Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/spec.rs b/src/spec.rs index cdeb04282..3726a58f9 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -169,9 +169,6 @@ macro_rules! to_le { ($obj:ident, $field:ident, $type:ty) => { $obj.$field = <$type>::to_le($obj.$field); }; - ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { - to_le![$obj, $field, $type]; - }; ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { to_le![$obj, $field, $type]; to_le!($obj, [$($rest),+]); From 326b2c4582daea8eead519be0bbdac6cf4bd0f7d Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 13:15:58 -0700 Subject: [PATCH 46/48] Revert macro changes Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/spec.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/spec.rs b/src/spec.rs index 3726a58f9..8738a7bdb 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -158,6 +158,9 @@ macro_rules! from_le { ($obj:ident, $field:ident, $type:ty) => { $obj.$field = <$type>::from_le($obj.$field); }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + from_le![$obj, $field, $type]; + }; ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { from_le![$obj, $field, $type]; from_le!($obj, [$($rest),+]); @@ -169,6 +172,9 @@ macro_rules! to_le { ($obj:ident, $field:ident, $type:ty) => { $obj.$field = <$type>::to_le($obj.$field); }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + to_le![$obj, $field, $type]; + }; ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { to_le![$obj, $field, $type]; to_le!($obj, [$($rest),+]); From df70f6a320a7a536616dcb50d650f2bdaedffdf7 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 14:57:06 -0700 Subject: [PATCH 47/48] Fix unmatched bracket due to bad merge Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/write.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/write.rs b/src/write.rs index 7c2ee5c03..f12485764 100644 --- a/src/write.rs +++ b/src/write.rs @@ -815,7 +815,7 @@ impl ZipWriter { let _ = self.abort_file(); return Err(e); } - + } // file name writer.write_all(&file.file_name_raw)?; // zip64 extra field From a28b16e69cc006c02cd8e38d869d5958bcc948b5 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 24 May 2024 15:27:28 -0700 Subject: [PATCH 48/48] Apply suggestions from code review Fix errors Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/write.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/write.rs b/src/write.rs index f12485764..8e4cce8ab 100644 --- a/src/write.rs +++ b/src/write.rs @@ -796,9 +796,8 @@ impl ZipWriter { extra_field, ); let version_needed = file.version_needed(); - file.version_needed = version_needed; file.version_made_by = file.version_made_by.max(version_needed as u8); - let index = self.insert_file_data(file)?; + let index = self.insert_file_data(file)?; let file = &mut self.files[index]; let writer = self.inner.get_plain();