diff --git a/Cargo.toml b/Cargo.toml index cf36d0041..c7e91815c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ displaydoc = { version = "0.2.4", default-features = false } flate2 = { version = "1.0.28", default-features = false, optional = true } indexmap = "2" hmac = { version = "0.12.1", optional = true, features = ["reset"] } +memchr = "2.7.2" pbkdf2 = { version = "0.12.2", optional = true } rand = { version = "0.8.5", optional = true } sha1 = { version = "0.10.6", optional = true } diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 2b4b64c8b..1ac1b35bc 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -45,7 +45,7 @@ fn generate_random_zip32_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes); + writer.set_raw_comment(bytes.into_boxed_slice()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; @@ -73,7 +73,7 @@ fn generate_random_zip64_archive_with_comment(comment_length: usize) -> ZipResul let mut bytes = vec![0u8; comment_length]; getrandom(&mut bytes).unwrap(); - writer.set_raw_comment(bytes); + writer.set_raw_comment(bytes.into_boxed_slice()); writer.start_file("asdf.txt", options)?; writer.write_all(b"asdf")?; diff --git a/src/read.rs b/src/read.rs index 2b65cdbeb..67253a1b8 100644 --- a/src/read.rs +++ b/src/read.rs @@ -8,14 +8,17 @@ use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::Shared; use crate::result::{ZipError, ZipResult}; -use crate::spec; -use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipFileData}; +use crate::spec::{self, Block}; +use crate::types::{ + AesMode, AesVendorVersion, DateTime, System, ZipEntryBlock, ZipFileData, ZipLocalEntryBlock, +}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; use 
std::ffi::{OsStr, OsString}; use std::fs::create_dir_all; use std::io::{self, copy, prelude::*, sink}; +use std::mem; use std::ops::Deref; use std::path::{Path, PathBuf}; use std::sync::{Arc, OnceLock}; @@ -453,7 +456,7 @@ impl ZipArchive { } fn get_directory_info_zip32( - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { // Some zip files have data prepended to them, resulting in the @@ -480,7 +483,7 @@ impl ZipArchive { fn get_directory_info_zip64( reader: &mut R, - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult>> { // See if there's a ZIP64 footer. The ZIP64 locator if present will @@ -497,56 +500,59 @@ impl ZipArchive { // don't know how to precisely relate that location to our current // actual offset in the file, since there may be junk at its // beginning. Therefore we need to perform another search, as in - // read::CentralDirectoryEnd::find_and_parse, except now we search + // read::Zip32CentralDirectoryEnd::find_and_parse, except now we search // forward. There may be multiple results because of Zip64 central-directory signatures in // ZIP comment data. 
- let mut results = Vec::new(); - let search_upper_bound = cde_start_pos - .checked_sub(60) // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + // minimum size of Zip64CentralDirectoryEnd + Zip64CentralDirectoryEndLocator + .checked_sub(60) .ok_or(ZipError::InvalidArchive( "File cannot contain ZIP64 central directory end", ))?; - let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse( - reader, - locator64.end_of_central_directory_offset, - search_upper_bound, - )?; - search_results.into_iter().for_each(|(footer64, archive_offset)| { - results.push({ - let directory_start_result = footer64 + let (lower, upper) = if locator64.end_of_central_directory_offset > search_upper_bound { + ( + search_upper_bound, + locator64.end_of_central_directory_offset, + ) + } else { + ( + locator64.end_of_central_directory_offset, + search_upper_bound, + ) + }; + let search_results = spec::Zip64CentralDirectoryEnd::find_and_parse(reader, lower, upper)?; + let results: Vec> = + search_results.into_iter().map(|(footer64, archive_offset)| { + let directory_start = footer64 .central_directory_offset .checked_add(archive_offset) .ok_or(ZipError::InvalidArchive( "Invalid central directory size or offset", - )); - directory_start_result.and_then(|directory_start| { - if directory_start > search_upper_bound { - Err(ZipError::InvalidArchive( - "Invalid central directory size or offset", - )) - } else if footer64.number_of_files_on_this_disk > footer64.number_of_files { - Err(ZipError::InvalidArchive( - "ZIP64 footer indicates more files on this disk than in the whole archive", - )) - } else if footer64.version_needed_to_extract > footer64.version_made_by { - Err(ZipError::InvalidArchive( - "ZIP64 footer indicates a new version is needed to extract this archive than the \ - version that wrote it", - )) - } else { - Ok(CentralDirectoryInfo { - archive_offset, - directory_start, - number_of_files: footer64.number_of_files as usize, - disk_number: 
footer64.disk_number, - disk_with_central_directory: footer64.disk_with_central_directory, - }) - } - }) - }); - }); + ))?; + if directory_start > search_upper_bound { + Err(ZipError::InvalidArchive( + "Invalid central directory size or offset", + )) + } else if footer64.number_of_files_on_this_disk > footer64.number_of_files { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates more files on this disk than in the whole archive", + )) + } else if footer64.version_needed_to_extract > footer64.version_made_by { + Err(ZipError::InvalidArchive( + "ZIP64 footer indicates a new version is needed to extract this archive than the \ + version that wrote it", + )) + } else { + Ok(CentralDirectoryInfo { + archive_offset, + directory_start, + number_of_files: footer64.number_of_files as usize, + disk_number: footer64.disk_number, + disk_with_central_directory: footer64.disk_with_central_directory, + }) + } + }).collect(); Ok(results) } @@ -554,7 +560,7 @@ impl ZipArchive { /// separate function to ease the control flow design. 
pub(crate) fn get_metadata( reader: &mut R, - footer: &spec::CentralDirectoryEnd, + footer: &spec::Zip32CentralDirectoryEnd, cde_start_pos: u64, ) -> ZipResult { // Check if file has a zip64 footer @@ -650,7 +656,7 @@ impl ZipArchive { /// /// This uses the central directory record of the ZIP file, and ignores local file headers pub fn new(mut reader: R) -> ZipResult> { - let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut reader)?; + let (footer, cde_start_pos) = spec::Zip32CentralDirectoryEnd::find_and_parse(&mut reader)?; let shared = Self::get_metadata(&mut reader, &footer, cde_start_pos)?; Ok(ZipArchive { reader, @@ -927,12 +933,8 @@ pub(crate) fn central_header_to_zip_file( let central_header_start = reader.stream_position()?; // Parse central header - let signature = reader.read_u32_le()?; - if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - Err(ZipError::InvalidArchive("Invalid Central Directory header")) - } else { - central_header_to_zip_file_inner(reader, archive_offset, central_header_start) - } + let block = ZipEntryBlock::parse(reader)?; + central_header_to_zip_file_inner(reader, archive_offset, central_header_start, block) } /// Parse a central directory entry to collect the information for the file. @@ -940,31 +942,38 @@ fn central_header_to_zip_file_inner( reader: &mut R, archive_offset: u64, central_header_start: u64, + block: ZipEntryBlock, ) -> ZipResult { - let version_made_by = reader.read_u16_le()?; - let _version_to_extract = reader.read_u16_le()?; - let flags = reader.read_u16_le()?; + let ZipEntryBlock { + // magic, + version_made_by, + // version_to_extract, + flags, + compression_method, + last_mod_time, + last_mod_date, + crc32, + compressed_size, + uncompressed_size, + file_name_length, + extra_field_length, + file_comment_length, + // disk_number, + // internal_file_attributes, + external_file_attributes, + offset, + .. 
+ } = block; + let encrypted = flags & 1 == 1; let is_utf8 = flags & (1 << 11) != 0; let using_data_descriptor = flags & (1 << 3) != 0; - let compression_method = reader.read_u16_le()?; - let last_mod_time = reader.read_u16_le()?; - let last_mod_date = reader.read_u16_le()?; - let crc32 = reader.read_u32_le()?; - let compressed_size = reader.read_u32_le()?; - let uncompressed_size = reader.read_u32_le()?; - let file_name_length = reader.read_u16_le()? as usize; - let extra_field_length = reader.read_u16_le()? as usize; - let file_comment_length = reader.read_u16_le()? as usize; - let _disk_number = reader.read_u16_le()?; - let _internal_file_attributes = reader.read_u16_le()?; - let external_file_attributes = reader.read_u32_le()?; - let offset = reader.read_u32_le()? as u64; - let mut file_name_raw = vec![0; file_name_length]; + + let mut file_name_raw = vec![0; file_name_length as usize]; reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length]; + let mut extra_field = vec![0; extra_field_length as usize]; reader.read_exact(&mut extra_field)?; - let mut file_comment_raw = vec![0; file_comment_length]; + let mut file_comment_raw = vec![0; file_comment_length as usize]; reader.read_exact(&mut file_comment_raw)?; let file_name: Box = match is_utf8 { @@ -979,6 +988,7 @@ fn central_header_to_zip_file_inner( // Construct the result let mut result = ZipFileData { system: System::from((version_made_by >> 8) as u8), + /* NB: this strips the top 8 bits! 
*/ version_made_by: version_made_by as u8, encrypted, using_data_descriptor, @@ -996,7 +1006,7 @@ fn central_header_to_zip_file_inner( extra_field: Some(Arc::new(extra_field)), central_extra_field: None, file_comment, - header_start: offset, + header_start: offset.into(), extra_data_start: None, central_header_start, data_start: OnceLock::new(), @@ -1332,7 +1342,15 @@ impl<'a> Drop for ZipFile<'a> { /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { - let signature = reader.read_u32_le()?; + let mut block = [0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + let block: Box<[u8]> = block.into(); + + let signature = spec::Magic::from_le_bytes( + block[..mem::size_of_val(&spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)] + .try_into() + .unwrap(), + ); match signature { spec::LOCAL_FILE_HEADER_SIGNATURE => (), @@ -1340,75 +1358,15 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult return Err(ZipError::InvalidArchive("Invalid local file header")), } - let version_made_by = reader.read_u16_le()?; - let flags = reader.read_u16_le()?; - let encrypted = flags & 1 == 1; - let is_utf8 = flags & (1 << 11) != 0; - let using_data_descriptor = flags & (1 << 3) != 0; - #[allow(deprecated)] - let compression_method = CompressionMethod::from_u16(reader.read_u16_le()?); - let last_mod_time = reader.read_u16_le()?; - let last_mod_date = reader.read_u16_le()?; - let crc32 = reader.read_u32_le()?; - let compressed_size = reader.read_u32_le()?; - let uncompressed_size = reader.read_u32_le()?; - let file_name_length = reader.read_u16_le()? as usize; - let extra_field_length = reader.read_u16_le()? 
as usize; - - let mut file_name_raw = vec![0; file_name_length]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0; extra_field_length]; - reader.read_exact(&mut extra_field)?; + let block = ZipLocalEntryBlock::interpret(block)?; - let file_name: Box = match is_utf8 { - true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.clone().from_cp437().into(), - }; - - let mut result = ZipFileData { - system: System::from((version_made_by >> 8) as u8), - version_made_by: version_made_by as u8, - encrypted, - using_data_descriptor, - compression_method, - compression_level: None, - last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time), - crc32, - compressed_size: compressed_size as u64, - uncompressed_size: uncompressed_size as u64, - file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), - central_extra_field: None, - file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory - // header_start and data start are not available, but also don't matter, since seeking is - // not available. - header_start: 0, - extra_data_start: None, - data_start: OnceLock::new(), - central_header_start: 0, - // The external_attributes field is only available in the central directory. - // We set this to zero, which should be valid as the docs state 'If input came - // from standard input, this field is set to zero.' - external_attributes: 0, - large_file: false, - aes_mode: None, - aes_extra_data_start: 0, - extra_fields: Vec::new(), - }; + let mut result = ZipFileData::from_local_block(block, reader)?; match parse_extra_field(&mut result) { Ok(..) 
| Err(ZipError::Io(..)) => {} Err(e) => return Err(e), } - if encrypted { - return unsupported_zip_error("Encrypted files are not supported"); - } - if using_data_descriptor { - return unsupported_zip_error("The file length is not available in the local header"); - } - let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); let result_crc32 = result.crc32; diff --git a/src/read/stream.rs b/src/read/stream.rs index 40cb9efc8..9673f2e50 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,12 +1,12 @@ -use crate::unstable::LittleEndianReadExt; use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; use super::{ - central_header_to_zip_file_inner, read_zipfile_from_stream, spec, ZipError, ZipFile, + central_header_to_zip_file_inner, read_zipfile_from_stream, ZipEntryBlock, ZipError, ZipFile, ZipFileData, ZipResult, }; +use crate::spec::Block; /// Stream decoder for zip. #[derive(Debug)] @@ -20,31 +20,31 @@ impl ZipStreamReader { } impl ZipStreamReader { - fn parse_central_directory(&mut self) -> ZipResult> { + fn parse_central_directory(&mut self) -> ZipResult { // Give archive_offset and central_header_start dummy value 0, since // they are not used in the output. let archive_offset = 0; let central_header_start = 0; // Parse central header - let signature = self.0.read_u32_le()?; - if signature != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { - Ok(None) - } else { - central_header_to_zip_file_inner(&mut self.0, archive_offset, central_header_start) - .map(ZipStreamFileMetadata) - .map(Some) - } + let block = ZipEntryBlock::parse(&mut self.0)?; + let file = central_header_to_zip_file_inner( + &mut self.0, + archive_offset, + central_header_start, + block, + )?; + Ok(ZipStreamFileMetadata(file)) } - /// Iteraate over the stream and extract all file and their + /// Iterate over the stream and extract all files and their /// metadata.
pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { visitor.visit_file(&mut file)?; } - while let Some(metadata) = self.parse_central_directory()? { + while let Ok(metadata) = self.parse_central_directory() { visitor.visit_additional_metadata(&metadata)?; } diff --git a/src/spec.rs b/src/spec.rs old mode 100644 new mode 100755 index 350193980..2187f725f --- a/src/spec.rs +++ b/src/spec.rs @@ -1,47 +1,169 @@ +#![macro_use] + use crate::result::{ZipError, ZipResult}; -use crate::unstable::{LittleEndianReadExt, LittleEndianWriteExt}; -use core::mem::size_of_val; +use memchr::memmem::FinderRev; use std::borrow::Cow; use std::io; use std::io::prelude::*; +use std::mem; use std::path::{Component, Path, MAIN_SEPARATOR}; -pub const LOCAL_FILE_HEADER_SIGNATURE: u32 = 0x04034b50; -pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: u32 = 0x02014b50; -pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06054b50; -pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: u32 = 0x06064b50; -pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: u32 = 0x07064b50; +pub type Magic = u32; + +pub const LOCAL_FILE_HEADER_SIGNATURE: Magic = 0x04034b50; +pub const CENTRAL_DIRECTORY_HEADER_SIGNATURE: Magic = 0x02014b50; +pub(crate) const CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06054b50; +pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Magic = 0x06064b50; +pub(crate) const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Magic = 0x07064b50; pub const ZIP64_BYTES_THR: u64 = u32::MAX as u64; pub const ZIP64_ENTRY_THR: usize = u16::MAX as usize; -pub struct CentralDirectoryEnd { +pub trait Block: Sized + Copy { + /* TODO: use smallvec? 
*/ + fn interpret(bytes: Box<[u8]>) -> ZipResult; + + fn deserialize(block: &[u8]) -> Self { + assert_eq!(block.len(), mem::size_of::()); + let block_ptr: *const Self = block.as_ptr().cast(); + unsafe { block_ptr.read() } + } + + fn parse(reader: &mut T) -> ZipResult { + let mut block = vec![0u8; mem::size_of::()]; + reader.read_exact(&mut block)?; + Self::interpret(block.into_boxed_slice()) + } + + fn encode(self) -> Box<[u8]>; + + fn serialize(self) -> Box<[u8]> { + let mut out_block = vec![0u8; mem::size_of::()]; + let out_view: &mut [u8] = out_block.as_mut(); + let out_ptr: *mut Self = out_view.as_mut_ptr().cast(); + unsafe { + out_ptr.write(self); + } + out_block.into_boxed_slice() + } + + fn write(self, writer: &mut T) -> ZipResult<()> { + let block = self.encode(); + writer.write_all(&block)?; + Ok(()) + } +} + +/// Convert all the fields of a struct *from* little-endian representations. +macro_rules! from_le { + ($obj:ident, $field:ident, $type:ty) => { + $obj.$field = <$type>::from_le($obj.$field); + }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + from_le![$obj, $field, $type]; + }; + ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { + from_le![$obj, $field, $type]; + from_le!($obj, [$($rest),+]); + }; +} + +/// Convert all the fields of a struct *into* little-endian representations. +macro_rules! 
to_le { + ($obj:ident, $field:ident, $type:ty) => { + $obj.$field = <$type>::to_le($obj.$field); + }; + ($obj:ident, [($field:ident, $type:ty) $(,)?]) => { + to_le![$obj, $field, $type]; + }; + ($obj:ident, [($field:ident, $type:ty), $($rest:tt),+ $(,)?]) => { + to_le![$obj, $field, $type]; + to_le!($obj, [$($rest),+]); + }; +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub struct Zip32CDEBlock { + magic: Magic, pub disk_number: u16, pub disk_with_central_directory: u16, pub number_of_files_on_this_disk: u16, pub number_of_files: u16, pub central_directory_size: u32, pub central_directory_offset: u32, - pub zip_file_comment: Box<[u8]>, + pub zip_file_comment_length: u16, +} + +impl Zip32CDEBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (disk_number, u16), + (disk_with_central_directory, u16), + (number_of_files_on_this_disk, u16), + (number_of_files, u16), + (central_directory_size, u32), + (central_directory_offset, u32), + (zip_file_comment_length, u16) + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, Magic), + (disk_number, u16), + (disk_with_central_directory, u16), + (number_of_files_on_this_disk, u16), + (number_of_files, u16), + (central_directory_size, u32), + (central_directory_offset, u32), + (zip_file_comment_length, u16) + ] + ]; + self + } } -impl CentralDirectoryEnd { - pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32_le()?; - if magic != CENTRAL_DIRECTORY_END_SIGNATURE { +impl Block for Zip32CDEBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != CENTRAL_DIRECTORY_END_SIGNATURE { return Err(ZipError::InvalidArchive("Invalid digital signature header")); } - let disk_number = reader.read_u16_le()?; - let disk_with_central_directory = reader.read_u16_le()?; - let number_of_files_on_this_disk = 
reader.read_u16_le()?; - let number_of_files = reader.read_u16_le()?; - let central_directory_size = reader.read_u32_le()?; - let central_directory_offset = reader.read_u32_le()?; - let zip_file_comment_length = reader.read_u16_le()? as usize; - let mut zip_file_comment = vec![0; zip_file_comment_length].into_boxed_slice(); - reader.read_exact(&mut zip_file_comment)?; - Ok(CentralDirectoryEnd { + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +#[derive(Debug)] +pub struct Zip32CentralDirectoryEnd { + pub disk_number: u16, + pub disk_with_central_directory: u16, + pub number_of_files_on_this_disk: u16, + pub number_of_files: u16, + pub central_directory_size: u32, + pub central_directory_offset: u32, + pub zip_file_comment: Box<[u8]>, +} + +impl Zip32CentralDirectoryEnd { + fn block_and_comment(self) -> ZipResult<(Zip32CDEBlock, Box<[u8]>)> { + let Self { disk_number, disk_with_central_directory, number_of_files_on_this_disk, @@ -49,80 +171,202 @@ impl CentralDirectoryEnd { central_directory_size, central_directory_offset, zip_file_comment, + } = self; + let block = Zip32CDEBlock { + magic: CENTRAL_DIRECTORY_END_SIGNATURE, + + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment_length: zip_file_comment.len().try_into().unwrap_or(u16::MAX), + }; + Ok((block, zip_file_comment)) + } + + pub fn parse(reader: &mut T) -> ZipResult { + let Zip32CDEBlock { + // magic, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment_length, + .. 
+ } = Zip32CDEBlock::parse(reader)?; + + let mut zip_file_comment = vec![0u8; zip_file_comment_length as usize]; + reader.read_exact(&mut zip_file_comment)?; + + Ok(Zip32CentralDirectoryEnd { + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + zip_file_comment: zip_file_comment.into_boxed_slice(), }) } - pub fn find_and_parse(reader: &mut T) -> ZipResult<(CentralDirectoryEnd, u64)> { - const HEADER_SIZE: u64 = 22; - const MAX_HEADER_AND_COMMENT_SIZE: u64 = 66000; - const BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE: u64 = HEADER_SIZE - 6; + pub fn find_and_parse( + reader: &mut T, + ) -> ZipResult<(Zip32CentralDirectoryEnd, u64)> { let file_length = reader.seek(io::SeekFrom::End(0))?; - let search_upper_bound = 0; - - if file_length < HEADER_SIZE { + if file_length < mem::size_of::() as u64 { return Err(ZipError::InvalidArchive("Invalid zip header")); } - let mut pos = file_length - HEADER_SIZE; - while pos >= search_upper_bound { - let mut have_signature = false; - reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32_le()? == CENTRAL_DIRECTORY_END_SIGNATURE { - have_signature = true; - reader.seek(io::SeekFrom::Current( - BYTES_BETWEEN_MAGIC_AND_COMMENT_SIZE as i64, - ))?; - let cde_start_pos = reader.seek(io::SeekFrom::Start(pos))?; - if let Ok(end_header) = CentralDirectoryEnd::parse(reader) { - return Ok((end_header, cde_start_pos)); + let search_upper_bound = 0; + + const END_WINDOW_SIZE: usize = 512; + + let sig_bytes = CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&sig_bytes); + + let mut window_start: u64 = file_length.saturating_sub(END_WINDOW_SIZE as u64); + let mut window = [0u8; END_WINDOW_SIZE]; + while window_start >= search_upper_bound { + /* Go to the start of the window in the file. 
*/ + reader.seek(io::SeekFrom::Start(window_start))?; + + /* Identify how many bytes to read (this may be less than the window size for files + * smaller than END_WINDOW_SIZE). */ + let end = (window_start + END_WINDOW_SIZE as u64).min(file_length); + let cur_len = (end - window_start) as usize; + debug_assert!(cur_len > 0); + debug_assert!(cur_len <= END_WINDOW_SIZE); + let cur_window: &mut [u8] = &mut window[..cur_len]; + /* Read the window into the bytes! */ + reader.read_exact(cur_window)?; + + /* Find instances of the magic signature. */ + for offset in finder.rfind_iter(cur_window) { + let cde_start_pos = window_start + offset as u64; + reader.seek(io::SeekFrom::Start(cde_start_pos))?; + /* Drop any headers that don't parse. */ + if let Ok(cde) = Self::parse(reader) { + return Ok((cde, cde_start_pos)); } } - pos = match pos.checked_sub(if have_signature { - size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64 - } else { - 1 - }) { - Some(p) => p, - None => break, - }; + + /* We always want to make sure we go allllll the way back to the start of the file if + * we can't find it elsewhere. However, our `while` condition doesn't check that. So we + * avoid infinite looping by checking at the end of the loop. */ + if window_start == search_upper_bound { + break; + } + debug_assert!(END_WINDOW_SIZE > mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE)); + /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that + * overlap our nice neat window boundaries! */ + window_start = (window_start + /* NB: To catch matches across window boundaries, we need to make our blocks overlap + * by the width of the pattern to match. */ + + mem::size_of_val(&CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + /* This should never happen, but make sure we don't go past the end of the file. 
*/ + .min(file_length); + window_start = window_start + .saturating_sub( + /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at + * once (unless limited by file_length). */ + END_WINDOW_SIZE as u64, + ) + /* This will never go below the value of `search_upper_bound`, so we have a special + * `if window_start == search_upper_bound` check above. */ + .max(search_upper_bound); } + Err(ZipError::InvalidArchive( "Could not find central directory end", )) } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u16_le(self.disk_number)?; - writer.write_u16_le(self.disk_with_central_directory)?; - writer.write_u16_le(self.number_of_files_on_this_disk)?; - writer.write_u16_le(self.number_of_files)?; - writer.write_u32_le(self.central_directory_size)?; - writer.write_u32_le(self.central_directory_offset)?; - writer.write_u16_le(self.zip_file_comment.len() as u16)?; - writer.write_all(&self.zip_file_comment)?; + pub fn write(self, writer: &mut T) -> ZipResult<()> { + let (block, comment) = self.block_and_comment()?; + block.write(writer)?; + writer.write_all(&comment)?; Ok(()) } } -pub struct Zip64CentralDirectoryEndLocator { +#[derive(Copy, Clone)] +#[repr(packed)] +pub struct Zip64CDELocatorBlock { + magic: Magic, pub disk_with_central_directory: u32, pub end_of_central_directory_offset: u64, pub number_of_disks: u32, } -impl Zip64CentralDirectoryEndLocator { - pub fn parse(reader: &mut T) -> ZipResult { - let magic = reader.read_u32_le()?; - if magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { +impl Zip64CDELocatorBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (disk_with_central_directory, u32), + (end_of_central_directory_offset, u64), + (number_of_disks, u32), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, 
Magic), + (disk_with_central_directory, u32), + (end_of_central_directory_offset, u64), + (number_of_disks, u32), + ] + ]; + self + } +} + +impl Block for Zip64CDELocatorBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE { return Err(ZipError::InvalidArchive( "Invalid zip64 locator digital signature header", )); } - let disk_with_central_directory = reader.read_u32_le()?; - let end_of_central_directory_offset = reader.read_u64_le()?; - let number_of_disks = reader.read_u32_le()?; + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +pub struct Zip64CentralDirectoryEndLocator { + pub disk_with_central_directory: u32, + pub end_of_central_directory_offset: u64, + pub number_of_disks: u32, +} + +impl Zip64CentralDirectoryEndLocator { + pub fn parse(reader: &mut T) -> ZipResult { + let Zip64CDELocatorBlock { + // magic, + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + .. 
+ } = Zip64CDELocatorBlock::parse(reader)?; Ok(Zip64CentralDirectoryEndLocator { disk_with_central_directory, @@ -131,12 +375,96 @@ impl Zip64CentralDirectoryEndLocator { }) } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE)?; - writer.write_u32_le(self.disk_with_central_directory)?; - writer.write_u64_le(self.end_of_central_directory_offset)?; - writer.write_u32_le(self.number_of_disks)?; - Ok(()) + pub fn block(self) -> Zip64CDELocatorBlock { + let Self { + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + } = self; + Zip64CDELocatorBlock { + magic: ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE, + disk_with_central_directory, + end_of_central_directory_offset, + number_of_disks, + } + } + + pub fn write(self, writer: &mut T) -> ZipResult<()> { + self.block().write(writer) + } +} + +#[derive(Copy, Clone)] +#[repr(packed)] +pub struct Zip64CDEBlock { + magic: Magic, + pub record_size: u64, + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub disk_number: u32, + pub disk_with_central_directory: u32, + pub number_of_files_on_this_disk: u64, + pub number_of_files: u64, + pub central_directory_size: u64, + pub central_directory_offset: u64, +} + +impl Zip64CDEBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, Magic), + (record_size, u64), + (version_made_by, u16), + (version_needed_to_extract, u16), + (disk_number, u32), + (disk_with_central_directory, u32), + (number_of_files_on_this_disk, u64), + (number_of_files, u64), + (central_directory_size, u64), + (central_directory_offset, u64), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, Magic), + (record_size, u64), + (version_made_by, u16), + (version_needed_to_extract, u16), + (disk_number, u32), + (disk_with_central_directory, u32), + 
(number_of_files_on_this_disk, u64), + (number_of_files, u64), + (central_directory_size, u64), + (central_directory_offset, u64), + ] + ]; + self + } +} + +impl Block for Zip64CDEBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid digital signature header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() } } @@ -153,56 +481,105 @@ pub struct Zip64CentralDirectoryEnd { } impl Zip64CentralDirectoryEnd { + pub fn parse(reader: &mut T) -> ZipResult { + let Zip64CDEBlock { + // record_size, + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + .. + } = Zip64CDEBlock::parse(reader)?; + Ok(Self { + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + }) + } + pub fn find_and_parse( reader: &mut T, nominal_offset: u64, search_upper_bound: u64, ) -> ZipResult> { let mut results = Vec::new(); - let mut pos = search_upper_bound; - - while pos >= nominal_offset { - let mut have_signature = false; - reader.seek(io::SeekFrom::Start(pos))?; - if reader.read_u32_le()? == ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE { - have_signature = true; - let archive_offset = pos - nominal_offset; - - let _record_size = reader.read_u64_le()?; - // We would use this value if we did anything with the "zip64 extensible data sector". 
- - let version_made_by = reader.read_u16_le()?; - let version_needed_to_extract = reader.read_u16_le()?; - let disk_number = reader.read_u32_le()?; - let disk_with_central_directory = reader.read_u32_le()?; - let number_of_files_on_this_disk = reader.read_u64_le()?; - let number_of_files = reader.read_u64_le()?; - let central_directory_size = reader.read_u64_le()?; - let central_directory_offset = reader.read_u64_le()?; - - results.push(( - Zip64CentralDirectoryEnd { - version_made_by, - version_needed_to_extract, - disk_number, - disk_with_central_directory, - number_of_files_on_this_disk, - number_of_files, - central_directory_size, - central_directory_offset, - }, - archive_offset, - )); + + const END_WINDOW_SIZE: usize = 2048; + + let sig_bytes = ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE.to_le_bytes(); + let finder = FinderRev::new(&sig_bytes); + + let mut window_start: u64 = search_upper_bound + .saturating_sub(END_WINDOW_SIZE as u64) + .max(nominal_offset); + let mut window = [0u8; END_WINDOW_SIZE]; + while window_start >= nominal_offset { + reader.seek(io::SeekFrom::Start(window_start))?; + + /* Identify how many bytes to read (this may be less than the window size for files + * smaller than END_WINDOW_SIZE). */ + let end = (window_start + END_WINDOW_SIZE as u64).min(search_upper_bound); + + debug_assert!(end >= window_start); + let cur_len = (end - window_start) as usize; + if cur_len == 0 { + break; } - pos = match pos.checked_sub(if have_signature { - size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64 - } else { - 1 - }) { - None => break, - Some(p) => p, + debug_assert!(cur_len <= END_WINDOW_SIZE); + let cur_window: &mut [u8] = &mut window[..cur_len]; + /* Read the window into the bytes! */ + reader.read_exact(cur_window)?; + + /* Find instances of the magic signature. 
*/ + for offset in finder.rfind_iter(cur_window) { + let cde_start_pos = window_start + offset as u64; + reader.seek(io::SeekFrom::Start(cde_start_pos))?; + + debug_assert!(cde_start_pos >= nominal_offset); + let archive_offset = cde_start_pos - nominal_offset; + let cde = Self::parse(reader)?; + + results.push((cde, archive_offset)); } + + /* We always want to make sure we go allllll the way back to the start of the file if + * we can't find it elsewhere. However, our `while` condition doesn't check that. So we + * avoid infinite looping by checking at the end of the loop. */ + if window_start == nominal_offset { + break; + } + debug_assert!( + END_WINDOW_SIZE > mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) + ); + /* Shift the window by END_WINDOW_SIZE bytes, but make sure to cover matches that + * overlap our nice neat window boundaries! */ + window_start = (window_start + /* NB: To catch matches across window boundaries, we need to make our blocks overlap + * by the width of the pattern to match. */ + + mem::size_of_val(&ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE) as u64) + /* This may never happen, but make sure we don't go past the end of the specified + * range. */ + .min(search_upper_bound); + window_start = window_start + .saturating_sub( + /* Shift the window upon each iteration so we search END_WINDOW_SIZE bytes at + * once (unless limited by search_upper_bound). */ + END_WINDOW_SIZE as u64, + ) + /* This will never go below the value of `nominal_offset`, so we have a special + * `if window_start == nominal_offset` check above. 
*/ + .max(nominal_offset); } + if results.is_empty() { Err(ZipError::InvalidArchive( "Could not find ZIP64 central directory end", @@ -212,18 +589,34 @@ impl Zip64CentralDirectoryEnd { } } - pub fn write(&self, writer: &mut T) -> ZipResult<()> { - writer.write_u32_le(ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE)?; - writer.write_u64_le(44)?; // record size - writer.write_u16_le(self.version_made_by)?; - writer.write_u16_le(self.version_needed_to_extract)?; - writer.write_u32_le(self.disk_number)?; - writer.write_u32_le(self.disk_with_central_directory)?; - writer.write_u64_le(self.number_of_files_on_this_disk)?; - writer.write_u64_le(self.number_of_files)?; - writer.write_u64_le(self.central_directory_size)?; - writer.write_u64_le(self.central_directory_offset)?; - Ok(()) + pub fn block(self) -> Zip64CDEBlock { + let Self { + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + } = self; + Zip64CDEBlock { + magic: ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE, + /* currently unused */ + record_size: 44, + version_made_by, + version_needed_to_extract, + disk_number, + disk_with_central_directory, + number_of_files_on_this_disk, + number_of_files, + central_directory_size, + central_directory_offset, + } + } + + pub fn write(self, writer: &mut T) -> ZipResult<()> { + self.block().write(writer) } } @@ -273,3 +666,51 @@ pub(crate) fn path_to_string>(path: T) -> Box { maybe_original.unwrap().into() } } + +#[cfg(test)] +mod test { + use super::*; + use std::io::Cursor; + + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] + #[repr(packed)] + pub struct TestBlock { + magic: Magic, + pub file_name_length: u16, + } + + impl TestBlock { + #[allow(clippy::wrong_self_convention)] + fn from_le(mut self) -> Self { + from_le![self, [(magic, Magic), (file_name_length, u16)]]; + self + } + fn to_le(mut self) -> Self { + to_le![self, [(magic, Magic), 
(file_name_length, u16)]]; + self + } + } + + impl Block for TestBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + Ok(Self::deserialize(&bytes).from_le()) + } + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } + } + + /// Demonstrate that a block object can be safely written to memory and deserialized back out. + #[test] + fn block_serde() { + let block = TestBlock { + magic: 0x01111, + file_name_length: 3, + }; + let mut c = Cursor::new(Vec::new()); + block.write(&mut c).unwrap(); + c.set_position(0); + let block2 = TestBlock::parse(&mut c).unwrap(); + assert_eq!(block, block2); + } +} diff --git a/src/types.rs b/src/types.rs old mode 100644 new mode 100755 index ab8d22da6..34b029581 --- a/src/types.rs +++ b/src/types.rs @@ -1,12 +1,17 @@ //! Types that specify what is contained in a ZIP. +use crate::cp437::FromCp437; +use crate::write::{FileOptionExtension, FileOptions}; use path::{Component, Path, PathBuf}; use std::path; use std::sync::{Arc, OnceLock}; +#[cfg(doc)] +use crate::read::ZipFile; #[cfg(feature = "chrono")] use chrono::{Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike}; -#[cfg(doc)] -use {crate::read::ZipFile, crate::write::FileOptions}; + +use crate::result::{ZipError, ZipResult}; +use crate::spec::{self, Block}; pub(crate) mod ffi { pub const S_IFDIR: u32 = 0o0040000; @@ -21,6 +26,12 @@ use crate::CompressionMethod; #[cfg(feature = "time")] use time::{error::ComponentRange, Date, Month, OffsetDateTime, PrimitiveDateTime, Time}; +pub(crate) struct ZipRawValues { + pub(crate) crc32: u32, + pub(crate) compressed_size: u64, + pub(crate) uncompressed_size: u64, +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[repr(u8)] pub enum System { @@ -482,6 +493,399 @@ impl ZipFileData { .map(|v| v.len()) .unwrap_or_default() } + + #[allow(clippy::too_many_arguments)] + pub(crate) fn initialize_local_block( + name: S, + options: &FileOptions, + raw_values: ZipRawValues, + header_start: u64, + extra_data_start: Option, + 
aes_extra_data_start: u64, + compression_method: crate::compression::CompressionMethod, + aes_mode: Option<(AesMode, AesVendorVersion, CompressionMethod)>, + extra_field: Option>>, + ) -> Self + where + S: Into>, + { + let permissions = options.permissions.unwrap_or(0o100644); + let file_name: Box = name.into(); + let file_name_raw: Box<[u8]> = file_name.bytes().collect(); + ZipFileData { + system: System::Unix, + version_made_by: DEFAULT_VERSION, + encrypted: options.encrypt_with.is_some(), + using_data_descriptor: false, + compression_method, + compression_level: options.compression_level, + last_modified_time: options.last_modified_time, + crc32: raw_values.crc32, + compressed_size: raw_values.compressed_size, + uncompressed_size: raw_values.uncompressed_size, + file_name, // Never used for saving, but used as map key in insert_file_data() + file_name_raw, + extra_field, + central_extra_field: options.extended_options.central_extra_data().cloned(), + file_comment: String::with_capacity(0).into_boxed_str(), + header_start, + data_start: OnceLock::new(), + central_header_start: 0, + external_attributes: permissions << 16, + large_file: options.large_file, + aes_mode, + extra_fields: Vec::new(), + extra_data_start, + aes_extra_data_start, + } + } + + pub(crate) fn from_local_block( + block: ZipLocalEntryBlock, + reader: &mut R, + ) -> ZipResult { + let ZipLocalEntryBlock { + // magic, + version_made_by, + flags, + compression_method, + last_mod_time, + last_mod_date, + crc32, + compressed_size, + uncompressed_size, + file_name_length, + extra_field_length, + .. 
+ } = block; + + let encrypted: bool = flags & 1 == 1; + let is_utf8: bool = flags & (1 << 11) != 0; /* general purpose bit 11 marks a UTF-8 name, matching what flags() writes */ + let using_data_descriptor: bool = flags & (1 << 3) != 0; + #[allow(deprecated)] + let compression_method = crate::CompressionMethod::from_u16(compression_method); + let file_name_length: usize = file_name_length.into(); + let extra_field_length: usize = extra_field_length.into(); + + if encrypted { + return Err(ZipError::UnsupportedArchive( + "Encrypted files are not supported", + )); + } + if using_data_descriptor { + return Err(ZipError::UnsupportedArchive( + "The file length is not available in the local header", + )); + } + + let mut file_name_raw = vec![0u8; file_name_length]; + reader.read_exact(&mut file_name_raw)?; + let mut extra_field = vec![0u8; extra_field_length]; + reader.read_exact(&mut extra_field)?; + + let file_name: Box = match is_utf8 { + true => String::from_utf8_lossy(&file_name_raw).into(), + false => file_name_raw.clone().from_cp437().into(), + }; + + let system: u8 = (version_made_by >> 8).try_into().unwrap(); + Ok(ZipFileData { + system: System::from(system), + /* NB: this strips the top 8 bits! */ + version_made_by: version_made_by as u8, + encrypted, + using_data_descriptor, + compression_method, + compression_level: None, + last_modified_time: DateTime::from_msdos(last_mod_date, last_mod_time), + crc32, + compressed_size: compressed_size.into(), + uncompressed_size: uncompressed_size.into(), + file_name, + file_name_raw: file_name_raw.into(), + extra_field: Some(Arc::new(extra_field)), + central_extra_field: None, + file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory + // header_start and data start are not available, but also don't matter, since seeking is + // not available. + header_start: 0, + data_start: OnceLock::new(), + central_header_start: 0, + // The external_attributes field is only available in the central directory.
+ // We set this to zero, which should be valid as the docs state 'If input came + // from standard input, this field is set to zero.' + external_attributes: 0, + large_file: false, + aes_mode: None, + extra_fields: Vec::new(), + extra_data_start: None, + aes_extra_data_start: 0, + }) + } + + fn is_utf8(&self) -> bool { + std::str::from_utf8(&self.file_name_raw).is_ok() + } + + fn is_ascii(&self) -> bool { + self.file_name_raw.is_ascii() + } + + fn flags(&self) -> u16 { + (if self.is_utf8() && !self.is_ascii() { + 1u16 << 11 + } else { + 0 + }) | if self.encrypted { 1u16 << 0 } else { 0 } + } + + pub(crate) fn local_block(&self) -> ZipResult { + let (compressed_size, uncompressed_size) = if self.large_file { + (spec::ZIP64_BYTES_THR as u32, spec::ZIP64_BYTES_THR as u32) + } else { + ( + self.compressed_size.try_into().unwrap(), + self.uncompressed_size.try_into().unwrap(), + ) + }; + + let mut extra_field_length = self.extra_field_len(); + if self.large_file { + /* TODO: magic number */ + extra_field_length += 20; + } + if extra_field_length + self.central_extra_field_len() > u16::MAX as usize { + return Err(ZipError::InvalidArchive("Extra data field is too large")); + } + let extra_field_length: u16 = extra_field_length.try_into().unwrap(); + + Ok(ZipLocalEntryBlock { + magic: spec::LOCAL_FILE_HEADER_SIGNATURE, + version_made_by: self.version_needed(), + flags: self.flags(), + #[allow(deprecated)] + compression_method: self.compression_method.to_u16(), + last_mod_time: self.last_modified_time.timepart(), + last_mod_date: self.last_modified_time.datepart(), + crc32: self.crc32, + compressed_size, + uncompressed_size, + file_name_length: self.file_name_raw.len().try_into().unwrap(), + extra_field_length, + }) + } + + pub(crate) fn block(&self, zip64_extra_field_length: u16) -> ZipEntryBlock { + let extra_field_len: u16 = self.extra_field_len().try_into().unwrap(); + let central_extra_field_len: u16 = self.central_extra_field_len().try_into().unwrap(); + 
ZipEntryBlock { + magic: spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE, + version_made_by: (self.system as u16) << 8 | (self.version_made_by as u16), + version_to_extract: self.version_needed(), + flags: self.flags(), + #[allow(deprecated)] + compression_method: self.compression_method.to_u16(), + last_mod_time: self.last_modified_time.timepart(), + last_mod_date: self.last_modified_time.datepart(), + crc32: self.crc32, + compressed_size: self + .compressed_size + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + uncompressed_size: self + .uncompressed_size + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + file_name_length: self.file_name_raw.len().try_into().unwrap(), + extra_field_length: zip64_extra_field_length + + extra_field_len + + central_extra_field_len, + /* FIXME: this appears to be set to 0 in write_central_directory_header() on master? */ + file_comment_length: self.file_comment.as_bytes().len().try_into().unwrap(), + disk_number: 0, + internal_file_attributes: 0, + external_file_attributes: self.external_attributes, + offset: self + .header_start + .min(spec::ZIP64_BYTES_THR) + .try_into() + .unwrap(), + } + } +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipEntryBlock { + pub magic: spec::Magic, + pub version_made_by: u16, + pub version_to_extract: u16, + pub flags: u16, + pub compression_method: u16, + pub last_mod_time: u16, + pub last_mod_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + pub file_name_length: u16, + pub extra_field_length: u16, + pub file_comment_length: u16, + pub disk_number: u16, + pub internal_file_attributes: u16, + pub external_file_attributes: u32, + pub offset: u32, +} + +impl ZipEntryBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (version_to_extract, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, 
u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + (file_comment_length, u16), + (disk_number, u16), + (internal_file_attributes, u16), + (external_file_attributes, u32), + (offset, u32), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (version_to_extract, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + (file_comment_length, u16), + (disk_number, u16), + (internal_file_attributes, u16), + (external_file_attributes, u32), + (offset, u32), + ] + ]; + self + } +} + +impl Block for ZipEntryBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid Central Directory header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } +} + +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipLocalEntryBlock { + magic: spec::Magic, + pub version_made_by: u16, + pub flags: u16, + pub compression_method: u16, + pub last_mod_time: u16, + pub last_mod_date: u16, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + pub file_name_length: u16, + pub extra_field_length: u16, +} + +impl ZipLocalEntryBlock { + #[allow(clippy::wrong_self_convention)] + #[inline(always)] + fn from_le(mut self) -> Self { + from_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + 
(extra_field_length, u16), + ] + ]; + self + } + + #[inline(always)] + fn to_le(mut self) -> Self { + to_le![ + self, + [ + (magic, spec::Magic), + (version_made_by, u16), + (flags, u16), + (compression_method, u16), + (last_mod_time, u16), + (last_mod_date, u16), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + (file_name_length, u16), + (extra_field_length, u16), + ] + ]; + self + } +} + +impl Block for ZipLocalEntryBlock { + fn interpret(bytes: Box<[u8]>) -> ZipResult { + let block = Self::deserialize(&bytes).from_le(); + + if block.magic != spec::LOCAL_FILE_HEADER_SIGNATURE { + return Err(ZipError::InvalidArchive("Invalid local file header")); + } + + Ok(block) + } + + fn encode(self) -> Box<[u8]> { + self.to_le().serialize() + } } /// The encryption specification used to encrypt a file with AES. diff --git a/src/write.rs b/src/write.rs index d1686cc06..f89596c3a 100644 --- a/src/write.rs +++ b/src/write.rs @@ -5,10 +5,10 @@ use crate::aes::AesWriter; use crate::compression::CompressionMethod; use crate::read::{find_content, ZipArchive, ZipFile, ZipFileReader}; use crate::result::{ZipError, ZipResult}; -use crate::spec; +use crate::spec::{self, Block}; #[cfg(feature = "aes-crypto")] use crate::types::AesMode; -use crate::types::{ffi, AesVendorVersion, DateTime, System, ZipFileData, DEFAULT_VERSION}; +use crate::types::{ffi, AesVendorVersion, DateTime, ZipFileData, ZipRawValues, DEFAULT_VERSION}; use crate::write::ffi::S_IFLNK; #[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] use core::num::NonZeroU64; @@ -22,7 +22,7 @@ use std::io::{BufReader, SeekFrom}; use std::marker::PhantomData; use std::mem; use std::str::{from_utf8, Utf8Error}; -use std::sync::{Arc, OnceLock}; +use std::sync::Arc; #[cfg(any( feature = "deflate", @@ -150,11 +150,6 @@ struct ZipWriterStats { bytes_written: u64, } -struct ZipRawValues { - crc32: u32, - compressed_size: u64, - uncompressed_size: u64, -} mod sealed { use std::sync::Arc; @@ 
-191,7 +186,7 @@ mod sealed { } #[derive(Copy, Clone, Debug)] -enum EncryptWith<'k> { +pub(crate) enum EncryptWith<'k> { #[cfg(feature = "aes-crypto")] Aes { mode: AesMode, @@ -226,9 +221,9 @@ pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) last_modified_time: DateTime, pub(crate) permissions: Option, pub(crate) large_file: bool, - encrypt_with: Option>, - extended_options: T, - alignment: u16, + pub(crate) encrypt_with: Option>, + pub(crate) extended_options: T, + pub(crate) alignment: u16, #[cfg(feature = "deflate-zopfli")] pub(super) zopfli_buffer_size: Option, } @@ -515,7 +510,8 @@ impl ZipWriterStats { impl ZipWriter { /// Initializes the archive from an existing ZIP archive, making it ready for append. pub fn new_append(mut readwriter: A) -> ZipResult> { - let (footer, cde_start_pos) = spec::CentralDirectoryEnd::find_and_parse(&mut readwriter)?; + let (footer, cde_start_pos) = + spec::Zip32CentralDirectoryEnd::find_and_parse(&mut readwriter)?; let metadata = ZipArchive::get_metadata(&mut readwriter, &footer, cde_start_pos)?; Ok(ZipWriter { @@ -779,7 +775,6 @@ impl ZipWriter { { let header_start = self.inner.get_plain().stream_position()?; - let permissions = options.permissions.unwrap_or(0o100644); let (compression_method, aes_mode) = match options.encrypt_with { #[cfg(feature = "aes-crypto")] Some(EncryptWith::Aes { mode, .. 
}) => ( @@ -788,75 +783,37 @@ impl ZipWriter { ), _ => (options.compression_method, None), }; - let file = ZipFileData { - system: System::Unix, - version_made_by: DEFAULT_VERSION, - encrypted: options.encrypt_with.is_some(), - using_data_descriptor: false, - compression_method, - compression_level: options.compression_level, - last_modified_time: options.last_modified_time, - crc32: raw_values.crc32, - compressed_size: raw_values.compressed_size, - uncompressed_size: raw_values.uncompressed_size, - file_name: name.to_owned().into(), // Never used for saving, but used as map key in insert_file_data() - file_name_raw: name.into().bytes().collect(), - extra_field, - central_extra_field: options.extended_options.central_extra_data().cloned(), - file_comment: String::with_capacity(0).into_boxed_str(), + let file = ZipFileData::initialize_local_block( + name, + &options, + raw_values, header_start, - extra_data_start: None, - data_start: OnceLock::new(), - central_header_start: 0, - external_attributes: permissions << 16, - large_file: options.large_file, - aes_mode, + None, aes_extra_data_start, + compression_method, + aes_mode, + extra_field, + ); - extra_fields: Vec::new(), - }; let index = self.insert_file_data(file)?; let file = &mut self.files[index]; let writer = self.inner.get_plain(); - // local file header signature - writer.write_u32_le(spec::LOCAL_FILE_HEADER_SIGNATURE)?; - // version needed to extract - writer.write_u16_le(file.version_needed())?; - // general purpose bit flag - let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok(); - let is_ascii = file.file_name_raw.is_ascii(); - let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 } - | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16_le(flag)?; - // Compression method - #[allow(deprecated)] - writer.write_u16_le(file.compression_method.to_u16())?; - // last mod file time and last mod file date - writer.write_u16_le(file.last_modified_time.timepart())?; - 
writer.write_u16_le(file.last_modified_time.datepart())?; - // crc-32 - writer.write_u32_le(file.crc32)?; - // compressed size and uncompressed size - if file.large_file { - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; - writer.write_u32_le(spec::ZIP64_BYTES_THR as u32)?; - } else { - writer.write_u32_le(file.compressed_size as u32)?; - writer.write_u32_le(file.uncompressed_size as u32)?; - } - // file name length - writer.write_u16_le(file.file_name_raw.len() as u16)?; - // extra field length - let mut extra_field_length = file.extra_field_len(); - if file.large_file { - extra_field_length += 20; - } - if extra_field_length + file.central_extra_field_len() > u16::MAX as usize { - let _ = self.abort_file(); - return Err(InvalidArchive("Extra data field is too large")); + + let block = match file.local_block() { + Ok(block) => block, + Err(e) => { + let _ = self.abort_file(); + return Err(e); + } + }; + match block.write(writer) { + Ok(()) => (), + Err(e) => { + let _ = self.abort_file(); + return Err(e); + } } - let extra_field_length = extra_field_length as u16; - writer.write_u16_le(extra_field_length)?; + // file name writer.write_all(&file.file_name_raw)?; // zip64 extra field @@ -874,7 +831,7 @@ impl ZipWriter { if unaligned_header_bytes != 0 { let pad_length = (align - unaligned_header_bytes) as usize; let Some(new_extra_field_length) = - (pad_length as u16).checked_add(extra_field_length) + (pad_length as u16).checked_add(block.extra_field_length) else { let _ = self.abort_file(); return Err(InvalidArchive( @@ -1429,7 +1386,7 @@ impl ZipWriter { } let number_of_files = self.files.len().min(spec::ZIP64_ENTRY_THR) as u16; - let footer = spec::CentralDirectoryEnd { + let footer = spec::Zip32CentralDirectoryEnd { disk_number: 0, disk_with_central_directory: 0, zip_file_comment: self.comment.clone(), @@ -1803,49 +1760,9 @@ fn write_central_directory_header(writer: &mut T, file: &ZipFileData) let zip64_extra_field_length = 
write_central_zip64_extra_field(&mut zip64_extra_field.as_mut(), file)?; - // central file header signature - writer.write_u32_le(spec::CENTRAL_DIRECTORY_HEADER_SIGNATURE)?; - // version made by - let version_made_by = (file.system as u16) << 8 | (file.version_made_by as u16); - writer.write_u16_le(version_made_by)?; - // version needed to extract - writer.write_u16_le(file.version_needed())?; - // general puprose bit flag - let is_utf8 = std::str::from_utf8(&file.file_name_raw).is_ok(); - let is_ascii = file.file_name_raw.is_ascii(); - let flag = if is_utf8 && !is_ascii { 1u16 << 11 } else { 0 } - | if file.encrypted { 1u16 << 0 } else { 0 }; - writer.write_u16_le(flag)?; - // compression method - #[allow(deprecated)] - writer.write_u16_le(file.compression_method.to_u16())?; - // last mod file time + date - writer.write_u16_le(file.last_modified_time.timepart())?; - writer.write_u16_le(file.last_modified_time.datepart())?; - // crc-32 - writer.write_u32_le(file.crc32)?; - // compressed size - writer.write_u32_le(file.compressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; - // uncompressed size - writer.write_u32_le(file.uncompressed_size.min(spec::ZIP64_BYTES_THR) as u32)?; - // file name length - writer.write_u16_le(file.file_name_raw.len() as u16)?; - // extra field length - writer.write_u16_le( - zip64_extra_field_length - + file.extra_field_len() as u16 - + file.central_extra_field_len() as u16, - )?; - // file comment length - writer.write_u16_le(0)?; - // disk number start - writer.write_u16_le(0)?; - // internal file attributes - writer.write_u16_le(0)?; - // external file attributes - writer.write_u32_le(file.external_attributes)?; - // relative offset of local header - writer.write_u32_le(file.header_start.min(spec::ZIP64_BYTES_THR) as u32)?; + let block = file.block(zip64_extra_field_length); + block.write(writer)?; + // file name writer.write_all(&file.file_name_raw)?; // zip64 extra field @@ -1921,6 +1838,7 @@ fn update_local_zip64_extra_field( Ok(()) } 
+/* TODO: make this use the Block trait somehow! */ fn write_central_zip64_extra_field(writer: &mut T, file: &ZipFileData) -> ZipResult { // The order of the fields in the zip64 extended // information record is fixed, but the fields MUST @@ -2069,7 +1987,7 @@ mod test { writer .start_file_from_path(path, SimpleFileOptions::default()) .unwrap(); - let archive = ZipArchive::new(writer.finish().unwrap()).unwrap(); + let archive = writer.finish_into_readable().unwrap(); assert_eq!(Some("foo/example.txt"), archive.name_for_index(0)); } @@ -2222,8 +2140,7 @@ mod test { writer .shallow_copy_file(SECOND_FILENAME, SECOND_FILENAME) .expect_err("Duplicate filename"); - let zip = writer.finish().unwrap(); - let mut reader = ZipArchive::new(zip).unwrap(); + let mut reader = writer.finish_into_readable().unwrap(); let mut file_names: Vec<&str> = reader.file_names().collect(); file_names.sort(); let mut expected_file_names = vec![RT_TEST_FILENAME, SECOND_FILENAME]; @@ -2507,7 +2424,7 @@ mod test { let contents = b"sleeping"; let () = zip.start_file("sleep", options).unwrap(); let _count = zip.write(&contents[..]).unwrap(); - let mut zip = ZipArchive::new(zip.finish().unwrap()).unwrap(); + let mut zip = zip.finish_into_readable().unwrap(); let file = zip.by_index(0).unwrap(); assert_eq!(file.name(), "sleep"); assert_eq!(file.data_start(), page_size.into());