diff --git a/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs b/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs index 16aa8a5a087295..3bd1a5113da156 100644 --- a/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs +++ b/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs @@ -436,6 +436,31 @@ internal static void AddEntry(ZipArchive archive, string name, string contents, } } + public static byte[] CreateZipFile(int entryCount, byte[] entryContents) + { + using (MemoryStream ms = new()) + { + using (ZipArchive createdArchive = new(ms, ZipArchiveMode.Create, true)) + { + for (int i = 0; i < entryCount; i++) + { + string fileName = $"dummydata/{i}.bin"; + ZipArchiveEntry newEntry = createdArchive.CreateEntry(fileName); + + newEntry.LastWriteTime = DateTimeOffset.Now.AddHours(-1.0); + using (Stream entryWriteStream = newEntry.Open()) + { + entryWriteStream.Write(entryContents); + entryWriteStream.WriteByte((byte)(i % byte.MaxValue)); + } + } + } + ms.Flush(); + + return ms.ToArray(); + } + } + protected const string Utf8SmileyEmoji = "\ud83d\ude04"; protected const string Utf8LowerCaseOUmlautChar = "\u00F6"; protected const string Utf8CopyrightChar = "\u00A9"; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index f092759abcaa72..f0b289954012c4 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -30,6 +30,7 @@ public class ZipArchive : IDisposable private readonly Stream? _backingStream; private byte[] _archiveComment; private Encoding? _entryNameAndCommentEncoding; + private long _firstDeletedEntryOffset; #if DEBUG_FORCE_ZIP64 public bool _forceZip64; @@ -164,12 +165,14 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? _entries = new List<ZipArchiveEntry>(); _entriesCollection = new ReadOnlyCollection<ZipArchiveEntry>(_entries); _entriesDictionary = new Dictionary<string, ZipArchiveEntry>(); + Changed = ChangeState.Unchanged; _readEntries = false; _leaveOpen = leaveOpen; _centralDirectoryStart = 0; // invalid until ReadCentralDirectory _isDisposed = false; _numberOfThisDisk = 0; // invalid until ReadCentralDirectory _archiveComment = Array.Empty<byte>(); + _firstDeletedEntryOffset = long.MaxValue; switch (mode) { @@ -217,7 +220,11 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? public string Comment { get => (EntryNameAndCommentEncoding ?? Encoding.UTF8).GetString(_archiveComment); - set => _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _); + set + { + _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _); + Changed |= ChangeState.DynamicLengthMetadata; + } } /// <summary> @@ -383,6 +390,10 @@ private set } } + // This property's value only relates to the top-level fields of the archive (such as the archive comment.) + // New entries in the archive won't change its state. + internal ChangeState Changed { get; private set; } + private ZipArchiveEntry DoCreateEntry(string entryName, CompressionLevel? 
compressionLevel) { ArgumentException.ThrowIfNullOrEmpty(entryName); @@ -409,7 +420,7 @@ internal void AcquireArchiveStream(ZipArchiveEntry entry) { if (!_archiveStreamOwner.EverOpenedForWrite) { - _archiveStreamOwner.WriteAndFinishLocalEntry(); + _archiveStreamOwner.WriteAndFinishLocalEntry(forceWrite: true); } else { @@ -441,6 +452,11 @@ internal void RemoveEntry(ZipArchiveEntry entry) _entries.Remove(entry); _entriesDictionary.Remove(entry.FullName); + // Keep track of the offset of the earliest deleted entry in the archive + if (entry.OriginallyInArchive && entry.OffsetOfLocalHeader < _firstDeletedEntryOffset) + { + _firstDeletedEntryOffset = entry.OffsetOfLocalHeader; + } } internal void ThrowIfDisposed() @@ -550,7 +566,12 @@ private void ReadCentralDirectory() throw new InvalidDataException(SR.NumEntriesWrong); } - _archiveStream.Seek(_centralDirectoryStart + bytesRead, SeekOrigin.Begin); + // Sort _entries by each archive entry's position. This supports the algorithm in WriteFile, so is only + // necessary when the ZipArchive has been opened in Update mode. + if (Mode == ZipArchiveMode.Update) + { + _entries.Sort(ZipArchiveEntry.LocalHeaderOffsetComparer.Instance); + } } catch (EndOfStreamException ex) { @@ -681,41 +702,107 @@ private void WriteFile() // if we are in update mode, we call EnsureCentralDirectoryRead, which sets readEntries to true Debug.Assert(_readEntries); + // Entries starting after this offset have had a dynamically-sized change. Everything on or after this point must be rewritten. + long completeRewriteStartingOffset = 0; + List<ZipArchiveEntry> entriesToWrite = _entries; + if (_mode == ZipArchiveMode.Update) { - List<ZipArchiveEntry> markedForDelete = new List<ZipArchiveEntry>(); + // Entries starting after this offset have some kind of change made to them. It might just be a fixed-length field though, in which case + // that single entry's metadata can be rewritten without impacting anything else. + long startingOffset = _firstDeletedEntryOffset; + long nextFileOffset = 0; + completeRewriteStartingOffset = startingOffset; + + entriesToWrite = new(_entries.Count); foreach (ZipArchiveEntry entry in _entries) { - if (!entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded()) - markedForDelete.Add(entry); + if (!entry.OriginallyInArchive) + { + entriesToWrite.Add(entry); + } + else + { + if (entry.Changes == ChangeState.Unchanged) + { + // Keep track of the expected position of the file entry after the final untouched file entry so that when the loop completes, + // we'll know which position to start writing new entries from. + nextFileOffset = Math.Max(nextFileOffset, entry.OffsetOfCompressedData + entry.CompressedLength); + } + // When calculating the starting offset to load the files from, only look at changed entries which are already in the archive. + else + { + startingOffset = Math.Min(startingOffset, entry.OffsetOfLocalHeader); + } + + // We want to re-write entries which are after the starting offset of the first entry which has pending data to write. + // NB: the existing ZipArchiveEntries are sorted in _entries by their position ascending. + if (entry.OffsetOfLocalHeader >= startingOffset) + { + // If the pending data to write is fixed-length metadata in the header, there's no need to load the compressed file bits. 
+ if ((entry.Changes & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) + { + completeRewriteStartingOffset = Math.Min(completeRewriteStartingOffset, entry.OffsetOfLocalHeader); + } + if (entry.OffsetOfLocalHeader >= completeRewriteStartingOffset) + { + entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded(); + } + + entriesToWrite.Add(entry); + } + } + } + + // If the offset of entries to write from is still at long.MaxValue, then we know that nothing has been deleted, + // nothing has been modified - so we just want to move to the end of all remaining files in the archive. + if (startingOffset == long.MaxValue) + { + startingOffset = nextFileOffset; } - foreach (ZipArchiveEntry entry in markedForDelete) - entry.Delete(); - _archiveStream.Seek(0, SeekOrigin.Begin); - _archiveStream.SetLength(0); + _archiveStream.Seek(startingOffset, SeekOrigin.Begin); } - foreach (ZipArchiveEntry entry in _entries) + foreach (ZipArchiveEntry entry in entriesToWrite) { - entry.WriteAndFinishLocalEntry(); + // We don't always need to write the local header entry, ZipArchiveEntry is usually able to work out when it doesn't need to. + // We want to force this header entry to be written (even for completely untouched entries) if the entry comes after one + // which had a pending dynamically-sized write. + bool forceWriteLocalEntry = !entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= completeRewriteStartingOffset); + + entry.WriteAndFinishLocalEntry(forceWriteLocalEntry); } - long startOfCentralDirectory = _archiveStream.Position; + long plannedCentralDirectoryPosition = _archiveStream.Position; + // If there are no entries in the archive, we still want to create the archive epilogue. + bool archiveEpilogueRequiresUpdate = _entries.Count == 0; foreach (ZipArchiveEntry entry in _entries) { - entry.WriteCentralDirectoryFileHeader(); + // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different. + bool centralDirectoryEntryRequiresUpdate = plannedCentralDirectoryPosition != _centralDirectoryStart + || !entry.OriginallyInArchive || entry.OffsetOfLocalHeader >= completeRewriteStartingOffset; + + entry.WriteCentralDirectoryFileHeader(centralDirectoryEntryRequiresUpdate); + archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate; } - long sizeOfCentralDirectory = _archiveStream.Position - startOfCentralDirectory; + long sizeOfCentralDirectory = _archiveStream.Position - plannedCentralDirectoryPosition; + + WriteArchiveEpilogue(plannedCentralDirectoryPosition, sizeOfCentralDirectory, archiveEpilogueRequiresUpdate); - WriteArchiveEpilogue(startOfCentralDirectory, sizeOfCentralDirectory); + // If entries have been removed and new (smaller) ones added, there could be empty space at the end of the file. + // Shrink the file to reclaim this space. 
+ if (_mode == ZipArchiveMode.Update && _archiveStream.Position != _archiveStream.Length) + { + _archiveStream.SetLength(_archiveStream.Position); + } } // writes eocd, and if needed, zip 64 eocd, zip64 eocd locator // should only throw an exception in extremely exceptional cases because it is called from dispose - private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentralDirectory) + private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentralDirectory, bool centralDirectoryChanged) { // determine if we need Zip 64 if (startOfCentralDirectory >= uint.MaxValue @@ -728,12 +815,37 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr { // if we need zip 64, write zip 64 eocd and locator long zip64EOCDRecordStart = _archiveStream.Position; - Zip64EndOfCentralDirectoryRecord.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory); - Zip64EndOfCentralDirectoryLocator.WriteBlock(_archiveStream, zip64EOCDRecordStart); + + if (centralDirectoryChanged) + { + Zip64EndOfCentralDirectoryRecord.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory); + Zip64EndOfCentralDirectoryLocator.WriteBlock(_archiveStream, zip64EOCDRecordStart); + } + else + { + _archiveStream.Seek(Zip64EndOfCentralDirectoryRecord.TotalSize, SeekOrigin.Current); + _archiveStream.Seek(Zip64EndOfCentralDirectoryLocator.TotalSize, SeekOrigin.Current); + } } // write normal eocd - ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); + if (centralDirectoryChanged || (Changed != ChangeState.Unchanged)) + { + ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); + } + else + { + _archiveStream.Seek(ZipEndOfCentralDirectoryBlock.TotalSize + _archiveComment.Length, SeekOrigin.Current); + } + } + + [Flags] + internal enum ChangeState + { + Unchanged = 0x0, + FixedLengthMetadata = 0x1, + DynamicLengthMetadata = 0x2, + StoredData = 0x4 } } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 8f5fa76b4e00aa..cdb8551bffdca4 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -22,7 +22,7 @@ public partial class ZipArchiveEntry private readonly uint _diskNumberStart; private readonly ZipVersionMadeByPlatform _versionMadeByPlatform; private ZipVersionNeededValues _versionMadeBySpecification; - internal ZipVersionNeededValues _versionToExtract; + private ZipVersionNeededValues _versionToExtract; private BitFlagValues _generalPurposeBitFlag; private readonly bool _isEncrypted; private CompressionMethodValues _storedCompressionMethod; @@ -88,6 +88,8 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd) _fileComment = cd.FileComment; _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, CompressionMethod); + + Changes = ZipArchive.ChangeState.Unchanged; } // Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level. 
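Reviewer note: the ChangeState flags added above drive every rewrite decision in WriteFile and in the entry header writers below. A minimal sketch (not part of this change) of how those flags are meant to combine; the RewriteRules helper and its method names are hypothetical and exist only to illustrate the decision logic:

```csharp
using System;

// Mirrors the internal ZipArchive.ChangeState enum introduced above.
[Flags]
internal enum ChangeState
{
    Unchanged = 0x0,
    FixedLengthMetadata = 0x1,   // e.g. LastWriteTime, ExternalAttributes
    DynamicLengthMetadata = 0x2, // e.g. the entry or archive comment
    StoredData = 0x4             // the entry was opened for writing
}

internal static class RewriteRules
{
    // A dynamic-length change or rewritten data can change the entry's size on disk,
    // shifting every later byte, so the entry and everything after it must be rewritten.
    public static bool ShiftsLaterEntries(ChangeState changes) =>
        (changes & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0;

    // A fixed-length change keeps the entry the same size, so only its local file
    // header and central directory record need to be rewritten in place.
    public static bool HeaderPatchOnly(ChangeState changes) =>
        changes != ChangeState.Unchanged && !ShiftsLaterEntries(changes);
}
```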
@@ -149,6 +151,8 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName) { _archive.AcquireArchiveStream(this); } + + Changes = ZipArchive.ChangeState.Unchanged; } /// <summary> @@ -188,6 +192,7 @@ public int ExternalAttributes { ThrowIfInvalidArchive(); _externalFileAttr = (uint)value; + Changes |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -210,6 +215,7 @@ public string Comment { _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment; } + Changes |= ZipArchive.ChangeState.DynamicLengthMetadata; } } @@ -274,6 +280,7 @@ public DateTimeOffset LastWriteTime throw new ArgumentOutOfRangeException(nameof(value), SR.DateTimeOutOfRange); _lastModified = value; + Changes |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -296,6 +303,12 @@ public long Length /// </summary> public string Name => ParseFileName(FullName, _versionMadeByPlatform); + internal ZipArchive.ChangeState Changes { get; private set; } + + internal bool OriginallyInArchive => _originallyInArchive; + + internal long OffsetOfLocalHeader => _offsetOfLocalHeader; + /// <summary> /// Deletes the entry from the archive. /// </summary> @@ -372,7 +385,7 @@ private string DecodeEntryString(byte[] entryStringBytes) internal bool EverOpenedForWrite => _everOpenedForWrite; - private long OffsetOfCompressedData + internal long OffsetOfCompressedData { get { @@ -458,15 +471,15 @@ private CompressionMethodValues CompressionMethod // that we are reading to write the central directory // // should only throw an exception in extremely exceptional cases because it is called from dispose - internal void WriteAndFinishLocalEntry() + internal void WriteAndFinishLocalEntry(bool forceWrite) { CloseStreams(); - WriteLocalFileHeaderAndDataIfNeeded(); + WriteLocalFileHeaderAndDataIfNeeded(forceWrite); UnloadStreams(); } // should only throw an exception in extremely exceptional cases because it is called from dispose - internal void WriteCentralDirectoryFileHeader() + internal void WriteCentralDirectoryFileHeader(bool forceWrite) { // This part is simple, because we should definitely know the sizes by this time @@ -538,60 +551,73 @@ internal void WriteCentralDirectoryFileHeader() extraFieldLength = (ushort)bigExtraFieldLength; } - // The central directory file header begins with the below constant-length structure: - // Central directory file header signature (4 bytes) - // Version made by Specification (version) (1 byte) - // Version made by Compatibility (type) (1 byte) - // Minimum version needed to extract (2 bytes) - // General Purpose bit flag (2 bytes) - // The Compression method (2 bytes) - // File last modification time and date (4 bytes) - // CRC-32 (4 bytes) - // Compressed Size (4 bytes) - // Uncompressed Size (4 bytes) - // File Name Length (2 bytes) - // Extra Field Length (2 bytes) - // File Comment Length (2 bytes) - // Start Disk Number (2 bytes) - // Internal File Attributes (2 bytes) - // External File Attributes (4 bytes) - // Offset Of Local Header (4 bytes) - Span<byte> cdStaticHeader = stackalloc byte[ZipCentralDirectoryFileHeader.BlockConstantSectionSize]; - - ZipCentralDirectoryFileHeader.SignatureConstantBytes.CopyTo(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Signature..]); - cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeBySpecification] = (byte)_versionMadeBySpecification; - cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeByCompatibility] = (byte)CurrentZipPlatform; - 
BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Crc32..], _crc32); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FileCommentLength..], (ushort)_fileComment.Length); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.DiskNumberStart..], 0); - BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.InternalFileAttributes..], 0); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExternalFileAttributes..], _externalFileAttr); - BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.RelativeOffsetOfLocalHeader..], offsetOfLocalHeaderTruncated); - - _archive.ArchiveStream.Write(cdStaticHeader); - _archive.ArchiveStream.Write(_storedEntryNameBytes); - - // write extra fields - if (zip64Needed) - zip64ExtraField.WriteBlock(_archive.ArchiveStream); - if (_cdUnknownExtraFields != null) - ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream); + if (_originallyInArchive && Changes == ZipArchive.ChangeState.Unchanged && !forceWrite) + { + long centralDirectoryHeaderLength = ZipCentralDirectoryFileHeader.FieldLocations.DynamicData + + _storedEntryNameBytes.Length + + (zip64Needed ? zip64ExtraField.TotalSize : 0) + + (_cdUnknownExtraFields != null ? 
ZipGenericExtraField.TotalSize(_cdUnknownExtraFields) : 0) + + _fileComment.Length; - if (_fileComment.Length > 0) - _archive.ArchiveStream.Write(_fileComment); + _archive.ArchiveStream.Seek(centralDirectoryHeaderLength, SeekOrigin.Current); + } + else + { + // The central directory file header begins with the below constant-length structure: + // Central directory file header signature (4 bytes) + // Version made by Specification (version) (1 byte) + // Version made by Compatibility (type) (1 byte) + // Minimum version needed to extract (2 bytes) + // General Purpose bit flag (2 bytes) + // The Compression method (2 bytes) + // File last modification time and date (4 bytes) + // CRC-32 (4 bytes) + // Compressed Size (4 bytes) + // Uncompressed Size (4 bytes) + // File Name Length (2 bytes) + // Extra Field Length (2 bytes) + // File Comment Length (2 bytes) + // Start Disk Number (2 bytes) + // Internal File Attributes (2 bytes) + // External File Attributes (4 bytes) + // Offset Of Local Header (4 bytes) + Span<byte> cdStaticHeader = stackalloc byte[ZipCentralDirectoryFileHeader.BlockConstantSectionSize]; + + ZipCentralDirectoryFileHeader.SignatureConstantBytes.CopyTo(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Signature..]); + cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeBySpecification] = (byte)_versionMadeBySpecification; + cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionMadeByCompatibility] = (byte)CurrentZipPlatform; + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.Crc32..], _crc32); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.FileCommentLength..], (ushort)_fileComment.Length); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.DiskNumberStart..], 0); + BinaryPrimitives.WriteUInt16LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.InternalFileAttributes..], 0); + BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.ExternalFileAttributes..], _externalFileAttr); + 
BinaryPrimitives.WriteUInt32LittleEndian(cdStaticHeader[ZipCentralDirectoryFileHeader.FieldLocations.RelativeOffsetOfLocalHeader..], offsetOfLocalHeaderTruncated); + + _archive.ArchiveStream.Write(cdStaticHeader); + _archive.ArchiveStream.Write(_storedEntryNameBytes); + + // write extra fields + if (zip64Needed) + zip64ExtraField.WriteBlock(_archive.ArchiveStream); + if (_cdUnknownExtraFields != null) + ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream); + + if (_fileComment.Length > 0) + _archive.ArchiveStream.Write(_fileComment); + } } - // returns false if fails, will get called on every entry before closing in update mode + // throws exception if fails, will get called on every relevant entry before closing in update mode // can throw InvalidDataException - internal bool LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() + internal void LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() { // we should have made this exact call in _archive.Init through ThrowIfOpenable Debug.Assert(IsOpenable(false, true, out _)); @@ -623,8 +649,6 @@ internal bool LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() } ZipHelper.ReadBytes(_archive.ArchiveStream, _compressedBytes[_compressedBytes.Length - 1], (int)(_compressedSize % MaxSingleBufferSize)); } - - return true; } internal void ThrowIfNotOpenable(bool needToUncompress, bool needToLoadIntoMemory) @@ -726,6 +750,7 @@ private WrappedStream OpenInWriteMode() _archive.DebugAssertIsStillArchiveStreamOwner(this); _everOpenedForWrite = true; + Changes |= ZipArchive.ChangeState.StoredData; CheckSumAndSizeWriteStream crcSizeStream = GetDataCompressor(_archive.ArchiveStream, true, (object? o, EventArgs e) => { // release the archive stream @@ -746,6 +771,7 @@ private WrappedStream OpenInUpdateMode() ThrowIfNotOpenable(needToUncompress: true, needToLoadIntoMemory: true); _everOpenedForWrite = true; + Changes |= ZipArchive.ChangeState.StoredData; _currentlyOpenForWrite = true; // always put it at the beginning for them UncompressedData.Seek(0, SeekOrigin.Begin); @@ -873,7 +899,7 @@ private static BitFlagValues MapDeflateCompressionOption(BitFlagValues generalPu private bool ShouldUseZIP64 => AreSizesTooLarge || IsOffsetTooLarge; // return value is true if we allocated an extra field for 64 bit headers, un/compressed size - private bool WriteLocalFileHeader(bool isEmptyFile) + private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite) { Span<byte> lfStaticHeader = stackalloc byte[ZipLocalFileHeader.SizeOfLocalHeader]; @@ -958,31 +984,50 @@ private bool WriteLocalFileHeader(bool isEmptyFile) extraFieldLength = (ushort)bigExtraFieldLength; } - ZipLocalFileHeader.SignatureConstantBytes.CopyTo(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Signature..]); - BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); - BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); - BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); - BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); - BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Crc32..], _crc32); - 
BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); - BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); - BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); - BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); - - // write header - _archive.ArchiveStream.Write(lfStaticHeader); + // If this is an existing, unchanged entry then silently skip forwards. + // If it's new or changed, write the header. + if (_originallyInArchive && Changes == ZipArchive.ChangeState.Unchanged && !forceWrite) + { + _archive.ArchiveStream.Seek(ZipLocalFileHeader.SizeOfLocalHeader + _storedEntryNameBytes.Length, SeekOrigin.Current); - _archive.ArchiveStream.Write(_storedEntryNameBytes); + if (zip64Used) + { + _archive.ArchiveStream.Seek(zip64ExtraField.TotalSize, SeekOrigin.Current); + } - if (zip64Used) - zip64ExtraField.WriteBlock(_archive.ArchiveStream); - if (_lhUnknownExtraFields != null) - ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _archive.ArchiveStream); + if (_lhUnknownExtraFields != null) + { + _archive.ArchiveStream.Seek(ZipGenericExtraField.TotalSize(_lhUnknownExtraFields), SeekOrigin.Current); + } + } + else + { + ZipLocalFileHeader.SignatureConstantBytes.CopyTo(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Signature..]); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.VersionNeededToExtract..], (ushort)_versionToExtract); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.GeneralPurposeBitFlags..], (ushort)_generalPurposeBitFlag); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressionMethod..], (ushort)CompressionMethod); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.LastModified..], ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.Crc32..], _crc32); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.CompressedSize..], compressedSizeTruncated); + BinaryPrimitives.WriteUInt32LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.UncompressedSize..], uncompressedSizeTruncated); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.FilenameLength..], (ushort)_storedEntryNameBytes.Length); + BinaryPrimitives.WriteUInt16LittleEndian(lfStaticHeader[ZipLocalFileHeader.FieldLocations.ExtraFieldLength..], extraFieldLength); + + // write header + _archive.ArchiveStream.Write(lfStaticHeader); + + _archive.ArchiveStream.Write(_storedEntryNameBytes); + + if (zip64Used) + zip64ExtraField.WriteBlock(_archive.ArchiveStream); + if (_lhUnknownExtraFields != null) + ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _archive.ArchiveStream); + } return zip64Used; } - private void WriteLocalFileHeaderAndDataIfNeeded() + private void WriteLocalFileHeaderAndDataIfNeeded(bool forceWrite) { // _storedUncompressedData gets frozen here, and is what gets written to the file if (_storedUncompressedData != null || _compressedBytes != null) @@ -1011,7 +1056,7 @@ private void 
WriteLocalFileHeaderAndDataIfNeeded() _compressedSize = 0; } - WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0); + WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0, forceWrite: true); // according to ZIP specs, zero-byte files MUST NOT include file data if (_uncompressedSize != 0) @@ -1024,12 +1069,19 @@ private void WriteLocalFileHeaderAndDataIfNeeded() } } } - else // there is no data in the file, but if we are in update mode, we still need to write a header + else // there is no data in the file (or the data in the file has not been loaded), but if we are in update mode, we may still need to write a header { if (_archive.Mode == ZipArchiveMode.Update || !_everOpenedForWrite) { _everOpenedForWrite = true; - WriteLocalFileHeader(isEmptyFile: true); + WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0, forceWrite: forceWrite); + + // If we know that we need to update the file header (but don't need to load and update the data itself) + // then advance the position past it. + if (_compressedSize != 0) + { + _archive.ArchiveStream.Seek(_compressedSize, SeekOrigin.Current); + } } } } @@ -1317,7 +1369,7 @@ public override void Write(byte[] buffer, int offset, int count) { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } _crcSizeStream.Write(buffer, offset, count); @@ -1337,7 +1389,7 @@ public override void Write(ReadOnlySpan<byte> source) { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } _crcSizeStream.Write(source); @@ -1368,7 +1420,7 @@ async ValueTask Core(ReadOnlyMemory<byte> buffer, CancellationToken cancellation { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } await _crcSizeStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -1401,7 +1453,7 @@ protected override void Dispose(bool disposing) if (!_everWritten) { // write local header, no data, so we use stored - _entry.WriteLocalFileHeader(isEmptyFile: true); + _entry.WriteLocalFileHeader(isEmptyFile: true, forceWrite: true); } else { @@ -1437,5 +1489,21 @@ internal enum CompressionMethodValues : ushort BZip2 = 0xC, LZMA = 0xE } + + internal sealed class LocalHeaderOffsetComparer : Comparer<ZipArchiveEntry> + { + private static readonly LocalHeaderOffsetComparer s_instance = new LocalHeaderOffsetComparer(); + + public static LocalHeaderOffsetComparer Instance => s_instance; + + // Newly added ZipArchiveEntry records should always go to the end of the file. + public override int Compare(ZipArchiveEntry? x, ZipArchiveEntry? y) + { + long xOffset = x != null && !x.OriginallyInArchive ? long.MaxValue : x?.OffsetOfLocalHeader ?? long.MinValue; + long yOffset = y != null && !y.OriginallyInArchive ? long.MaxValue : y?.OffsetOfLocalHeader ?? 
long.MinValue; + + return xOffset.CompareTo(yOffset); + } + } } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index 1df77357b581d2..a4567fe6769abf 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -374,8 +374,8 @@ internal partial struct Zip64EndOfCentralDirectoryLocator // ZIP files store values in little endian, so this is reversed. public static ReadOnlySpan<byte> SignatureConstantBytes => [0x50, 0x4B, 0x06, 0x07]; - private const int BlockConstantSectionSize = 20; - public const int SizeOfBlockWithoutSignature = 16; + public static readonly int TotalSize = FieldLocations.TotalNumberOfDisks + FieldLengths.TotalNumberOfDisks; + public static readonly int SizeOfBlockWithoutSignature = TotalSize - FieldLengths.Signature; public uint NumberOfDiskWithZip64EOCD; public ulong OffsetOfZip64EOCD; @@ -383,13 +383,13 @@ internal partial struct Zip64EndOfCentralDirectoryLocator public static bool TryReadBlock(Stream stream, out Zip64EndOfCentralDirectoryLocator zip64EOCDLocator) { - Span<byte> blockContents = stackalloc byte[BlockConstantSectionSize]; + Span<byte> blockContents = stackalloc byte[TotalSize]; int bytesRead; zip64EOCDLocator = default; bytesRead = stream.Read(blockContents); - if (bytesRead < BlockConstantSectionSize) + if (bytesRead < TotalSize) { return false; } @@ -408,7 +408,7 @@ public static bool TryReadBlock(Stream stream, out Zip64EndOfCentralDirectoryLoc public static void WriteBlock(Stream stream, long zip64EOCDRecordStart) { - Span<byte> blockContents = stackalloc byte[BlockConstantSectionSize]; + Span<byte> blockContents = stackalloc byte[TotalSize]; SignatureConstantBytes.CopyTo(blockContents[FieldLocations.Signature..]); // number of disk with start of zip64 eocd @@ -429,6 +429,7 @@ internal partial struct Zip64EndOfCentralDirectoryRecord private const int BlockConstantSectionSize = 56; private const ulong NormalSize = 0x2C; // the size of the data excluding the size/signature fields if no extra data included + public const long TotalSize = (long)NormalSize + 12; // total size of the entire block public ulong SizeOfThisRecord; public ushort VersionMadeBy; @@ -743,9 +744,10 @@ internal partial struct ZipEndOfCentralDirectoryBlock // ZIP files store values in little endian, so this is reversed. public static ReadOnlySpan<byte> SignatureConstantBytes => [0x50, 0x4B, 0x05, 0x06]; + // This also assumes a zero-length comment. 
+ public static readonly int TotalSize = FieldLocations.ArchiveCommentLength + FieldLengths.ArchiveCommentLength; // These are the minimum possible size, assuming the zip file comments variable section is empty - private const int BlockConstantSectionSize = 22; - public const int SizeOfBlockWithoutSignature = 18; + public static readonly int SizeOfBlockWithoutSignature = TotalSize - FieldLengths.Signature; // The end of central directory can have a variable size zip file comment at the end, but its max length can be 64K // The Zip File Format Specification does not explicitly mention a max size for this field, but we are assuming this @@ -763,7 +765,7 @@ internal partial struct ZipEndOfCentralDirectoryBlock public static void WriteBlock(Stream stream, long numberOfEntries, long startOfCentralDirectory, long sizeOfCentralDirectory, byte[] archiveComment) { - Span<byte> blockContents = stackalloc byte[BlockConstantSectionSize]; + Span<byte> blockContents = stackalloc byte[TotalSize]; ushort numberOfEntriesTruncated = numberOfEntries > ushort.MaxValue ? ZipHelper.Mask16Bit : (ushort)numberOfEntries; @@ -799,13 +801,13 @@ public static void WriteBlock(Stream stream, long numberOfEntries, long startOfC public static bool TryReadBlock(Stream stream, out ZipEndOfCentralDirectoryBlock eocdBlock) { - Span<byte> blockContents = stackalloc byte[BlockConstantSectionSize]; + Span<byte> blockContents = stackalloc byte[TotalSize]; int bytesRead; eocdBlock = default; bytesRead = stream.Read(blockContents); - if (bytesRead < BlockConstantSectionSize) + if (bytesRead < TotalSize) { return false; } diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj index 3549359ecc6a44..041f3248cd77f3 100644 --- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj +++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj @@ -29,6 +29,7 @@ <Compile Include="ZipArchive\zip_UpdateTests.cs" /> <Compile Include="ZipArchive\zip_UpdateTests.Comments.cs" /> <Compile Include="$(CommonTestPath)System\IO\PathFeatures.cs" Link="Common\System\IO\PathFeatures.cs" /> + <Compile Include="$(CommonTestPath)System\IO\CallTrackingStream.cs" Link="Common\System\IO\CallTrackingStream.cs" /> <Compile Include="$(CommonTestPath)System\IO\Compression\CRC.cs" Link="Common\System\IO\Compression\CRC.cs" /> <Compile Include="$(CommonTestPath)System\IO\Compression\CompressionStreamTestBase.cs" Link="Common\System\IO\Compression\CompressionStreamTestBase.cs" /> <Compile Include="$(CommonTestPath)System\IO\Compression\CompressionStreamUnitTestBase.cs" Link="Common\System\IO\Compression\CompressionStreamUnitTestBase.cs" /> diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs index 1bb8b2a113b05a..f0f63a34d61a63 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs @@ -683,7 +683,9 @@ public static void ZipWithLargeSparseFile() /// Deflate 0x08, _uncompressedSize 0, _compressedSize 2, compressed data: 0x0300 (\u0003 ETX) /// 2. 
EmptyFileCompressedWrongSize has /// Deflate 0x08, _uncompressedSize 0, _compressedSize 4, compressed data: 0xBAAD0300 (just bad data) - /// ZipArchive is expected to change compression method to Stored (0x00) and ignore "bad" compressed size + /// ZipArchive is not expected to make any changes to the compression method of an archive entry unless + /// it's been changed. If it has been changed, ZipArchive is expected to change compression method to + /// Stored (0x00) and ignore "bad" compressed size /// </summary> [Theory] [MemberData(nameof(EmptyFiles))] @@ -692,14 +694,30 @@ public void ReadArchive_WithEmptyDeflatedFile(byte[] fileBytes) { using (var testStream = new MemoryStream(fileBytes)) { const string ExpectedFileName = "xl/customProperty2.bin"; - // open archive with zero-length file that is compressed (Deflate = 0x8) + + byte firstEntryCompressionMethod = fileBytes[8]; + + // first attempt: open archive with zero-length file that is compressed (Deflate = 0x8) using (var zip = new ZipArchive(testStream, ZipArchiveMode.Update, leaveOpen: true)) { - // dispose without making any changes will rewrite the archive + // dispose without making any changes will make no changes to the input stream } byte[] fileContent = testStream.ToArray(); + // compression method should not have changed + Assert.Equal(firstEntryCompressionMethod, fileContent[8]); + + testStream.Seek(0, SeekOrigin.Begin); + // second attempt: open archive with zero-length file that is compressed (Deflate = 0x8) + using (var zip = new ZipArchive(testStream, ZipArchiveMode.Update, leaveOpen: true)) + using (var zipEntryStream = zip.Entries[0].Open()) + { + // dispose after opening an entry will rewrite the archive + } + + fileContent = testStream.ToArray(); + + // compression method should change to "uncompressed" (Stored = 0x0) Assert.Equal(0, fileContent[8]); diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs index a3130aac5a65ff..936d8c33d5b898 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Collections.Generic; using System.Threading.Tasks; using Xunit; @@ -307,6 +308,126 @@ public static void CanReadLargeCentralDirectoryHeader() } } + [Fact] + public static void ArchivesInOffsetOrder_UpdateMode() + { + // When the ZipArchive which has been opened in Update mode is disposed of, its entries will be rewritten in order of their offset within the file. + // This requires the entries to be sorted when the file is opened.
+ byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = ReverseCentralDirectoryEntries(CreateZipFile(50, sampleEntryContents)); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Update, leaveOpen: true); + long previousOffset = long.MinValue; + System.Reflection.FieldInfo offsetOfLocalHeader = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", System.Reflection.BindingFlags.NonPublic | Reflection.BindingFlags.Instance); + + for (int i = 0; i < source.Entries.Count; i++) + { + ZipArchiveEntry entry = source.Entries[i]; + long offset = (long)offsetOfLocalHeader.GetValue(entry); + + Assert.True(offset > previousOffset); + previousOffset = offset; + } + + source.Dispose(); + } + } + + [Fact] + public static void ArchivesInCentralDirectoryOrder_ReadMode() + { + // When the ZipArchive is opened in Read mode, no sort is necessary. The entries will be added to the ZipArchive in the order + // that they appear in the central directory (in this case, sorted by offset descending.) + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = ReverseCentralDirectoryEntries(CreateZipFile(50, sampleEntryContents)); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: true); + long previousOffset = long.MaxValue; + System.Reflection.FieldInfo offsetOfLocalHeader = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", System.Reflection.BindingFlags.NonPublic | Reflection.BindingFlags.Instance); + + for (int i = 0; i < source.Entries.Count; i++) + { + ZipArchiveEntry entry = source.Entries[i]; + long offset = (long)offsetOfLocalHeader.GetValue(entry); + + Assert.True(offset < previousOffset); + previousOffset = offset; + } + + source.Dispose(); + } + } + + private static byte[] ReverseCentralDirectoryEntries(byte[] zipFile) + { + byte[] destinationBuffer = new byte[zipFile.Length]; + + // Inspect the "end of central directory" header. This is the final 22 bytes of the file, and it contains the offset and the size + // of the central directory. + int eocdHeaderOffset_CentralDirectoryPosition = zipFile.Length - 6; + int eocdHeaderOffset_CentralDirectoryLength = zipFile.Length - 10; + int centralDirectoryPosition = BinaryPrimitives.ReadInt32LittleEndian(zipFile.AsSpan(eocdHeaderOffset_CentralDirectoryPosition, sizeof(int))); + int centralDirectoryLength = BinaryPrimitives.ReadInt32LittleEndian(zipFile.AsSpan(eocdHeaderOffset_CentralDirectoryLength, sizeof(int))); + List<Range> centralDirectoryRanges = new List<Range>(); + + Assert.True(centralDirectoryPosition + centralDirectoryLength < zipFile.Length); + + // With the starting position of the central directory in hand, work through each entry, recording its starting position and its length. + for (int currPosition = centralDirectoryPosition; currPosition < centralDirectoryPosition + centralDirectoryLength;) + { + // The length of a central directory entry is determined by the length of its static components (46 bytes), plus the length of its filename + // (offset 28), extra fields (offset 30) and file comment (offset 32). 
+ short filenameLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 28, sizeof(short))); + short extraFieldLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 30, sizeof(short))); + short fileCommentLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 32, sizeof(short))); + int totalHeaderLength = 46 + filenameLength + extraFieldLength + fileCommentLength; + + // The sample data generated by the tests should never have extra fields and comments. + Assert.True(filenameLength > 0); + Assert.True(extraFieldLength == 0); + Assert.True(fileCommentLength == 0); + + centralDirectoryRanges.Add(new Range(currPosition, currPosition + totalHeaderLength)); + currPosition += totalHeaderLength; + } + + // Begin building the destination archive. The file contents (everything up to the central directory header) can be copied as-is. + zipFile.AsSpan(0, centralDirectoryPosition).CopyTo(destinationBuffer); + + int cumulativeCentralDirectoryLength = 0; + + // Reverse the order of the central directory entries + foreach (Range cdHeader in centralDirectoryRanges) + { + Span<byte> sourceSpan = zipFile.AsSpan(cdHeader); + Span<byte> destSpan; + + cumulativeCentralDirectoryLength += sourceSpan.Length; + Assert.True(cumulativeCentralDirectoryLength <= centralDirectoryLength); + + destSpan = destinationBuffer.AsSpan(centralDirectoryPosition + centralDirectoryLength - cumulativeCentralDirectoryLength, sourceSpan.Length); + sourceSpan.CopyTo(destSpan); + } + + Assert.Equal(centralDirectoryLength, cumulativeCentralDirectoryLength); + Assert.Equal(22, destinationBuffer.Length - centralDirectoryPosition - centralDirectoryLength); + + // Copy the "end of central directory header" entry to the destination buffer. 
+ zipFile.AsSpan(zipFile.Length - 22).CopyTo(destinationBuffer.AsSpan(destinationBuffer.Length - 22)); + + return destinationBuffer; + } + private class DisposeCallCountingStream : MemoryStream { public int NumberOfDisposeCalls { get; private set; } diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs index 362b6bf95e3a6f..6b4dcadf01a280 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs @@ -207,7 +207,7 @@ public static async Task AddFileToArchive() await updateArchive(archive, zmodified(Path.Combine("addFile", "added.txt")), "added.txt"); } - IsZipSameAsDir(testArchive, zmodified ("addFile"), ZipArchiveMode.Read, requireExplicit: true, checkTimes: true); + IsZipSameAsDir(testArchive, zmodified("addFile"), ZipArchiveMode.Read, requireExplicit: true, checkTimes: true); } [Fact] @@ -351,5 +351,423 @@ public void Update_VerifyDuplicateEntriesAreAllowed() Assert.Equal(2, archive.Entries.Count); } + + [Fact] + public static async Task Update_PerformMinimalWritesWhenNoFilesChanged() + { + using (LocalMemoryStream ms = await LocalMemoryStream.readAppFileAsync(zfile("normal.zip"))) + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + int archiveEntries = target.Entries.Count; + + target.Dispose(); + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // No changes to the archive should result in no writes to the file. + Assert.Equal(0, writesCalled + writeBytesCalled); + } + } + + [Theory] + [InlineData(49, 1, 1)] + [InlineData(40, 3, 2)] + [InlineData(30, 5, 3)] + [InlineData(0, 8, 1)] + public void Update_PerformMinimalWritesWhenFixedLengthEntryHeaderFieldChanged(int startIndex, int entriesToModify, int step) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the first archive in Update mode, then change the value of {entriesToModify} fixed-length entry headers + // (LastWriteTime.) Verify the correct number of writes performed as a result, then reopen the same + // archive, get the entries and make sure that the fields hold the expected value. 
+ int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + List<(string EntryName, DateTimeOffset LastWriteTime)> updatedMetadata = new(entriesToModify); + + for (int i = 0; i < entriesToModify; i++) + { + int modificationIndex = startIndex + (i * step); + ZipArchiveEntry entryToModify = target.Entries[modificationIndex]; + string entryName = entryToModify.FullName; + DateTimeOffset expectedDateTimeOffset = entryToModify.LastWriteTime.AddHours(1.0); + + entryToModify.LastWriteTime = expectedDateTimeOffset; + updatedMetadata.Add((entryName, expectedDateTimeOffset)); + } + + target.Dispose(); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + // As above, check 1: the number of writes performed should be minimal. + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. + // The EOCD block won't change as a result of our modifications, so is excluded from the counts. + Assert.Equal(((2 + 2) * entriesToModify), writesCalled + writeBytesCalled); + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + for (int i = 0; i < entriesToModify; i++) + { + int modificationIndex = startIndex + (i * step); + var expectedValues = updatedMetadata[i]; + ZipArchiveEntry verifiedEntry = target.Entries[modificationIndex]; + + // Check 2: the field holds the expected value (and thus has been written to the file.) + Assert.NotNull(verifiedEntry); + Assert.Equal(expectedValues.EntryName, verifiedEntry.FullName); + Assert.Equal(expectedValues.LastWriteTime, verifiedEntry.LastWriteTime); + } + + // Check 3: no other data has been corrupted as a result + for (int i = 0; i < target.Entries.Count; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + byte[] expectedBuffer = [.. 
sampleEntryContents, (byte)(i % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + } + + target.Dispose(); + } + } + } + + [Theory] + [InlineData(0)] + [InlineData(10)] + [InlineData(20)] + [InlineData(30)] + [InlineData(49)] + public void Update_PerformMinimalWritesWhenEntryDataChanges(int index) + => Update_PerformMinimalWritesWithDataAndHeaderChanges(index, -1); + + [Theory] + [InlineData(0, 0)] + [InlineData(20, 40)] + [InlineData(30, 10)] + public void Update_PerformMinimalWritesWithDataAndHeaderChanges(int dataChangeIndex, int lastWriteTimeChangeIndex) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + byte[] expectedUpdatedEntryContents = [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]; + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the archive in Update mode, then rewrite the data of the {dataChangeIndex}th entry + // and set the LastWriteTime of the {lastWriteTimeChangeIndex}th entry. + // Verify the correct number of writes performed as a result, then reopen the same + // archive, get the entries and make sure that the fields hold the expected value. + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + ZipArchiveEntry entryToRewrite = target.Entries[dataChangeIndex]; + int totalEntries = target.Entries.Count; + int expectedEntriesToWrite = target.Entries.Count - dataChangeIndex; + DateTimeOffset expectedWriteTime = default; + + if (lastWriteTimeChangeIndex != -1) + { + ZipArchiveEntry entryToModify = target.Entries[lastWriteTimeChangeIndex]; + + expectedWriteTime = entryToModify.LastWriteTime.AddHours(1.0); + entryToModify.LastWriteTime = expectedWriteTime; + } + + using (var entryStream = entryToRewrite.Open()) + { + entryStream.SetLength(0); + entryStream.Write(expectedUpdatedEntryContents); + } + + target.Dispose(); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // If the data changed first, then every entry after it will be written in full. If the fixed-length + // metadata changed first, some entries won't be written in full - just updated in place. + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 2 writes for the file data of the updated entry itself + // 1 write per archive entry for the file data of other entries after this in the file + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. + // All of the central directory headers must be rewritten after an entry's data has been modified.
+ if (dataChangeIndex <= lastWriteTimeChangeIndex || lastWriteTimeChangeIndex == -1) + { + // dataChangeIndex -> totalEntries: rewrite in full + // all central directory headers + Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * totalEntries), writesCalled + writeBytesCalled); + } + else + { + // lastWriteTimeChangeIndex: partial rewrite + // dataChangeIndex -> totalEntries: rewrite in full + // all central directory headers + Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * totalEntries) + 2, writesCalled + writeBytesCalled); + } + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + // Check 2: no other data has been corrupted as a result + for (int i = 0; i < target.Entries.Count; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + byte[] expectedBuffer = i == dataChangeIndex + ? expectedUpdatedEntryContents + : [.. sampleEntryContents, (byte)(i % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + + if (i == lastWriteTimeChangeIndex) + { + Assert.Equal(expectedWriteTime, entry.LastWriteTime); + } + } + } + } + } + + [Fact] + public async Task Update_PerformMinimalWritesWhenArchiveCommentChanged() + { + using (LocalMemoryStream ms = await LocalMemoryStream.readAppFileAsync(zfile("normal.zip"))) + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + string expectedComment = "14 byte comment"; + + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + target.Comment = expectedComment; + + target.Dispose(); + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // We expect 2 writes for the end of central directory block - 1 for the EOCD, 1 for the comment. + Assert.Equal(2, writesCalled + writeBytesCalled); + + trackingStream.Seek(0, SeekOrigin.Begin); + + target = new ZipArchive(trackingStream, ZipArchiveMode.Read, leaveOpen: true); + Assert.Equal(expectedComment, target.Comment); + } + } + + [Theory] + [InlineData(-1, 40)] + [InlineData(-1, 49)] + [InlineData(-1, 0)] + [InlineData(42, 40)] + [InlineData(38, 40)] + public void Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted(int modifyIndex, int deleteIndex) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + byte[] expectedUpdatedEntryContents = [22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]; + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the archive in Update mode, then rewrite the data of the {modifyIndex}th entry + // and delete the {deleteIndex}th entry.
+                    // Verify the correct number of writes performed as a result, then reopen the same
+                    // archive, get the entries, make sure that the right number of entries have been
+                    // found and that the entries have the correct contents.
+                    int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write));
+                    int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte));
+                    ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true);
+                    int totalEntries = target.Entries.Count;
+                    // Everything after the first modification or deletion is to be rewritten.
+                    int expectedEntriesToWrite = (totalEntries - 1) - (modifyIndex == -1 ? deleteIndex : Math.Min(modifyIndex, deleteIndex));
+                    ZipArchiveEntry entryToDelete = target.Entries[deleteIndex];
+                    string deletedPath = entryToDelete.FullName;
+                    string modifiedPath = null;
+
+                    if (modifyIndex != -1)
+                    {
+                        ZipArchiveEntry entryToRewrite = target.Entries[modifyIndex];
+
+                        modifiedPath = entryToRewrite.FullName;
+                        using (var entryStream = entryToRewrite.Open())
+                        {
+                            entryStream.SetLength(0);
+                            entryStream.Write(expectedUpdatedEntryContents);
+                        }
+                    }
+
+                    entryToDelete.Delete();
+
+                    target.Dispose();
+
+                    Assert.True(ms.Length < sampleZipFile.Length);
+
+                    writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled;
+                    writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled;
+
+                    // 2 writes per archive entry for the local file header.
+                    // 2 writes per archive entry for the central directory header.
+                    // 2 writes for the file data of the updated entry itself
+                    // 1 write per archive entry for the file data of other entries after this in the file
+                    // 1 write (sometimes 2, if there's a comment) for the end of central directory block.
+                    // All of the central directory headers must be rewritten after an entry's data has been modified.
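+                    // Worked example: for InlineData(38, 40), totalEntries == 50 and expectedEntriesToWrite == 49 - 38 == 11,
+                    // so the modifyIndex != -1 branch below expects 1 + 1 + (3 * 11) + (2 * 49) == 133 tracked Write/WriteByte calls.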
+                    if (modifyIndex == -1)
+                    {
+                        Assert.Equal(1 + ((2 + 1) * expectedEntriesToWrite) + (2 * (totalEntries - 1)), writesCalled + writeBytesCalled);
+                    }
+                    else
+                    {
+                        Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * (totalEntries - 1)), writesCalled + writeBytesCalled);
+                    }
+
+                    trackingStream.Seek(0, SeekOrigin.Begin);
+                    target = new ZipArchive(trackingStream, ZipArchiveMode.Read);
+
+                    // Check 2: no other data has been corrupted as a result
+                    for (int i = 0; i < target.Entries.Count; i++)
+                    {
+                        ZipArchiveEntry entry = target.Entries[i];
+                        // The expected index will be off by one if it's at or after the deleted index, so compensate
+                        int expectedIndex = i < deleteIndex ? i : i + 1;
+                        byte[] expectedBuffer = entry.FullName == modifiedPath
+                            ? expectedUpdatedEntryContents
+                            : [.. sampleEntryContents, (byte)(expectedIndex % byte.MaxValue)];
+                        byte[] readBuffer = new byte[expectedBuffer.Length];
+
+                        using (Stream readStream = entry.Open())
+                        {
+                            readStream.Read(readBuffer.AsSpan());
+                        }
+
+                        Assert.Equal(expectedBuffer, readBuffer);
+
+                        Assert.NotEqual(deletedPath, entry.FullName);
+                    }
+                }
+            }
+        }
+
+        [Theory]
+        [InlineData(1)]
+        [InlineData(5)]
+        [InlineData(10)]
+        [InlineData(12)]
+        public void Update_PerformMinimalWritesWhenEntriesModifiedAndAdded(int entriesToCreate)
+        {
+            byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19];
+            byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents);
+
+            using (MemoryStream ms = new MemoryStream())
+            {
+                ms.Write(sampleZipFile);
+                ms.Seek(0, SeekOrigin.Begin);
+
+                using (CallTrackingStream trackingStream = new CallTrackingStream(ms))
+                {
+                    // Open the archive in Update mode. Rewrite the data of the last entry and add {entriesToCreate}
+                    // entries to the end of the archive. Verify the correct number of writes performed as a result,
+                    // then reopen the same archive, get the entries, make sure that the right number of entries have
+                    // been found and that the entries have the correct contents.
+                    int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write));
+                    int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte));
+                    ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true);
+                    int totalEntries = target.Entries.Count;
+                    ZipArchiveEntry entryToRewrite = target.Entries[^1];
+                    string modifiedPath = entryToRewrite.FullName;
+
+                    using (Stream entryStream = entryToRewrite.Open())
+                    {
+                        entryStream.Seek(0, SeekOrigin.Begin);
+                        for (int i = 0; i < 100; i++)
+                        {
+                            entryStream.Write(sampleEntryContents);
+                        }
+                    }
+
+                    for (int i = 0; i < entriesToCreate; i++)
+                    {
+                        ZipArchiveEntry createdEntry = target.CreateEntry($"added/{i}.bin");
+
+                        using (Stream entryWriteStream = createdEntry.Open())
+                        {
+                            entryWriteStream.Write(sampleEntryContents);
+                            entryWriteStream.WriteByte((byte)((i + totalEntries) % byte.MaxValue));
+                        }
+                    }
+
+                    target.Dispose();
+
+                    writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled;
+                    writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled;
+
+                    // 2 writes per archive entry for the local file header.
+                    // 2 writes per archive entry for the central directory header.
+                    // 2 writes for the file data of the updated entry itself
+                    // 1 write (sometimes 2, if there's a comment) for the end of central directory block.
+                    // All of the central directory headers must be rewritten after an entry's data has been modified.
+
+                    Assert.Equal(1 + ((2 + 2 + 2) * entriesToCreate) + (2 * (totalEntries - 1) + (2 + 2 + 2)), writesCalled + writeBytesCalled);
+
+                    trackingStream.Seek(0, SeekOrigin.Begin);
+                    target = new ZipArchive(trackingStream, ZipArchiveMode.Read);
+
+                    // Check 2: no other data has been corrupted as a result
+                    for (int i = 0; i < totalEntries + entriesToCreate; i++)
+                    {
+                        ZipArchiveEntry entry = target.Entries[i];
+                        byte[] expectedBuffer = entry.FullName == modifiedPath
+                            ? Enumerable.Repeat(sampleEntryContents, 100).SelectMany(x => x).ToArray()
+                            : [.. sampleEntryContents, (byte)(i % byte.MaxValue)];
+                        byte[] readBuffer = new byte[expectedBuffer.Length];
+
+                        using (Stream readStream = entry.Open())
+                        {
+                            readStream.Read(readBuffer.AsSpan());
+                        }
+
+                        Assert.Equal(expectedBuffer, readBuffer);
+                    }
+                }
+            }
+        }
     }
 }
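For reference, a worked tally of the write count asserted in Update_PerformMinimalWritesWhenEntriesModifiedAndAdded, following the per-entry breakdown in the test's comments (illustrative arithmetic only, using the entriesToCreate = 5 case against the 50-entry sample archive):

    // 1 write for the EOCD block, 6 writes per added entry (local header, data, central directory header),
    // 6 writes for the rewritten last entry, and 2 central directory header writes for each of the 49 untouched entries.
    int expectedWrites = 1 + ((2 + 2 + 2) * 5) + (2 * (50 - 1) + (2 + 2 + 2)); // == 135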