@@ -30,6 +30,7 @@ public class ZipArchive : IDisposable
30
30
private readonly Stream ? _backingStream ;
31
31
private byte [ ] _archiveComment ;
32
32
private Encoding ? _entryNameAndCommentEncoding ;
33
+ private long _firstDeletedEntryOffset ;
33
34
34
35
#if DEBUG_FORCE_ZIP64
35
36
public bool _forceZip64 ;
@@ -164,12 +165,14 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
164
165
_entries = new List < ZipArchiveEntry > ( ) ;
165
166
_entriesCollection = new ReadOnlyCollection < ZipArchiveEntry > ( _entries ) ;
166
167
_entriesDictionary = new Dictionary < string , ZipArchiveEntry > ( ) ;
168
+ Changed = ChangeState . Unchanged ;
167
169
_readEntries = false ;
168
170
_leaveOpen = leaveOpen ;
169
171
_centralDirectoryStart = 0 ; // invalid until ReadCentralDirectory
170
172
_isDisposed = false ;
171
173
_numberOfThisDisk = 0 ; // invalid until ReadCentralDirectory
172
174
_archiveComment = Array . Empty < byte > ( ) ;
175
+ _firstDeletedEntryOffset = long . MaxValue ;
173
176
174
177
switch ( mode )
175
178
{
@@ -217,7 +220,11 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding?
217
220
public string Comment
218
221
{
219
222
// Decodes the stored comment bytes with EntryNameAndCommentEncoding, falling back to UTF-8 when that encoding is null.
get => ( EntryNameAndCommentEncoding ?? Encoding . UTF8 ) . GetString ( _archiveComment ) ;
220
- set => _archiveComment = ZipHelper . GetEncodedTruncatedBytesFromString ( value , EntryNameAndCommentEncoding , ZipEndOfCentralDirectoryBlock . ZipFileCommentMaxLength , out _ ) ;
223
+ set
224
+ {
// Re-encode the new comment. ZipFileCommentMaxLength is passed as the helper's limit (presumably the maximum
// encoded length - confirm against ZipHelper); the helper's out value is discarded.
225
+ _archiveComment = ZipHelper . GetEncodedTruncatedBytesFromString ( value , EntryNameAndCommentEncoding , ZipEndOfCentralDirectoryBlock . ZipFileCommentMaxLength , out _ ) ;
// Record an archive-level, variable-length change. WriteArchiveEpilogue rewrites the end of central directory
// block whenever Changed is not Unchanged, even if the central directory itself did not change.
226
+ Changed |= ChangeState . DynamicLengthMetadata ;
227
+ }
221
228
}
222
229
223
230
/// <summary>
@@ -383,6 +390,10 @@ private set
383
390
}
384
391
}
385
392
393
+ // This property's value only relates to the top-level fields of the archive (such as the archive comment).
394
+ // New entries in the archive won't change its state.
// The constructor initializes it to Unchanged and the Comment setter ORs in DynamicLengthMetadata;
// WriteArchiveEpilogue reads it to decide whether the end of central directory block must be rewritten.
395
+ internal ChangeState Changed { get ; private set ; }
396
+
386
397
private ZipArchiveEntry DoCreateEntry ( string entryName , CompressionLevel ? compressionLevel )
387
398
{
388
399
ArgumentException . ThrowIfNullOrEmpty ( entryName ) ;
@@ -409,7 +420,7 @@ internal void AcquireArchiveStream(ZipArchiveEntry entry)
409
420
{
410
421
if ( ! _archiveStreamOwner . EverOpenedForWrite )
411
422
{
412
- _archiveStreamOwner . WriteAndFinishLocalEntry ( ) ;
423
+ _archiveStreamOwner . WriteAndFinishLocalEntry ( forceWrite : true ) ;
413
424
}
414
425
else
415
426
{
@@ -441,6 +452,11 @@ internal void RemoveEntry(ZipArchiveEntry entry)
441
452
_entries . Remove ( entry ) ;
442
453
443
454
_entriesDictionary . Remove ( entry . FullName ) ;
455
+ // Keep track of the offset of the earliest deleted entry in the archive
456
+ if ( entry . OriginallyInArchive && entry . OffsetOfLocalHeader < _firstDeletedEntryOffset )
457
+ {
458
+ _firstDeletedEntryOffset = entry . OffsetOfLocalHeader ;
459
+ }
444
460
}
445
461
446
462
internal void ThrowIfDisposed ( )
@@ -550,7 +566,12 @@ private void ReadCentralDirectory()
550
566
throw new InvalidDataException ( SR . NumEntriesWrong ) ;
551
567
}
552
568
553
- _archiveStream . Seek ( _centralDirectoryStart + bytesRead , SeekOrigin . Begin ) ;
569
+ // Sort _entries by each archive entry's position. This supports the algorithm in WriteFile, so is only
570
+ // necessary when the ZipArchive has been opened in Update mode.
571
+ if ( Mode == ZipArchiveMode . Update )
572
+ {
573
+ _entries . Sort ( ZipArchiveEntry . LocalHeaderOffsetComparer . Instance ) ;
574
+ }
554
575
}
555
576
catch ( EndOfStreamException ex )
556
577
{
@@ -681,41 +702,107 @@ private void WriteFile()
681
702
// if we are in update mode, we call EnsureCentralDirectoryRead, which sets readEntries to true
682
703
Debug . Assert ( _readEntries ) ;
683
704
705
+ // Entries starting at or after this offset follow a deletion or a dynamically-sized change. Everything on or after this point must be rewritten.
706
+ long completeRewriteStartingOffset = 0 ;
707
+ List < ZipArchiveEntry > entriesToWrite = _entries ;
708
+
684
709
if ( _mode == ZipArchiveMode . Update )
685
710
{
686
- List < ZipArchiveEntry > markedForDelete = new List < ZipArchiveEntry > ( ) ;
711
+ // This is the offset of the earliest deleted or changed entry; entries starting at or after it are queued for writing. The change might just be a fixed-length field though, in which case
712
+ // that single entry's metadata can be rewritten without impacting anything else.
713
+ long startingOffset = _firstDeletedEntryOffset ;
714
+ long nextFileOffset = 0 ;
715
+ completeRewriteStartingOffset = startingOffset ;
716
+
717
+ entriesToWrite = new ( _entries . Count ) ;
687
718
foreach ( ZipArchiveEntry entry in _entries )
688
719
{
689
- if ( ! entry . LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded ( ) )
690
- markedForDelete . Add ( entry ) ;
720
+ if ( ! entry . OriginallyInArchive )
721
+ {
722
+ entriesToWrite . Add ( entry ) ;
723
+ }
724
+ else
725
+ {
726
+ if ( entry . Changes == ChangeState . Unchanged )
727
+ {
728
+ // Keep track of the expected position of the file entry after the final untouched file entry so that when the loop completes,
729
+ // we'll know which position to start writing new entries from.
730
+ nextFileOffset = Math . Max ( nextFileOffset , entry . OffsetOfCompressedData + entry . CompressedLength ) ;
731
+ }
732
+ // When calculating the starting offset to load the files from, only look at changed entries which are already in the archive.
733
+ else
734
+ {
735
+ startingOffset = Math . Min ( startingOffset , entry . OffsetOfLocalHeader ) ;
736
+ }
737
+
738
+ // We want to re-write entries which are after the starting offset of the first entry which has pending data to write.
739
+ // NB: the existing ZipArchiveEntries are sorted in _entries by their position ascending.
740
+ if ( entry . OffsetOfLocalHeader >= startingOffset )
741
+ {
742
+ // If the pending data to write is fixed-length metadata in the header, there's no need to load the compressed file bits.
743
+ if ( ( entry . Changes & ( ChangeState . DynamicLengthMetadata | ChangeState . StoredData ) ) != 0 )
744
+ {
745
+ completeRewriteStartingOffset = Math . Min ( completeRewriteStartingOffset , entry . OffsetOfLocalHeader ) ;
746
+ }
747
+ if ( entry . OffsetOfLocalHeader >= completeRewriteStartingOffset )
748
+ {
749
+ entry . LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded ( ) ;
750
+ }
751
+
752
+ entriesToWrite . Add ( entry ) ;
753
+ }
754
+ }
755
+ }
756
+
757
+ // If the offset of entries to write from is still at long.MaxValue, then we know that nothing has been deleted,
758
+ // nothing has been modified - so we just want to move to the end of all remaining files in the archive.
759
+ if ( startingOffset == long . MaxValue )
760
+ {
761
+ startingOffset = nextFileOffset ;
691
762
}
692
- foreach ( ZipArchiveEntry entry in markedForDelete )
693
- entry . Delete ( ) ;
694
763
695
- _archiveStream . Seek ( 0 , SeekOrigin . Begin ) ;
696
- _archiveStream . SetLength ( 0 ) ;
764
+ _archiveStream . Seek ( startingOffset , SeekOrigin . Begin ) ;
697
765
}
698
766
699
- foreach ( ZipArchiveEntry entry in _entries )
767
+ foreach ( ZipArchiveEntry entry in entriesToWrite )
700
768
{
701
- entry . WriteAndFinishLocalEntry ( ) ;
769
+ // We don't always need to write the local header entry, ZipArchiveEntry is usually able to work out when it doesn't need to.
770
+ // We want to force this header entry to be written (even for completely untouched entries) if the entry comes after one
771
+ // which had a pending dynamically-sized write.
772
+ bool forceWriteLocalEntry = ! entry . OriginallyInArchive || ( entry . OriginallyInArchive && entry . OffsetOfLocalHeader >= completeRewriteStartingOffset ) ;
773
+
774
+ entry . WriteAndFinishLocalEntry ( forceWriteLocalEntry ) ;
702
775
}
703
776
704
- long startOfCentralDirectory = _archiveStream . Position ;
777
+ long plannedCentralDirectoryPosition = _archiveStream . Position ;
778
+ // If there are no entries in the archive, we still want to create the archive epilogue.
779
+ bool archiveEpilogueRequiresUpdate = _entries . Count == 0 ;
705
780
706
781
foreach ( ZipArchiveEntry entry in _entries )
707
782
{
708
- entry . WriteCentralDirectoryFileHeader ( ) ;
783
+ // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different.
784
+ bool centralDirectoryEntryRequiresUpdate = plannedCentralDirectoryPosition != _centralDirectoryStart
785
+ || ! entry . OriginallyInArchive || entry . OffsetOfLocalHeader >= completeRewriteStartingOffset ;
786
+
787
+ entry . WriteCentralDirectoryFileHeader ( centralDirectoryEntryRequiresUpdate ) ;
788
+ archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate ;
709
789
}
710
790
711
- long sizeOfCentralDirectory = _archiveStream . Position - startOfCentralDirectory ;
791
+ long sizeOfCentralDirectory = _archiveStream . Position - plannedCentralDirectoryPosition ;
792
+
793
+ WriteArchiveEpilogue ( plannedCentralDirectoryPosition , sizeOfCentralDirectory , archiveEpilogueRequiresUpdate ) ;
712
794
713
- WriteArchiveEpilogue ( startOfCentralDirectory , sizeOfCentralDirectory ) ;
795
+ // If entries have been removed and new (smaller) ones added, there could be empty space at the end of the file.
796
+ // Shrink the file to reclaim this space.
797
+ if ( _mode == ZipArchiveMode . Update && _archiveStream . Position != _archiveStream . Length )
798
+ {
799
+ _archiveStream . SetLength ( _archiveStream . Position ) ;
800
+ }
714
801
}
715
802
716
803
// Writes the EOCD record and, if needed, the Zip64 EOCD record and the Zip64 EOCD locator.
717
804
// should only throw an exception in extremely exceptional cases because it is called from dispose
718
- private void WriteArchiveEpilogue ( long startOfCentralDirectory , long sizeOfCentralDirectory )
805
+ private void WriteArchiveEpilogue ( long startOfCentralDirectory , long sizeOfCentralDirectory , bool centralDirectoryChanged )
719
806
{
720
807
// determine if we need Zip 64
721
808
if ( startOfCentralDirectory >= uint . MaxValue
@@ -728,12 +815,37 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr
728
815
{
729
816
// if we need zip 64, write zip 64 eocd and locator
730
817
long zip64EOCDRecordStart = _archiveStream . Position ;
731
- Zip64EndOfCentralDirectoryRecord . WriteBlock ( _archiveStream , _entries . Count , startOfCentralDirectory , sizeOfCentralDirectory ) ;
732
- Zip64EndOfCentralDirectoryLocator . WriteBlock ( _archiveStream , zip64EOCDRecordStart ) ;
818
+
819
+ if ( centralDirectoryChanged )
820
+ {
821
+ Zip64EndOfCentralDirectoryRecord . WriteBlock ( _archiveStream , _entries . Count , startOfCentralDirectory , sizeOfCentralDirectory ) ;
822
+ Zip64EndOfCentralDirectoryLocator . WriteBlock ( _archiveStream , zip64EOCDRecordStart ) ;
823
+ }
824
+ else
825
+ {
826
+ _archiveStream . Seek ( Zip64EndOfCentralDirectoryRecord . TotalSize , SeekOrigin . Current ) ;
827
+ _archiveStream . Seek ( Zip64EndOfCentralDirectoryLocator . TotalSize , SeekOrigin . Current ) ;
828
+ }
733
829
}
734
830
735
831
// write normal eocd
736
- ZipEndOfCentralDirectoryBlock . WriteBlock ( _archiveStream , _entries . Count , startOfCentralDirectory , sizeOfCentralDirectory , _archiveComment ) ;
832
+ if ( centralDirectoryChanged || ( Changed != ChangeState . Unchanged ) )
833
+ {
834
+ ZipEndOfCentralDirectoryBlock . WriteBlock ( _archiveStream , _entries . Count , startOfCentralDirectory , sizeOfCentralDirectory , _archiveComment ) ;
835
+ }
836
+ else
837
+ {
838
+ _archiveStream . Seek ( ZipEndOfCentralDirectoryBlock . TotalSize + _archiveComment . Length , SeekOrigin . Current ) ;
839
+ }
840
+ }
841
+
842
// Bit flags recording which kinds of pending modification exist. ZipArchive.Changed uses them for the archive's
// top-level fields, and WriteFile inspects ZipArchiveEntry.Changes (same type) to decide how much to rewrite.
+ [ Flags ]
843
+ internal enum ChangeState
844
+ {
// No pending modification.
845
+ Unchanged = 0x0 ,
// A fixed-length field changed. In WriteFile this flag alone does not lower the complete-rewrite offset,
// so later entries are not forced to be rewritten because of it.
846
+ FixedLengthMetadata = 0x1 ,
// A variable-length field changed (the archive Comment setter raises this flag). In WriteFile an entry
// carrying this flag lowers the complete-rewrite offset to its own local header offset.
847
+ DynamicLengthMetadata = 0x2 ,
// NOTE(review): presumably the entry's stored (compressed) data changed - confirm against ZipArchiveEntry.
// WriteFile treats it like DynamicLengthMetadata when computing the complete-rewrite offset.
848
+ StoredData = 0x4
737
849
}
738
850
}
739
851
}
0 commit comments