Skip to content

Commit 0788948

Browse files
authored
gh-84481: Add ZipFile.data_offset attribute (#132165)
* Add ZipFile.data_offset attribute. This attribute provides the offset to zip data from the start of the file, when available. * Add blurb-it. * Try fixing class ref in NEWS.
1 parent dff8bcf commit 0788948

File tree

4 files changed

+73
-0
lines changed

4 files changed

+73
-0
lines changed

Doc/library/zipfile.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,14 @@ The following data attributes are also available:
538538
it should be no longer than 65535 bytes. Comments longer than this will be
539539
truncated.
540540

541+
.. attribute:: ZipFile.data_offset
542+
543+
The offset to the start of ZIP data from the beginning of the file. When the
544+
:class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
545+
underlying file does not support ``tell()``, the value will be ``None``
546+
instead.
547+
548+
.. versionadded:: 3.14
541549

542550
.. _path-objects:
543551

Lib/test/test_zipfile/test_core.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3312,6 +3312,54 @@ def test_execute_zip64(self):
33123312
self.assertIn(b'number in executable: 5', output)
33133313

33143314

3315+
class TestDataOffsetPrependedZip(unittest.TestCase):
    """Check .data_offset when reading zip files that have an executable
    prepended."""

    def setUp(self):
        # Resolve the regular and the Zip64 fixture from the archive test data.
        self.exe_zip, self.exe_zip64 = (
            findfile(fixture, subdir='archivetestdata')
            for fixture in ('exe_with_zip', 'exe_with_z64')
        )

    def _test_data_offset(self, name):
        # Shared check: both fixtures are expected to report offset 713.
        archive = zipfile.ZipFile(name)
        with archive:
            self.assertEqual(archive.data_offset, 713)

    def test_data_offset_with_exe_prepended(self):
        self._test_data_offset(self.exe_zip)

    def test_data_offset_with_exe_prepended_zip64(self):
        self._test_data_offset(self.exe_zip64)
3331+
3332+
class TestDataOffsetZipWrite(unittest.TestCase):
    """Test .data_offset for ZipFile opened in write mode.

    Every test here writes to an in-memory buffer, so no on-disk fixture
    (temporary directory or file path) is created.
    """

    def test_data_offset_write_no_prefix(self):
        # Nothing has been written before the archive is opened, so the
        # reported offset is the buffer's initial position.
        with io.BytesIO() as fp:
            with zipfile.ZipFile(fp, "w") as zipfp:
                self.assertEqual(zipfp.data_offset, 0)

    def test_data_offset_write_with_prefix(self):
        # Bytes written before the archive is opened advance tell(), and
        # that position is what data_offset should report.
        prefix = b"this is a prefix"
        with io.BytesIO() as fp:
            fp.write(prefix)
            with zipfile.ZipFile(fp, "w") as zipfp:
                self.assertEqual(zipfp.data_offset, len(prefix))

    def test_data_offset_write_no_tell(self):
        # The initializer in ZipFile checks if tell raises AttributeError or
        # OSError when creating a file in write mode when deducing the offset
        # of the beginning of zip data
        class NoTellBytesIO(io.BytesIO):
            def tell(self):
                raise OSError("Unimplemented!")

        with NoTellBytesIO() as fp:
            with zipfile.ZipFile(fp, "w") as zipfp:
                self.assertIs(zipfp.data_offset, None)
3361+
3362+
33153363
class EncodedMetadataTests(unittest.TestCase):
33163364
file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'
33173365
file_content = [

Lib/zipfile/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1413,10 +1413,12 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
14131413
self._didModify = True
14141414
try:
14151415
self.start_dir = self.fp.tell()
1416+
self._data_offset = self.start_dir
14161417
except (AttributeError, OSError):
14171418
self.fp = _Tellable(self.fp)
14181419
self.start_dir = 0
14191420
self._seekable = False
1421+
self._data_offset = None
14201422
else:
14211423
# Some file-like objects can provide tell() but not seek()
14221424
try:
@@ -1486,6 +1488,10 @@ def _RealGetContents(self):
14861488
# If Zip64 extension structures are present, account for them
14871489
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
14881490

1491+
# store the offset to the beginning of data for the
1492+
# .data_offset property
1493+
self._data_offset = concat
1494+
14891495
if self.debug > 2:
14901496
inferred = concat + offset_cd
14911497
print("given, inferred, offset", offset_cd, inferred, concat)
@@ -1551,6 +1557,12 @@ def _RealGetContents(self):
15511557
zinfo._end_offset = end_offset
15521558
end_offset = zinfo.header_offset
15531559

1560+
@property
def data_offset(self):
    """The offset to the start of zip data in the file or None if
    unavailable."""
    # _data_offset is only assigned on some code paths (write-mode setup and
    # part-way through reading the central directory). Fall back to None,
    # as the docstring promises, instead of raising AttributeError when no
    # assignment has happened.
    return getattr(self, '_data_offset', None)
1565+
15541566
def namelist(self):
15551567
"""Return a list of file names in the archive."""
15561568
return [data.filename for data in self.filelist]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Add the :attr:`zipfile.ZipFile.data_offset` attribute, which stores the
2+
offset to the beginning of ZIP data in a file when available. When the
3+
:class:`zipfile.ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
4+
underlying file does not support ``tell()``, the value will be ``None``
5+
instead.

0 commit comments

Comments
 (0)