Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support optional crc32 for uncompressed streaming zip32 and zip64 #134

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 34 additions & 4 deletions stream_zip/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ def _get(self, offset: int, default_get_compressobj: _CompressObjGetter) -> _Met
return _ZIP_AUTO_TYPE_INNER()


class CRCActual:
    """Mutable holder for the CRC-32 computed while a member's data streams.

    An instance is passed to ``_no_compression_streamed_data``, which stores
    the checksum it computed in ``crc_32`` only after every chunk has been
    yielded and its CRC and size checks have passed. The caller reads the
    value back once that generator is exhausted, for example to write it
    into the data descriptor.
    """

    def __init__(self) -> None:
        # Placeholder; only meaningful after the streaming generator finishes.
        self.crc_32: int = 0

    def __repr__(self) -> str:
        return f'{type(self).__name__}(crc_32={self.crc_32!r})'

###############################
# Public sentinel objects/types

Expand Down Expand Up @@ -566,6 +570,9 @@ def _no_compression_streamed_64_local_header_and_data(
compressed_size,
) + mod_at_unix_extra + aes_extra
flags = aes_flags | utf8_flag
if crc_32 == 0:
flags |= data_descriptor_flag

masked_crc_32 = crc_32 & crc_32_mask

yield from _(local_header_signature)
Expand All @@ -583,7 +590,15 @@ def _no_compression_streamed_64_local_header_and_data(
yield from _(name_encoded)
yield from _(extra)

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff))
actual = CRCActual()

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffffffffffff, actual))

if crc_32 == 0:
masked_crc_32 = actual.crc_32 & crc_32_mask

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_64_struct.pack(masked_crc_32, compressed_size, uncompressed_size))

extra = zip_64_central_directory_extra_struct.pack(
zip_64_extra_signature,
Expand Down Expand Up @@ -626,6 +641,9 @@ def _no_compression_streamed_32_local_header_and_data(
compressed_size = uncompressed_size + aes_size_increase
extra = mod_at_unix_extra + aes_extra
flags = aes_flags | utf8_flag
if crc_32 == 0:
flags |= data_descriptor_flag

masked_crc_32 = crc_32 & crc_32_mask

yield from _(local_header_signature)
Expand All @@ -643,7 +661,16 @@ def _no_compression_streamed_32_local_header_and_data(
yield from _(name_encoded)
yield from _(extra)

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff))
actual = CRCActual()

yield from encryption_func(_no_compression_streamed_data(chunks, uncompressed_size, crc_32, 0xffffffff, actual))

if crc_32 == 0:
masked_crc_32 = actual.crc_32 & crc_32_mask

yield from _(data_descriptor_signature)
yield from _(data_descriptor_zip_32_struct.pack(masked_crc_32, compressed_size, uncompressed_size))


return central_directory_header_struct.pack(
20, # Version made by
Expand All @@ -665,7 +692,7 @@ def _no_compression_streamed_32_local_header_and_data(
file_offset,
), name_encoded, extra

def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: int, crc_32: int, maximum_size: int) -> Generator[bytes, None, Any]:
def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: int, crc_32: int, maximum_size: int, actual: CRCActual) -> Generator[bytes, None, Any]:
actual_crc_32 = zlib.crc32(b'')
size = 0
for chunk in chunks:
Expand All @@ -674,12 +701,15 @@ def _no_compression_streamed_data(chunks: Iterable[bytes], uncompressed_size: in
_raise_if_beyond(size, maximum=maximum_size, exception_class=UncompressedSizeOverflowError)
yield chunk

if actual_crc_32 != crc_32:
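# A declared crc_32 of 0 is treated as "not supplied": skip the integrity check and hand back the computed CRC instead.
# NOTE(review): 0 is also a genuine CRC-32 value (zlib.crc32(b'') == 0), so an expected CRC of 0 can no longer be verified.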
if actual_crc_32 != crc_32 and crc_32 != 0:
raise CRC32IntegrityError()

if size != uncompressed_size:
raise UncompressedSizeIntegrityError()

actual.crc_32 = actual_crc_32

for name, modified_at, mode, method, chunks in files:
_method, _auto_upgrade_central_directory, _get_compress_obj, uncompressed_size, crc_32 = method._get(offset, get_compressobj)

Expand Down
23 changes: 22 additions & 1 deletion test_stream_zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,27 @@ def files():
]


@pytest.mark.parametrize(
    "method",
    [
        NO_COMPRESSION_32,
        NO_COMPRESSION_64,
    ],
)
def test_with_stream_unzip_with_no_compresion_no_crc_32(method):
    """Round-trip uncompressed members whose CRC-32 argument is given as 0.

    Each member is created with ``method(size, 0)`` and the archive is read
    back with ``stream_unzip``; names, sizes and contents must match.
    """
    modified_at = datetime.strptime('2021-01-01 21:01:12', '%Y-%m-%d %H:%M:%S')
    file_mode = stat.S_IFREG | 0o600

    def files():
        yield 'file-1', modified_at, file_mode, method(20000, 0), (b'a' * 10000, b'b' * 10000)
        yield 'file-2', modified_at, file_mode, method(2, 0), (b'c', b'd')

    expected = [
        (b'file-1', 20000, b'a' * 10000 + b'b' * 10000),
        (b'file-2', 2, b'cd'),
    ]

    # Join each member's chunks while iterating, as the original comprehension did.
    unzipped = []
    for name, size, chunks in stream_unzip(stream_zip(files())):
        unzipped.append((name, size, b''.join(chunks)))

    assert expected == unzipped


@pytest.mark.parametrize(
"method",
[
Expand All @@ -149,7 +170,7 @@ def test_with_stream_unzip_with_no_compresion_bad_crc_32(method):

def files():
yield 'file-1', now, mode, method(20000, zlib.crc32(b'a' * 10000 + b'b' * 10000)), (b'a' * 10000, b'b' * 10000)
yield 'file-1', now, mode, method(1, zlib.crc32(b'')), (b'a',)
yield 'file-1', now, mode, method(1, zlib.crc32(b'x')), (b'a',)

with pytest.raises(CRC32IntegrityError):
for name, size, chunks in stream_unzip(stream_zip(files())):
Expand Down