Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for hashing files with header. #194

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions model_signing/hashing/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,31 @@
```python
>>> with open("/tmp/file", "w") as f:
... f.write("0123abcd")
>>> hasher = ShardedFileHasher("/tmo/file", SHA256(), start=4, end=8)
>>> hasher = ShardedFileHasher("/tmp/file", SHA256(), start=4, end=8)
>>> digest = hasher.compute()
>>> digest.digest_hex
'88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589'
```

Similarly, we can emulate a missing header:
```python
>>> with open("/tmp/file", "w") as f:
... f.write("abcd")
>>> hasher = FileHasher("/tmp/file", SHA256())
>>> digest = hasher.compute(header=b"0123")
>>> digest.digest_hex
'64eab0705394501ced0ff991bf69077fd3846c1d964e3db28d9600891715d848'
```

This is the same as hashing a file with the entire contents:
```python
>>> with open("/tmp/file", "w") as f:
... f.write("0123abcd")
>>> hasher = FileHasher("/tmp/file", SHA256())
>>> digest = hasher.compute()
>>> digest.digest_hex
'64eab0705394501ced0ff991bf69077fd3846c1d964e3db28d9600891715d848'
```
"""

import pathlib
Expand Down Expand Up @@ -101,8 +121,8 @@ def digest_name(self) -> str:
return f"file-{self._content_hasher.digest_name}"

@override
def compute(self) -> hashing.Digest:
self._content_hasher.reset()
def compute(self, *, header: bytes = b"") -> hashing.Digest:
self._content_hasher.reset(header)

if self._chunk_size == 0:
with open(self._file, "rb") as f:
Expand Down Expand Up @@ -144,8 +164,7 @@ def __init__(
Args:
file: The file to hash. Use `set_file` to reset it.
content_hasher: A `hashing.HashEngine` instance used to compute the
digest of the file. This instance must not be used outside of this
instance. However, it may be pre-initialized with a header.
digest of the file.
start: The file offset to start reading from. Must be valid. Reset
with `set_shard`.
end: The file offset to stop reading at. Must be strictly greater
Expand Down Expand Up @@ -195,8 +214,8 @@ def set_shard(self, *, start: int, end: int) -> None:
self._end = end

@override
def compute(self) -> hashing.Digest:
self._content_hasher.reset()
def compute(self, *, header: bytes = b"") -> hashing.Digest:
self._content_hasher.reset(header)

with open(self._file, "rb") as f:
f.seek(self._start)
Expand Down
7 changes: 5 additions & 2 deletions model_signing/hashing/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,11 @@ class HashEngine(metaclass=ABCMeta):
"""Generic hash engine."""

@abstractmethod
def compute(self) -> Digest:
"""Computes the digest of data passed to the engine."""
def compute(self, *, header: bytes = b"") -> Digest:
"""Computes the digest of data passed to the engine.

The method supports an optional header to be hashed before the data.
"""
pass

@property
Expand Down
3 changes: 2 additions & 1 deletion model_signing/hashing/memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def reset(self, data: bytes = b"") -> None:
self._hasher = hashlib.sha256(data)

@override
def compute(self) -> hashing.Digest:
def compute(self, *, header: bytes = b"") -> hashing.Digest:
del header # unused with streaming digests, set in `reset` instead
return hashing.Digest(self.digest_name, self._hasher.digest())

@override
Expand Down
3 changes: 2 additions & 1 deletion model_signing/hashing/precomputed.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ class PrecomputedDigest(hashing.HashEngine):
_digest_value: bytes

@override
def compute(self) -> hashing.Digest:
def compute(self, *, header: bytes = b"") -> hashing.Digest:
del header # unused with precomputed digests
return hashing.Digest(self._digest_type, self._digest_value)

@override
Expand Down
8 changes: 8 additions & 0 deletions model_signing/hashing/precomputed_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,11 @@ def test_expected_hash_type(self):
assert hasher.digest_name == "test"
digest = hasher.compute()
assert digest.algorithm == "test"

def test_compute_with_header(self):
    """Checks that a precomputed digest is unaffected by a header.

    `PrecomputedDigest.compute` discards its `header` argument, so the
    returned digest must equal the stored value both with and without one.
    """
    hash_value = b"value"
    hasher = precomputed.PrecomputedDigest("test", hash_value)

    # Baseline: no header supplied.
    digest = hasher.compute()
    assert digest.digest_value == hash_value

    # `header` is annotated as `bytes`, so pass a bytes literal (not `str`)
    # to keep the call type-correct even though the value is ignored here.
    digest = hasher.compute(header=b"some data")
    assert digest.digest_value == hash_value
Loading