Skip to content

Commit

Permalink
Pull request #33: IFSBENCH-28: Replaced external sha256sum call by Python-only code.
Browse files Browse the repository at this point in the history

Merge in RDX/ifsbench from ~DIJB/ifsbench:dijb_sha256.IFSBENCH-28 to master

* commit '58eaa504b2017a71782edb45216720fe22600750':
  IFSBENCH-28: Fixed expected exceptions and added test. Minor code cleanup.
  IFSBENCH-28: Replaced external sha256sum call by Python-only code.
  • Loading branch information
johannesbulin authored and reuterbal committed Oct 9, 2024
2 parents 5f34136 + 58eaa50 commit 915bcfa
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 11 deletions.
27 changes: 16 additions & 11 deletions ifsbench/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
"""

from collections import defaultdict
from hashlib import sha256
from pathlib import Path
from subprocess import CalledProcessError
import glob
import tempfile
import yaml

from .logging import header, success, warning
Expand Down Expand Up @@ -108,15 +107,21 @@ def src_dir(self, src_dir):
@staticmethod
def _sha256sum(filepath):
"""Create SHA-256 checksum for the file at the given path"""

filepath = Path(filepath)
with tempfile.TemporaryDirectory(prefix='ifsbench') as tmp_dir:
logfile = Path(tmp_dir)/'checksum.sha256'
cmd = ['sha256sum', str(filepath)]
with logfile.open('w', encoding='utf-8') as f:
execute(cmd, stdout=f)
checksum, name = logfile.read_text().split()
assert name == str(filepath)
return checksum

# Use 4MB chunks for reading the file (reading it completely into
# memory will be a bad idea for large GRIB files).
chunk_size = 4*1024*1024
sha = sha256()

with filepath.open('rb') as f:
chunk = f.read(chunk_size)
while chunk:
sha.update(chunk)
chunk = f.read(chunk_size)

return sha.hexdigest()

@staticmethod
def _size(filepath):
Expand Down Expand Up @@ -265,7 +270,7 @@ def _score_overlap_from_behind(string):
for path, src_dir in candidates:
try:
candidate_file = InputFile(path, src_dir)
except CalledProcessError:
except OSError:
continue
if candidate_file.checksum == input_file.checksum:
return candidate_file
Expand Down
3 changes: 3 additions & 0 deletions tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ def test_input_file(here):
assert the_file.checksum == also_the_file.checksum
assert the_file == also_the_file

with pytest.raises(OSError):
no_exist_file = InputFile('/i_dont_exist', compute_metadata=True)


def test_experiment_files(here):
"""
Expand Down

0 comments on commit 915bcfa

Please sign in to comment.