From 7482950bb7e8d152a8817f6c4de2fdb32666b291 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Sat, 17 Jun 2023 10:00:42 -0500 Subject: [PATCH 1/3] Simpler file chunking --- packaging/wheel/relocate.py | 2 +- torchvision/datasets/utils.py | 2 +- torchvision/prototype/datasets/_builtin/README.md | 2 +- torchvision/prototype/datasets/utils/_resource.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py index e6a4ef9d458..2a459a353a4 100644 --- a/packaging/wheel/relocate.py +++ b/packaging/wheel/relocate.py @@ -79,7 +79,7 @@ def rehash(path, blocksize=1 << 20): h = hashlib.sha256() length = 0 with open(path, "rb") as f: - for block in read_chunks(f, size=blocksize): + while block := f.read(blocksize): length += len(block) h.update(block) digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") diff --git a/torchvision/datasets/utils.py b/torchvision/datasets/utils.py index 220c1ae79d5..b79b4ef4e61 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: else: md5 = hashlib.md5() with open(fpath, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): md5.update(chunk) return md5.hexdigest() diff --git a/torchvision/prototype/datasets/_builtin/README.md b/torchvision/prototype/datasets/_builtin/README.md index 05d61c6870e..3b33100eb81 100644 --- a/torchvision/prototype/datasets/_builtin/README.md +++ b/torchvision/prototype/datasets/_builtin/README.md @@ -91,7 +91,7 @@ import hashlib def sha256sum(path, chunk_size=1024 * 1024): checksum = hashlib.sha256() with open(path, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): checksum.update(chunk) print(checksum.hexdigest()) ``` diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py index af4ede38dc0..dadec014b52 100644 --- a/torchvision/prototype/datasets/utils/_resource.py +++ b/torchvision/prototype/datasets/utils/_resource.py @@ -136,7 +136,7 @@ def download(self, root: Union[str, pathlib.Path], *, skip_integrity_check: bool def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None: hash = hashlib.sha256() with open(path, "rb") as file: - for chunk in iter(lambda: file.read(chunk_size), b""): + while chunk := file.read(chunk_size): hash.update(chunk) sha256 = hash.hexdigest() if sha256 != self.sha256: From 524a2f2d0410cb1492bc5c41df22c68565be6b1a Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Mon, 19 Jun 2023 12:29:48 -0500 Subject: [PATCH 2/3] Remove unused function --- packaging/wheel/relocate.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py index 2a459a353a4..69fe36b7f67 100644 --- a/packaging/wheel/relocate.py +++ b/packaging/wheel/relocate.py @@ -65,15 +65,6 @@ PYTHON_VERSION = sys.version_info -def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): - """Yield pieces of data from a file-like object until EOF.""" - while True: - chunk = file.read(size) - if not chunk: - break - yield chunk - - def rehash(path, blocksize=1 << 20): """Return (hash, length) for path using hashlib.sha256()""" h = hashlib.sha256() From 9d782ad0ef3f1be708d5f39917045a63e2924f35 Mon Sep 17 00:00:00 2001 From: "Adam J. Stewart" Date: Mon, 19 Jun 2023 15:03:47 -0500 Subject: [PATCH 3/3] Remove unused import --- packaging/wheel/relocate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py index 69fe36b7f67..6a8d35f1a6e 100644 --- a/packaging/wheel/relocate.py +++ b/packaging/wheel/relocate.py @@ -2,7 +2,6 @@ import glob import hashlib -import io # Standard library imports import os