diff --git a/src/huggingface_hub/_local_folder.py b/src/huggingface_hub/_local_folder.py
index 3b2ba0a185..264d51c58e 100644
--- a/src/huggingface_hub/_local_folder.py
+++ b/src/huggingface_hub/_local_folder.py
@@ -49,6 +49,8 @@
 ```
 """
 
+import base64
+import hashlib
 import logging
 import os
 import time
@@ -84,7 +86,7 @@ class LocalDownloadFilePaths:
 
     def incomplete_path(self, etag: str) -> Path:
         """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
-        return self.metadata_path.with_suffix(f".{etag}.incomplete")
+        return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
 
 
 @dataclass(frozen=True)
@@ -424,3 +426,13 @@ def _huggingface_dir(local_dir: Path) -> Path:
         except OSError:
             pass
     return path
+
+
+def _short_hash(filename: str) -> str:
+    """Return a fixed-length, filesystem-safe digest of `filename`.
+
+    The SHA-1 digest is encoded with URL-safe base64 (no `/` or `+`), so the result is always
+    28 characters long whatever the length of `filename`. It is only used to derive the name
+    of a temporary `.incomplete` file, not for any security purpose.
+    """
+    return base64.urlsafe_b64encode(hashlib.sha1(filename.encode()).digest()).decode()
diff --git a/tests/test_local_folder.py b/tests/test_local_folder.py
index 4a50bbed5e..da076f851d 100644
--- a/tests/test_local_folder.py
+++ b/tests/test_local_folder.py
@@ -79,13 +79,17 @@ def test_local_download_paths(tmp_path: Path):
     assert paths.metadata_path.parent.is_dir()
     assert paths.lock_path.parent.is_dir()
 
-    # Incomplete path are etag-based
-    assert (
-        paths.incomplete_path("etag123")
-        == tmp_path / ".cache" / "huggingface" / "download" / "path" / "in" / "repo.txt.etag123.incomplete"
-    )
+    # Incomplete paths are etag-based
+    incomplete_path = paths.incomplete_path("etag123")
+    assert incomplete_path.parent == tmp_path / ".cache" / "huggingface" / "download" / "path" / "in"
+    assert incomplete_path.name.endswith(".etag123.incomplete")
     assert paths.incomplete_path("etag123").parent.is_dir()
 
+    # Incomplete paths are unique per file per etag
+    other_paths = get_local_download_paths(tmp_path, "path/in/repo_other.txt")
+    other_incomplete_path = other_paths.incomplete_path("etag123")
+    assert incomplete_path != other_incomplete_path  # different .incomplete files to prevent concurrency issues
+
 
 def test_local_download_paths_are_recreated_each_time(tmp_path: Path):
     paths1 = get_local_download_paths(tmp_path, "path/in/repo.txt")