From 5974e5c688e6427ea260fc872d68edc6ea4c6f5e Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Wed, 23 Oct 2024 16:27:19 +0200 Subject: [PATCH] Fix download local dir edge case (remove lru_cache) --- src/huggingface_hub/_local_folder.py | 4 ---- tests/test_local_folder.py | 34 ++++++++++++++++------------ 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/huggingface_hub/_local_folder.py b/src/huggingface_hub/_local_folder.py index 049394af1d..6fd05f053a 100644 --- a/src/huggingface_hub/_local_folder.py +++ b/src/huggingface_hub/_local_folder.py @@ -53,7 +53,6 @@ import os import time from dataclasses import dataclass -from functools import lru_cache from pathlib import Path from typing import Optional @@ -179,7 +178,6 @@ def save(self, paths: LocalUploadFilePaths) -> None: self.timestamp = new_timestamp -@lru_cache(maxsize=128) # ensure singleton def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFilePaths: """Compute paths to the files related to a download process. @@ -220,7 +218,6 @@ def get_local_download_paths(local_dir: Path, filename: str) -> LocalDownloadFil return LocalDownloadFilePaths(file_path=file_path, lock_path=lock_path, metadata_path=metadata_path) -@lru_cache(maxsize=128) # ensure singleton def get_local_upload_paths(local_dir: Path, filename: str) -> LocalUploadFilePaths: """Compute paths to the files related to an upload process. @@ -404,7 +401,6 @@ def write_download_metadata(local_dir: Path, filename: str, commit_hash: str, et f.write(f"{commit_hash}\n{etag}\n{time.time()}\n") -@lru_cache() def _huggingface_dir(local_dir: Path) -> Path: """Return the path to the `.cache/huggingface` directory in a local directory.""" # Wrap in lru_cache to avoid overwriting the .gitignore file if called multiple times diff --git a/tests/test_local_folder.py b/tests/test_local_folder.py index 76a0c444db..9d19f2f4db 100644 --- a/tests/test_local_folder.py +++ b/tests/test_local_folder.py @@ -74,14 +74,17 @@ def test_local_download_paths(tmp_path: Path): assert paths.incomplete_path("etag123").parent.is_dir() -def test_local_download_paths_are_cached(tmp_path: Path): - """Test local download paths are cached.""" - # No need for an exact singleton here. - # We just want to avoid recreating the dataclass on consecutive calls (happens often - # in the process). +def test_local_download_paths_are_recreated_each_time(tmp_path: Path): paths1 = get_local_download_paths(tmp_path, "path/in/repo.txt") + assert paths1.file_path.parent.is_dir() + assert paths1.metadata_path.parent.is_dir() + + paths1.file_path.parent.rmdir() + paths1.metadata_path.parent.rmdir() + paths2 = get_local_download_paths(tmp_path, "path/in/repo.txt") - assert paths1 is paths2 + assert paths2.file_path.parent.is_dir() + assert paths2.metadata_path.parent.is_dir() @pytest.mark.skipif(os.name != "nt", reason="Windows-specific test.") @@ -198,14 +201,17 @@ def test_local_upload_paths(tmp_path: Path): assert paths.lock_path.parent.is_dir() -def test_local_upload_paths_are_cached(tmp_path: Path): - """Test local upload paths are cached.""" - # No need for an exact singleton here. - # We just want to avoid recreating the dataclass on consecutive calls (happens often - # in the process). - paths1 = get_local_download_paths(tmp_path, "path/in/repo.txt") - paths2 = get_local_download_paths(tmp_path, "path/in/repo.txt") - assert paths1 is paths2 +def test_local_upload_paths_are_recreated_each_time(tmp_path: Path): + paths1 = get_local_upload_paths(tmp_path, "path/in/repo.txt") + assert paths1.file_path.parent.is_dir() + assert paths1.metadata_path.parent.is_dir() + + paths1.file_path.parent.rmdir() + paths1.metadata_path.parent.rmdir() + + paths2 = get_local_upload_paths(tmp_path, "path/in/repo.txt") + assert paths2.file_path.parent.is_dir() + assert paths2.metadata_path.parent.is_dir() @pytest.mark.skipif(os.name != "nt", reason="Windows-specific test.")