diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index e6843b7074..6a6c0e9dfa 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -1,4 +1,3 @@ -import copy import os import re import tempfile @@ -397,7 +396,7 @@ def _ls_tree( parent_path = self._parent(cache_path_info["name"]) self.dircache.setdefault(parent_path, []).append(cache_path_info) out.append(cache_path_info) - return copy.deepcopy(out) # copy to not let users modify the dircache + return out def glob(self, path, **kwargs): # Set expand_info=False by default to get a x10 speed boost @@ -561,7 +560,7 @@ def info(self, path: str, refresh: bool = False, revision: Optional[str] = None, if not expand_info: out = {k: out[k] for k in ["name", "size", "type"]} assert out is not None - return copy.deepcopy(out) # copy to not let users modify the dircache + return out def exists(self, path, **kwargs): """Is there a file at the given path""" diff --git a/tests/test_hf_file_system.py b/tests/test_hf_file_system.py index ab7e6ed7bd..d0037d37a6 100644 --- a/tests/test_hf_file_system.py +++ b/tests/test_hf_file_system.py @@ -1,3 +1,4 @@ +import copy import datetime import io import os @@ -391,7 +392,10 @@ def test_find_root_directory_no_revision_with_incomplete_cache(self): repo_type="dataset", ) - files = self.hffs.find(self.hf_path, detail=True) + # Copy the result to make it robust to the cache modifications + # See discussion in https://github.com/huggingface/huggingface_hub/pull/2103 + # for info on why this is not done in `HfFileSystem.find` by default + files = copy.deepcopy(self.hffs.find(self.hf_path, detail=True)) # some directories not in cache self.hffs.dircache.pop(self.hf_path + "/data/sub_data")