From 02cccadb01f22bdfab4a9613dec55536dd3f1d91 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Thu, 7 Jul 2022 14:25:08 +0200 Subject: [PATCH 1/5] Add HfFileSystem --- src/huggingface_hub/__init__.py | 12 +- src/huggingface_hub/hf_filesystem.py | 234 +++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 7 deletions(-) create mode 100644 src/huggingface_hub/hf_filesystem.py diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index 23524dcf60..4cc749b577 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -20,15 +20,8 @@ # vendored from https://github.com/scientific-python/lazy_loader import importlib import importlib.util -import inspect import os import sys -import types -import warnings - - -class _LazyImportWarning(Warning): - pass def _attach(package_name, submodules=None, submod_attrs=None): @@ -175,6 +168,11 @@ def __dir__(): "upload_folder", "whoami", ], + **{ + "hf_filesystem": ["HfFileSystem"] + if importlib.util.find_spec("fsspec") + else {} + }, "hub_mixin": ["ModelHubMixin", "PyTorchModelHubMixin"], "inference_api": ["InferenceApi"], "keras_mixin": [ diff --git a/src/huggingface_hub/hf_filesystem.py b/src/huggingface_hub/hf_filesystem.py new file mode 100644 index 0000000000..0cc5666ca2 --- /dev/null +++ b/src/huggingface_hub/hf_filesystem.py @@ -0,0 +1,234 @@ +import tempfile +from pathlib import PurePosixPath +from typing import Optional + +import fsspec + +from .constants import ( + HUGGINGFACE_HUB_CACHE, + REPO_TYPE_DATASET, + REPO_TYPE_MODEL, + REPO_TYPE_SPACE, + REPO_TYPES, +) +from .file_download import hf_hub_url +from .hf_api import ( + HfFolder, + dataset_info, + delete_file, + model_info, + space_info, + upload_file, +) + + +def _repo_type_to_info_func(repo_type): + if repo_type == REPO_TYPE_DATASET: + return dataset_info + elif repo_type == REPO_TYPE_MODEL: + return model_info + elif repo_type == REPO_TYPE_SPACE: + return space_info + else: # None + return model_info + + 
+class HfFileSystem(fsspec.AbstractFileSystem): + """ + Access a remote Hugging Face Hub repository as if it were a local file system. + + Args: + repo_id (`str`): + The remote repository to access as if it were a local file system, + for example: `"username/custom_transformers"` + token (`str`, *optional*): + Authentication token, obtained with `HfApi.login` method. Will + default to the stored token. + repo_type (`str`, *optional*): + Set to `"dataset"` or `"space"` if the remote repository is a dataset or + space repository, `None` or `"model"` if it is a model repository. Default is + `None`. + revision (`str`, *optional*): + An optional Git revision id which can be a branch name, a tag, or a + commit hash. Defaults to the head of the `"main"` branch. + + Example usage (direct): + + ```python + >>> from huggingface_hub import HfFileSystem + + >>> hffs = HfFileSystem("username/my-dataset", repo_type="dataset") + + >>> # Read a remote file + >>> with hffs.open("remote/file/in/repo.bin") as f: + ... data = f.read() + + >>> # Write a remote file + >>> with hffs.open("remote/file/in/repo.bin", "wb") as f: + ... f.write(data) + ``` + + Example usage (via [`fsspec`](https://filesystem-spec.readthedocs.io/en/latest/)): + + ```python + >>> import fsspec + + >>> # Read a remote file + >>> with fsspec.open("hf:username/my-dataset:/remote/file/in/repo.bin", repo_type="dataset") as f: + ... data = f.read() + + >>> # Write a remote file + >>> with fsspec.open("hf:username/my-dataset:/remote/file/in/repo.bin", "wb", repo_type="dataset") as f: + ...
f.write(data) + ``` + """ + + root_marker = "" + protocol = "hf" + + def __init__( + self, + repo_id: str, + token: Optional[str] = None, + repo_type: Optional[str] = None, + revision: Optional[str] = None, + **kwargs, + ): + super().__init__(self, **kwargs) + + if repo_type not in REPO_TYPES: + raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}") + + self.repo_id = repo_id + self.token = token if token is not None else HfFolder.get_token() + self.repo_type = repo_type + self.revision = revision + # Cached attributes + self._repo_info = None + self._repo_entries_spec = None + + def _get_repo_info(self): + if self._repo_info is None: + self._repo_info = _repo_type_to_info_func(self.repo_type)( + self.repo_id, revision=self.revision, token=self.token + ) + + def _get_repo_entries_spec(self): + if self._repo_entries_spec is None: + self._get_repo_info() + self._repo_entries_spec = {} + for hf_file in self._repo_info.siblings: + # TODO(QL): add sizes + self._repo_entries_spec[hf_file.rfilename] = { + "name": hf_file.rfilename, + "size": None, + "type": "file", + } + self._repo_entries_spec.update( + { + str(d): {"name": str(d), "size": None, "type": "directory"} + for d in list(PurePosixPath(hf_file.rfilename).parents)[:-1] + } + ) + + def _invalidate_repo_cache(self): + self._repo_info = None + self._repo_entries_spec = None + + @classmethod + def _strip_protocol(cls, path): + path = super()._strip_protocol(path).lstrip("/") + if ":/" in path: + path = path.split(":", 1)[1] + return path.lstrip("/") + + @staticmethod + def _get_kwargs_from_urls(path): + if path.startswith("hf://"): + path = path[5:] + out = {"repo_id": path} + if ":/" in path: + out["repo_id"], out["path"] = path.split(":/", 1) + if "@" in out["repo_id"]: + out["repo_id"], out["revision"] = out["repo_id"].split("@", 1) + return out + + def _open( + self, + path: str, + mode: str = "rb", + **kwargs, + ): + # TODO(mariosasko): add support for the "ab" mode + if mode == "ab": + raise 
NotImplementedError("Appending to files is not supported") + + if mode == "rb": + self._get_repo_info() + url = hf_hub_url( + self.repo_id, + path, + repo_type=self.repo_type, + revision=self.revision, + ) + return fsspec.open( + url, + mode=mode, + headers={"authorization": f"Bearer {self.token}"}, + ).open() + else: + return TempFileUploader(self, path, mode=mode) + + def _rm(self, path): + path = self._strip_protocol(path) + delete_file( + path_in_repo=path, + repo_id=self.repo_id, + token=self.token, + repo_type=self.repo_type, + revision=self.revision, + ) + self._invalidate_repo_cache() + + def info(self, path, **kwargs): + self._get_repo_entries_spec() + path = self._strip_protocol(path) + if path in self._repo_entries_spec: + return self._repo_entries_spec[path] + else: + raise FileNotFoundError(path) + + def ls(self, path, detail=False, **kwargs): + self._get_repo_entries_spec() + path = PurePosixPath(path.strip("/")) + paths = {} + for p, f in self._repo_entries_spec.items(): + p = PurePosixPath(p.strip("/")) + root = p.parent + if root == path: + paths[str(p)] = f + out = list(paths.values()) + if detail: + return out + else: + return list(sorted(f["name"] for f in out)) + + +class TempFileUploader(fsspec.spec.AbstractBufferedFile): + def _initiate_upload(self): + self.temp_file = tempfile.TemporaryFile(dir=HUGGINGFACE_HUB_CACHE) + + def _upload_chunk(self, final=False): + self.buffer.seek(0) + self.temp_file.write(self.buffer.read()) + if final: + upload_file( + path_or_fileobj=self.temp_file.file, + path_in_repo=self.path, + repo_id=self.fs.repo_id, + token=self.fs.token, + repo_type=self.fs.repo_type, + revision=self.fs.revision, + ) + self.fs._invalidate_repo_cache() + self.temp_file.close() From f4f53ea5ba97026a00423acc12faf9f5a51546c7 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Thu, 7 Jul 2022 14:25:24 +0200 Subject: [PATCH 2/5] Add tests --- setup.py | 1 + tests/test_filesystem.py | 134 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 
135 insertions(+) create mode 100644 tests/test_filesystem.py diff --git a/setup.py b/setup.py index 01a512cff8..30efe89ad0 100644 --- a/setup.py +++ b/setup.py @@ -40,6 +40,7 @@ def get_version() -> str: "pytest-cov", "datasets", "soundfile", + "fsspec", ] extras["quality"] = [ diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py new file mode 100644 index 0000000000..bcd2063aa2 --- /dev/null +++ b/tests/test_filesystem.py @@ -0,0 +1,134 @@ +import time +import unittest +from unittest.mock import patch + +import fsspec +from huggingface_hub import HfApi, hf_hub_url +from huggingface_hub.constants import ENDPOINT +from huggingface_hub.hf_filesystem import HfFileSystem, TempFileUploader + +from .testing_constants import ENDPOINT_STAGING, TOKEN, USER + + +class HfFileSystemTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls._api = HfApi(endpoint=ENDPOINT_STAGING) + cls._token = TOKEN + cls._api.set_access_token(TOKEN) + + cls._hf_api_model_info_patch = patch( + "huggingface_hub.hf_filesystem.model_info", cls._api.model_info + ) + cls._hf_api_space_info_patch = patch( + "huggingface_hub.hf_filesystem.space_info", cls._api.space_info + ) + cls._hf_api_dataset_info_patch = patch( + "huggingface_hub.hf_filesystem.dataset_info", cls._api.dataset_info + ) + cls._hf_api_upload_file_patch = patch( + "huggingface_hub.hf_filesystem.upload_file", cls._api.upload_file + ) + cls._hf_api_delete_file_patch = patch( + "huggingface_hub.hf_filesystem.delete_file", cls._api.delete_file + ) + + def _hf_hub_url_staging(*args, **kwargs): + return hf_hub_url(*args, **kwargs).replace(ENDPOINT, ENDPOINT_STAGING) + + cls._hf_hub_url_patch = patch( + "huggingface_hub.hf_filesystem.hf_hub_url", side_effect=_hf_hub_url_staging + ) + + cls._hf_api_model_info_patch.start() + cls._hf_api_space_info_patch.start() + cls._hf_api_dataset_info_patch.start() + cls._hf_api_upload_file_patch.start() + cls._hf_api_delete_file_patch.start() + cls._hf_hub_url_patch.start() + + if 
HfFileSystem.protocol not in fsspec.available_protocols(): + fsspec.register_implementation(HfFileSystem.protocol, HfFileSystem) + + @classmethod + def tearDownClass(cls): + cls._api.unset_access_token() + + cls._hf_api_model_info_patch.stop() + cls._hf_api_space_info_patch.stop() + cls._hf_api_dataset_info_patch.stop() + cls._hf_api_upload_file_patch.stop() + cls._hf_api_delete_file_patch.stop() + cls._hf_hub_url_patch.stop() + + def setUp(self): + repo_name = f"repo_txt_data-{int(time.time() * 10e3)}" + repo_id = f"{USER}/{repo_name}" + self._api.create_repo( + repo_id, + token=self._token, + repo_type="dataset", + private=True, + ) + self._api.upload_file( + repo_id=repo_id, + path_or_fileobj="dummy text data".encode("utf-8"), + token=TOKEN, + path_in_repo="data/text_data.txt", + repo_type="dataset", + ) + self.repo_id = repo_id + + def tearDown(self): + self._api.delete_repo(self.repo_id, token=self._token, repo_type="dataset") + + def test_glob(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + assert sorted(hffs.glob("*")) == sorted([".gitattributes", "data"]) + + def test_file_type(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + assert hffs.isdir("data") and not hffs.isdir(".gitattributes") + assert hffs.isfile("data/text_data.txt") and not hffs.isfile("data") + + def test_remove_file(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + hffs.rm("data/text_data.txt") + assert hffs.glob("data/*") == [] + + def test_read_file(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + with hffs.open("data/text_data.txt", "r") as f: + assert f.read() == "dummy text data" + + def test_write_file(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + data = "new text data" + with hffs.open("data/new_text_data.txt", "w") as f: + f.write(data) + assert "data/new_text_data.txt" in hffs.glob("data/*") + with 
hffs.open("data/new_text_data.txt", "r") as f: + assert f.read() == data + + def test_write_file_multiple_chunks(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + data = "new text data big" * TempFileUploader.DEFAULT_BLOCK_SIZE + with hffs.open("data/new_text_data_big.txt", "w") as f: + for _ in range(2): + f.write(data) + + assert "data/new_text_data_big.txt" in hffs.glob("data/*") + with hffs.open("data/new_text_data_big.txt", "r") as f: + for _ in range(2): + f.read(len(data)) == data + + def test_initialize_from_fsspec(self): + fs, _, paths = fsspec.get_fs_token_paths( + f"hf://{self.repo_id}:/data/text_data.txt", + storage_options={"token": self._token, "repo_type": "dataset"}, + ) + assert isinstance(fs, HfFileSystem) + assert fs.repo_id == self.repo_id + assert fs.token == self._token + assert fs.repo_type == "dataset" + assert paths == ["data/text_data.txt"] From 091dcce3c35be69f6386c1298aba11cd60cc70fb Mon Sep 17 00:00:00 2001 From: mariosasko Date: Thu, 7 Jul 2022 18:29:48 +0200 Subject: [PATCH 3/5] Add append support --- src/huggingface_hub/hf_filesystem.py | 12 ++++++---- tests/test_filesystem.py | 36 +++++++++++++++++----------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/src/huggingface_hub/hf_filesystem.py b/src/huggingface_hub/hf_filesystem.py index 0cc5666ca2..bb4b6c83f6 100644 --- a/src/huggingface_hub/hf_filesystem.py +++ b/src/huggingface_hub/hf_filesystem.py @@ -1,4 +1,5 @@ import tempfile +from functools import partial from pathlib import PurePosixPath from typing import Optional @@ -159,10 +160,6 @@ def _open( mode: str = "rb", **kwargs, ): - # TODO(mariosasko): add support for the "ab" mode - if mode == "ab": - raise NotImplementedError("Appending to files is not supported") - if mode == "rb": self._get_repo_info() url = hf_hub_url( @@ -217,10 +214,15 @@ def ls(self, path, detail=False, **kwargs): class TempFileUploader(fsspec.spec.AbstractBufferedFile): def _initiate_upload(self): 
self.temp_file = tempfile.TemporaryFile(dir=HUGGINGFACE_HUB_CACHE) + if self.mode == "ab": + with self.fs.open(self.path, "rb") as f: + for block in iter(partial(f.read, self.blocksize), b""): + self.temp_file.write(block) def _upload_chunk(self, final=False): self.buffer.seek(0) - self.temp_file.write(self.buffer.read()) + block = self.buffer.read() + self.temp_file.write(block) if final: upload_file( path_or_fileobj=self.temp_file.file, diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index bcd2063aa2..bbf08fbee8 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -84,31 +84,31 @@ def tearDown(self): def test_glob(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") - assert sorted(hffs.glob("*")) == sorted([".gitattributes", "data"]) + self.assertEqual(sorted(hffs.glob("*")), sorted([".gitattributes", "data"])) def test_file_type(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") - assert hffs.isdir("data") and not hffs.isdir(".gitattributes") - assert hffs.isfile("data/text_data.txt") and not hffs.isfile("data") + self.assertTrue(hffs.isdir("data") and not hffs.isdir(".gitattributes")) + self.assertTrue(hffs.isfile("data/text_data.txt") and not hffs.isfile("data")) def test_remove_file(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") hffs.rm("data/text_data.txt") - assert hffs.glob("data/*") == [] + self.assertEqual(hffs.glob("data/*"), []) def test_read_file(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") with hffs.open("data/text_data.txt", "r") as f: - assert f.read() == "dummy text data" + self.assertEqual(f.read(), "dummy text data") def test_write_file(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") data = "new text data" with hffs.open("data/new_text_data.txt", "w") as f: f.write(data) - assert "data/new_text_data.txt" in hffs.glob("data/*") + 
self.assertIn("data/new_text_data.txt", hffs.glob("data/*")) with hffs.open("data/new_text_data.txt", "r") as f: - assert f.read() == data + self.assertEqual(f.read(), data) def test_write_file_multiple_chunks(self): hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") @@ -117,18 +117,26 @@ def test_write_file_multiple_chunks(self): for _ in range(2): f.write(data) - assert "data/new_text_data_big.txt" in hffs.glob("data/*") + self.assertIn("data/new_text_data_big.txt", hffs.glob("data/*")) with hffs.open("data/new_text_data_big.txt", "r") as f: for _ in range(2): - f.read(len(data)) == data + self.assertEqual(f.read(len(data)), data) + + def test_append_file(self): + hffs = HfFileSystem(self.repo_id, token=self._token, repo_type="dataset") + with hffs.open("data/text_data.txt", "a") as f: + f.write(" appended text") + + with hffs.open("data/text_data.txt", "r") as f: + self.assertEqual(f.read(), "dummy text data appended text") def test_initialize_from_fsspec(self): fs, _, paths = fsspec.get_fs_token_paths( f"hf://{self.repo_id}:/data/text_data.txt", storage_options={"token": self._token, "repo_type": "dataset"}, ) - assert isinstance(fs, HfFileSystem) - assert fs.repo_id == self.repo_id - assert fs.token == self._token - assert fs.repo_type == "dataset" - assert paths == ["data/text_data.txt"] + self.assertIsInstance(fs, HfFileSystem) + self.assertEqual(fs.repo_id, self.repo_id) + self.assertEqual(fs.token, self._token) + self.assertEqual(fs.repo_type, "dataset") + self.assertEqual(paths, ["data/text_data.txt"]) From 7a225927e7030a09dfbeb9b7703f37ed1c380626 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Wed, 20 Jul 2022 17:19:45 +0200 Subject: [PATCH 4/5] Fix temp_file handling --- src/huggingface_hub/hf_filesystem.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/huggingface_hub/hf_filesystem.py b/src/huggingface_hub/hf_filesystem.py index bb4b6c83f6..03403747ef 100644 --- 
a/src/huggingface_hub/hf_filesystem.py +++ b/src/huggingface_hub/hf_filesystem.py @@ -1,3 +1,4 @@ +import os import tempfile from functools import partial from pathlib import PurePosixPath @@ -5,13 +6,7 @@ import fsspec -from .constants import ( - HUGGINGFACE_HUB_CACHE, - REPO_TYPE_DATASET, - REPO_TYPE_MODEL, - REPO_TYPE_SPACE, - REPO_TYPES, -) +from .constants import REPO_TYPE_DATASET, REPO_TYPE_MODEL, REPO_TYPE_SPACE, REPO_TYPES from .file_download import hf_hub_url from .hf_api import ( HfFolder, @@ -161,7 +156,6 @@ def _open( **kwargs, ): if mode == "rb": - self._get_repo_info() url = hf_hub_url( self.repo_id, path, @@ -213,7 +207,7 @@ def ls(self, path, detail=False, **kwargs): class TempFileUploader(fsspec.spec.AbstractBufferedFile): def _initiate_upload(self): - self.temp_file = tempfile.TemporaryFile(dir=HUGGINGFACE_HUB_CACHE) + self.temp_file = tempfile.NamedTemporaryFile(delete=False) if self.mode == "ab": with self.fs.open(self.path, "rb") as f: for block in iter(partial(f.read, self.blocksize), b""): @@ -224,13 +218,14 @@ def _upload_chunk(self, final=False): block = self.buffer.read() self.temp_file.write(block) if final: + self.temp_file.close() upload_file( - path_or_fileobj=self.temp_file.file, + path_or_fileobj=self.temp_file.name, path_in_repo=self.path, repo_id=self.fs.repo_id, token=self.fs.token, repo_type=self.fs.repo_type, revision=self.fs.revision, ) + os.remove(self.temp_file.name) self.fs._invalidate_repo_cache() - self.temp_file.close() From 8bb5a9828c9e6bd81fff1b6017183d1008a84515 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 25 Jul 2022 17:30:25 +0200 Subject: [PATCH 5/5] Use `dircache` for caching --- src/huggingface_hub/hf_filesystem.py | 87 +++++++++++----------------- 1 file changed, 33 insertions(+), 54 deletions(-) diff --git a/src/huggingface_hub/hf_filesystem.py b/src/huggingface_hub/hf_filesystem.py index 03403747ef..a818e13c3d 100644 --- a/src/huggingface_hub/hf_filesystem.py +++ b/src/huggingface_hub/hf_filesystem.py 
@@ -70,11 +70,11 @@ class HfFileSystem(fsspec.AbstractFileSystem): >>> import fsspec >>> # Read a remote file - >>> with fsspec.open("hf:username/my-dataset:/remote/file/in/repo.bin", repo_type="dataset") as f: + >>> with fsspec.open("hf://username/my-dataset:/remote/file/in/repo.bin", repo_type="dataset") as f: ... data = f.read() >>> # Write a remote file - >>> with fsspec.open("hf:username/my-dataset:/remote/file/in/repo.bin", "wb", repo_type="dataset") as f: + >>> with fsspec.open("hf://username/my-dataset:/remote/file/in/repo.bin", "wb", repo_type="dataset") as f: ... f.write(data) ``` """ @@ -99,43 +99,34 @@ def __init__( self.token = token if token is not None else HfFolder.get_token() self.repo_type = repo_type self.revision = revision - # Cached attributes - self._repo_info = None - self._repo_entries_spec = None - - def _get_repo_info(self): - if self._repo_info is None: - self._repo_info = _repo_type_to_info_func(self.repo_type)( - self.repo_id, revision=self.revision, token=self.token - ) - def _get_repo_entries_spec(self): - if self._repo_entries_spec is None: - self._get_repo_info() - self._repo_entries_spec = {} - for hf_file in self._repo_info.siblings: - # TODO(QL): add sizes - self._repo_entries_spec[hf_file.rfilename] = { - "name": hf_file.rfilename, - "size": None, - "type": "file", - } - self._repo_entries_spec.update( - { - str(d): {"name": str(d), "size": None, "type": "directory"} - for d in list(PurePosixPath(hf_file.rfilename).parents)[:-1] - } - ) - - def _invalidate_repo_cache(self): - self._repo_info = None - self._repo_entries_spec = None + def _dircache_from_repo_info(self): + repo_info = _repo_type_to_info_func(self.repo_type)( + self.repo_id, revision=self.revision, token=self.token + ) + for sibling in repo_info.siblings: + child = { + "name": sibling.rfilename, + "size": None, # waiting for #951 + "type": "file", + } + for parent in list(PurePosixPath(sibling.rfilename).parents)[:-1] + [ + self.root_marker + ]: + 
self.dircache.setdefault(str(parent), []).append(child) + child = {"name": str(parent), "size": None, "type": "directory"} + + def invalidate_cache(self, path=None): + if path is None: + self.dircache.clear() + else: + self.dircache.pop(path, None) + super().invalidate_cache(path) @classmethod def _strip_protocol(cls, path): path = super()._strip_protocol(path).lstrip("/") - if ":/" in path: - path = path.split(":", 1)[1] + *_, path = path.split(":/", 1) return path.lstrip("/") @staticmethod @@ -179,30 +170,18 @@ def _rm(self, path): repo_type=self.repo_type, revision=self.revision, ) - self._invalidate_repo_cache() + self.invalidate_cache() - def info(self, path, **kwargs): - self._get_repo_entries_spec() + def ls(self, path, detail=True, **kwargs): path = self._strip_protocol(path) - if path in self._repo_entries_spec: - return self._repo_entries_spec[path] - else: + if not self.dircache: + self._dircache_from_repo_info() + out = self._ls_from_cache(path) + if out is None: raise FileNotFoundError(path) - - def ls(self, path, detail=False, **kwargs): - self._get_repo_entries_spec() - path = PurePosixPath(path.strip("/")) - paths = {} - for p, f in self._repo_entries_spec.items(): - p = PurePosixPath(p.strip("/")) - root = p.parent - if root == path: - paths[str(p)] = f - out = list(paths.values()) if detail: return out - else: - return list(sorted(f["name"] for f in out)) + return [o["name"] for o in out] class TempFileUploader(fsspec.spec.AbstractBufferedFile): @@ -228,4 +207,4 @@ def _upload_chunk(self, final=False): revision=self.fs.revision, ) os.remove(self.temp_file.name) - self.fs._invalidate_repo_cache() + self.fs.invalidate_cache()