From 6890f29d7bd692d3bc762b82e833e8e606e761b2 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Sun, 2 Apr 2023 15:03:22 +0200 Subject: [PATCH 01/18] Add `hffs` to `huggingface_hub` --- setup.cfg | 1 + setup.py | 1 + src/huggingface_hub/__init__.py | 10 + src/huggingface_hub/hf_file_system.py | 427 ++++++++++++++++++++++++++ tests/test_hf_file_system.py | 215 +++++++++++++ 5 files changed, 654 insertions(+) create mode 100644 src/huggingface_hub/hf_file_system.py create mode 100644 tests/test_hf_file_system.py diff --git a/setup.cfg b/setup.cfg index 5d4938d997..9cc27b091c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -13,6 +13,7 @@ known_third_party = faiss-cpu fastprogress fire + fsspec fugashi git graphviz diff --git a/setup.py b/setup.py index 60cf5afbeb..6f227a6dc1 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,8 @@ def get_version() -> str: install_requires = [ "filelock", + "fsspec", "requests", "tqdm>=4.42.1", "pyyaml>=5.1", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index eb6dc7d1c1..ec95172d5f 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -162,6 +162,11 @@ "upload_folder", "whoami", ], + "hf_file_system": [ + "HfFile", + "HfFileSystem", + "ResolvedPath", + ], "hub_mixin": [ "ModelHubMixin", "PyTorchModelHubMixin", @@ -421,6 +426,11 @@ def __dir__(): upload_folder, # noqa: F401 whoami, # noqa: F401 ) + from .hf_file_system import ( + HfFile, # noqa: F401 + HfFileSystem, # noqa: F401 + ResolvedPath, # noqa: F401 + ) from .hub_mixin import ( ModelHubMixin, # noqa: F401 PyTorchModelHubMixin, # noqa: F401 diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py new file mode 100644 index 0000000000..e23a32b8f5 --- /dev/null +++ b/src/huggingface_hub/hf_file_system.py @@ -0,0 +1,427 @@ +import itertools +import os +import tempfile +from dataclasses import dataclass 
+from glob import has_magic +from typing import Dict, Optional, Tuple +from urllib.parse import quote, unquote + +import fsspec +import requests + +from ._commit_api import CommitOperationDelete +from .constants import DEFAULT_REVISION, ENDPOINT, REPO_TYPE_MODEL, REPO_TYPES_MAPPING, REPO_TYPES_URL_PREFIXES +from .hf_api import HfApi +from .utils import ( + EntryNotFoundError, + HFValidationError, + RepositoryNotFoundError, + RevisionNotFoundError, + hf_raise_for_status, + http_backoff, + parse_datetime, +) +from .utils._pagination import paginate + + +@dataclass +class ResolvedPath: + """Data structure containing information about a resolved path.""" + + repo_type: str + repo_id: str + revision: str + path_in_repo: str + + def unresolve(self): + path = ( + f"{REPO_TYPES_URL_PREFIXES.get(self.repo_type, '') + self.repo_id}@{self.revision}/{self.path_in_repo}" + .rstrip("/") + ) + return path + + +class HfFileSystem(fsspec.AbstractFileSystem): + """ + Access a remote Hugging Face Hub repository as if it were a local file system. + + Args: + endpoint (`str`, *optional*): + The endpoint to use. If not provided, the default one (https://huggingface.co) is used. + token (`str`, *optional*): + Authentication token, obtained with `HfApi.login` method. Will default to the stored token. + + Usage: + + ```python + >>> from huggingface_hub import HfFileSystem + + >>> fs = HfFileSystem() + + >>> # List files + >>> fs.glob("my-username/my-model/*.bin") + ['my-username/my-model/pytorch_model.bin'] + >>> fs.ls("datasets/my-username/my-dataset", detail=False) + ['datasets/my-username/my-dataset/.gitattributes', 'datasets/my-username/my-dataset/README.md', 'datasets/my-username/my-dataset/data.json'] + + >>> # Read/write files + >>> with fs.open("my-username/my-model/pytorch_model.bin") as f: + ... data = f.read() + >>> with fs.open("my-username/my-model/pytorch_model.bin", "wb") as f: + ... 
f.write(data) + ``` + """ + + root_marker = "" + protocol = "hf" + + def __init__( + self, + *args, + endpoint: Optional[str] = None, + token: Optional[str] = None, + **storage_options, + ): + super().__init__(*args, **storage_options) + self.endpoint = endpoint or ENDPOINT + self.token = token + self._api = HfApi(endpoint=endpoint, token=token) + # Maps (repo_type, repo_id, revision) to a 2-tuple with: + # * the 1st element indicating whether the repository and the revision exist + # * the 2nd element being the exception raised if the repository or revision doesn't exist + self._repo_and_revision_exists_cache: Dict[Tuple[str, str, str], Tuple[bool, Optional[Exception]]] = {} + + def _repo_and_revision_exist( + self, repo_type: str, repo_id: str, revision: Optional[str] + ) -> Tuple[bool, Optional[Exception]]: + if (repo_type, repo_id, revision) not in self._repo_and_revision_exists_cache: + try: + self._api.repo_info(repo_id, revision=revision, repo_type=repo_type) + except (RepositoryNotFoundError, HFValidationError) as e: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = False, e + except RevisionNotFoundError as e: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = False, e + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None + else: + self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] = True, None + self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None + return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] + + def resolve_path(self, path: str, revision: Optional[str] = None) -> ResolvedPath: + def _align_revision_in_path_with_revision( + revision_in_path: Optional[str], revision: Optional[str] + ) -> Optional[str]: + if revision is not None: + if revision_in_path is not None and revision_in_path != revision: + raise ValueError( + f'Revision specified 
in path ("{revision_in_path}") and in `revision` argument ("{revision}")' + " are not the same." + ) + else: + revision = revision_in_path + return revision + + path = self._strip_protocol(path) + if not path: + # can't list repositories at root + raise NotImplementedError("Access to repositories lists is not implemented.") + elif path.split("/")[0] + "/" in REPO_TYPES_URL_PREFIXES.values(): + if "/" not in path: + # can't list repositories at the repository type level + raise NotImplementedError("Access to repositories lists is not implemented.") + repo_type, path = path.split("/", 1) + repo_type = REPO_TYPES_MAPPING[repo_type] + else: + repo_type = REPO_TYPE_MODEL + if path.count("/") > 0: + if "@" in path: + repo_id, revision_in_path = path.split("@", 1) + if "/" in revision_in_path: + revision_in_path, path_in_repo = revision_in_path.split("/", 1) + else: + path_in_repo = "" + revision_in_path = unquote(revision_in_path) + revision = _align_revision_in_path_with_revision(revision_in_path, revision) + repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + raise FileNotFoundError(path) from err + else: + repo_id_with_namespace = "/".join(path.split("/")[:2]) + path_in_repo_with_namespace = "/".join(path.split("/")[2:]) + repo_id_without_namespace = path.split("/")[0] + path_in_repo_without_namespace = "/".join(path.split("/")[1:]) + repo_id = repo_id_with_namespace + path_in_repo = path_in_repo_with_namespace + repo_and_revision_exist, err = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + if isinstance(err, (RepositoryNotFoundError, HFValidationError)): + repo_id = repo_id_without_namespace + path_in_repo = path_in_repo_without_namespace + repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + raise FileNotFoundError(path) from err + else: + raise FileNotFoundError(path) from 
err + else: + repo_id = path + path_in_repo = "" + if "@" in path: + repo_id, revision_in_path = path.split("@", 1) + revision_in_path = unquote(revision_in_path) + revision = _align_revision_in_path_with_revision(revision_in_path, revision) + repo_and_revision_exist, _ = self._repo_and_revision_exist(repo_type, repo_id, revision) + if not repo_and_revision_exist: + raise NotImplementedError("Access to repositories lists is not implemented.") + + revision = revision if revision is not None else DEFAULT_REVISION + return ResolvedPath(repo_type, repo_id, revision, path_in_repo) + + def invalidate_cache(self, path=None): + if not path: + self.dircache.clear() + self._repo_and_revision_exists_cache.clear() + else: + path = self._strip_protocol(path) + path = self.resolve_path(path).unresolve() + while path: + self.dircache.pop(path, None) + path = self._parent(path) + + def _open( + self, + path: str, + mode: str = "rb", + revision: Optional[str] = None, + **kwargs, + ): + if mode == "ab": + raise NotImplementedError("Appending to remote files is not yet supported.") + path = self._strip_protocol(path) + return HfFile(self, path, mode=mode, revision=revision, **kwargs) + + def _rm(self, path, revision: Optional[str] = None, **kwargs): + path = self._strip_protocol(path) + resolved_path = self.resolve_path(path, revision=revision) + operations = [CommitOperationDelete(path_in_repo=resolved_path.path_in_repo)] + commit_message = f"Delete {path}" + self._api.create_commit( + repo_id=resolved_path.repo_id, + repo_type=resolved_path.repo_type, + token=self.token, + operations=operations, + revision=resolved_path.revision, + commit_message=kwargs.get("commit_message", commit_message), + commit_description=kwargs.get("commit_description"), + ) + self.invalidate_cache(path=resolved_path.unresolve()) + + def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = None, **kwargs): + resolved_path = self.resolve_path(path, revision=revision) + root_path = 
REPO_TYPES_URL_PREFIXES.get(resolved_path.repo_type, "") + resolved_path.repo_id + paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, revision=resolved_path.revision) + paths_in_repo = [path[len(root_path) + 1 :] for path in paths if not self.isdir(path)] + operations = [CommitOperationDelete(path_in_repo=path_in_repo) for path_in_repo in paths_in_repo] + commit_message = f"Delete {path} " + commit_message += "recursively " if recursive else "" + commit_message += f"up to depth {maxdepth} " if maxdepth is not None else "" + # TODO: use `commit_description` to list all the deleted paths? + self._api.create_commit( + repo_id=resolved_path.repo_id, + repo_type=resolved_path.repo_type, + token=self.token, + operations=operations, + revision=resolved_path.revision, + commit_message=kwargs.get("commit_message", commit_message), + commit_description=kwargs.get("commit_description"), + ) + self.invalidate_cache(path=resolved_path.unresolve()) + + def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs): + path = self._strip_protocol(path) + resolved_path = self.resolve_path(path, revision=revision) + revision_in_path = "@" + quote(resolved_path.revision, "") + has_revision_in_path = revision_in_path in path + path = resolved_path.unresolve() + if path not in self.dircache or refresh: + path_prefix = ( + ResolvedPath(resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision, "").unresolve() + + "/" + ) + tree_iter = self._iter_tree(path, revision=resolved_path.revision) + try: + tree_item = next(tree_iter) + except EntryNotFoundError: + if "/" in resolved_path.path_in_repo: + path = self._parent(path) + tree_iter = self._iter_tree(path) + else: + raise + else: + tree_iter = itertools.chain([tree_item], tree_iter) + child_infos = [] + for tree_item in tree_iter: + child_info = { + "name": path_prefix + tree_item["path"], + "size": tree_item["size"], + "type": tree_item["type"], + } + if tree_item["type"] == 
"file": + child_info.update( + { + "blob_id": tree_item["oid"], + "lfs": tree_item.get("lfs"), + "last_modified": parse_datetime(tree_item["lastCommit"]["date"]), + }, + ) + child_infos.append(child_info) + self.dircache[path] = child_infos + out = self._ls_from_cache(path) + if not has_revision_in_path: + out = [{**o, "name": o["name"].replace(revision_in_path, "", 1)} for o in out] + return out if detail else [o["name"] for o in out] + + def _iter_tree(self, path: str, revision: Optional[str] = None): + path = self._strip_protocol(path) + resolved_path = self.resolve_path(path, revision=revision) + path = ( + f"{self._api.endpoint}/api/{resolved_path.repo_type}s/{resolved_path.repo_id}/tree/{quote(resolved_path.revision, safe='')}/{resolved_path.path_in_repo}" + .rstrip("/") + ) + headers = self._api._build_hf_headers() + yield from paginate(path, params=None, headers=headers) + + def cp_file(self, path1, path2, revision: Optional[str] = None, **kwargs): + path1 = self._strip_protocol(path1) + resolved_path1 = self.resolve_path(path1, revision=revision) + path2 = self._strip_protocol(path2) + resolved_path2 = self.resolve_path(path2, revision=revision) + + same_repo = ( + resolved_path1.repo_type == resolved_path2.repo_type and resolved_path1.repo_id == resolved_path2.repo_id + ) + + # TODO: Wait for https://github.com/huggingface/huggingface_hub/issues/1083 to be resolved to simplify this logic + if same_repo and self.info(path1, revision=resolved_path1.revision)["lfs"] is not None: + headers = self._api._build_hf_headers(is_write_action=True) + commit_message = f"Copy {path1} to {path2}" + payload = { + "summary": kwargs.get("commit_message", commit_message), + "description": kwargs.get("commit_description", ""), + "files": [], + "lfsFiles": [ + { + "path": resolved_path2.path_in_repo, + "algo": "sha256", + "oid": self.info(path1, revision=resolved_path1.revision)["lfs"]["oid"], + } + ], + "deletedFiles": [], + } + r = requests.post( + ( + 
f"{self.endpoint}/api/{resolved_path1.repo_type}s/{resolved_path1.repo_id}/commit/{quote(resolved_path2.revision, safe='')}" + ), + json=payload, + headers=headers, + ) + hf_raise_for_status(r) + else: + with self.open(path1, "rb", revision=resolved_path1.revision) as f: + content = f.read() + commit_message = f"Copy {path1} to {path2}" + self._api.upload_file( + path_or_fileobj=content, + path_in_repo=resolved_path2.path_in_repo, + repo_id=resolved_path2.repo_id, + token=self.token, + repo_type=resolved_path2.repo_type, + revision=resolved_path2.revision, + commit_message=kwargs.get("commit_message", commit_message), + commit_description=kwargs.get("commit_description"), + ) + self.invalidate_cache(path=resolved_path1.unresolve()) + self.invalidate_cache(path=resolved_path2.unresolve()) + + def modified(self, path, **kwargs): + info = self.info(path, **kwargs) + if info["type"] != "file": + raise FileNotFoundError(path) + return info["last_modified"] + + def info(self, path, **kwargs): + path = self._strip_protocol(path) + resolved_path = self.resolve_path(path) + if not resolved_path.path_in_repo: + return {"name": path, "size": None, "type": "directory"} + return super().info(path, **kwargs) + + def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): + if maxdepth is not None and maxdepth < 1: + raise ValueError("maxdepth must be at least 1") + + if isinstance(path, str): + out = self.expand_path([path], recursive, maxdepth) + else: + out = set() + path = [self._strip_protocol(p) for p in path] + for p in path: + if has_magic(p): + bit = set(self.glob(p)) + out |= bit + if recursive: + out |= set(self.expand_path(list(bit), recursive=recursive, maxdepth=maxdepth, **kwargs)) + continue + elif recursive: + rec = set(self.find(p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs)) + out |= rec + if p not in out and (recursive is False or self.exists(p)): + # should only check once, for the root + out.add(p) + if not out: + raise 
FileNotFoundError(path) + return list(sorted(out)) + + +class HfFile(fsspec.spec.AbstractBufferedFile): + def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs): + super().__init__(fs, path, **kwargs) + self.fs: HfFileSystem + self.resolved_path = fs.resolve_path(path, revision=revision) + + def _fetch_range(self, start, end): + headers = { + "range": f"bytes={start}-{end - 1}", + **self.fs._api._build_hf_headers(), + } + url = ( + f"{self.fs.endpoint}/{REPO_TYPES_URL_PREFIXES.get(self.resolved_path.repo_type, '') + self.resolved_path.repo_id}/resolve/{quote(self.resolved_path.revision, safe='')}/{quote(self.resolved_path.path_in_repo, safe='')}" + ) + r = http_backoff("GET", url, headers=headers) + hf_raise_for_status(r) + return r.content + + def _initiate_upload(self): + self.temp_file = tempfile.NamedTemporaryFile(prefix="hffs-", delete=False) + + def _upload_chunk(self, final=False): + self.buffer.seek(0) + block = self.buffer.read() + self.temp_file.write(block) + if final: + self.temp_file.close() + commit_message = f"Upload {self.path}" + self.fs._api.upload_file( + path_or_fileobj=self.temp_file.name, + path_in_repo=self.resolved_path.path_in_repo, + repo_id=self.resolved_path.repo_id, + token=self.fs.token, + repo_type=self.resolved_path.repo_type, + revision=self.resolved_path.revision, + commit_message=self.kwargs.get("commit_message", commit_message), + commit_description=self.kwargs.get("commit_description"), + ) + os.remove(self.temp_file.name) + self.fs.invalidate_cache( + path=self.resolved_path.unresolve(), + ) diff --git a/tests/test_hf_file_system.py b/tests/test_hf_file_system.py new file mode 100644 index 0000000000..735460f01b --- /dev/null +++ b/tests/test_hf_file_system.py @@ -0,0 +1,215 @@ +import datetime +import unittest +from typing import Optional +from unittest.mock import patch + +import fsspec +import pytest + +from huggingface_hub.constants import REPO_TYPES_URL_PREFIXES +from 
huggingface_hub.hf_file_system import HfFileSystem +from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError + +from .testing_constants import ENDPOINT_STAGING, TOKEN, USER +from .testing_utils import repo_name, retry_endpoint + + +class HfFileSystemTests(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Register `HfFileSystem` as a `fsspec` filesystem if not already registered.""" + if HfFileSystem.protocol not in fsspec.available_protocols(): + fsspec.register_implementation(HfFileSystem.protocol, HfFileSystem) + + def setUp(self): + self.repo_id = f"{USER}/{repo_name()}" + self.repo_type = "dataset" + self.hf_path = REPO_TYPES_URL_PREFIXES.get(self.repo_type, "") + self.repo_id + self.hffs = HfFileSystem(endpoint=ENDPOINT_STAGING, token=TOKEN) + self.api = self.hffs._api + + # Create dummy repo + self.api.create_repo(self.repo_id, repo_type=self.repo_type, private=False) + self.api.upload_file( + path_or_fileobj="dummy text data".encode("utf-8"), + path_in_repo="data/text_data.txt", + repo_id=self.repo_id, + repo_type=self.repo_type, + ) + self.api.upload_file( + path_or_fileobj=b"dummy binary data", + path_in_repo="data/binary_data.bin", + repo_id=self.repo_id, + repo_type=self.repo_type, + ) + + def tearDown(self): + self.api.delete_repo(self.repo_id, repo_type=self.repo_type) + + @retry_endpoint + def test_glob(self): + self.assertEqual( + sorted(self.hffs.glob(self.hf_path + "/*")), + sorted([self.hf_path + "/.gitattributes", self.hf_path + "/data"]), + ) + + self.assertEqual( + sorted(self.hffs.glob(self.hf_path + "/*", revision="main")), + sorted([self.hf_path + "/.gitattributes", self.hf_path + "/data"]), + ) + self.assertEqual( + sorted(self.hffs.glob(self.hf_path + "@main" + "/*")), + sorted([self.hf_path + "@main" + "/.gitattributes", self.hf_path + "@main" + "/data"]), + ) + + @retry_endpoint + def test_file_type(self): + self.assertTrue( + self.hffs.isdir(self.hf_path + "/data") and not 
self.hffs.isdir(self.hf_path + "/.gitattributes") + ) + self.assertTrue( + self.hffs.isfile(self.hf_path + "/data/text_data.txt") and not self.hffs.isfile(self.hf_path + "/data") + ) + + @retry_endpoint + def test_remove_file(self): + self.hffs.rm_file(self.hf_path + "/data/text_data.txt") + self.assertEqual(self.hffs.glob(self.hf_path + "/data/*"), [self.hf_path + "/data/binary_data.bin"]) + + @retry_endpoint + def test_remove_directory(self): + self.hffs.rm(self.hf_path + "/data", recursive=True) + self.assertNotIn(self.hf_path + "/data", self.hffs.ls(self.hf_path)) + + @retry_endpoint + def test_read_file(self): + with self.hffs.open(self.hf_path + "/data/text_data.txt", "r") as f: + self.assertEqual(f.read(), "dummy text data") + + @retry_endpoint + def test_write_file(self): + data = "new text data" + with self.hffs.open(self.hf_path + "/data/new_text_data.txt", "w") as f: + f.write(data) + self.assertIn(self.hf_path + "/data/new_text_data.txt", self.hffs.glob(self.hf_path + "/data/*")) + with self.hffs.open(self.hf_path + "/data/new_text_data.txt", "r") as f: + self.assertEqual(f.read(), data) + + @retry_endpoint + def test_write_file_multiple_chunks(self): + # TODO: try with files between 10 and 50MB (as of 16 March 2023 I was getting 504 errors on hub-ci) + data = "a" * (4 << 20) # 4MB + with self.hffs.open(self.hf_path + "/data/new_text_data_big.txt", "w") as f: + for _ in range(2): # 8MB in total + f.write(data) + + self.assertIn(self.hf_path + "/data/new_text_data_big.txt", self.hffs.glob(self.hf_path + "/data/*")) + with self.hffs.open(self.hf_path + "/data/new_text_data_big.txt", "r") as f: + for _ in range(2): + self.assertEqual(f.read(len(data)), data) + + @unittest.skip("Not implemented yet") + @retry_endpoint + def test_append_file(self): + with self.hffs.open(self.hf_path + "/data/text_data.txt", "a") as f: + f.write(" appended text") + + with self.hffs.open(self.hf_path + "/data/text_data.txt", "r") as f: + self.assertEqual(f.read(), "dummy text 
data appended text") + + @retry_endpoint + def test_copy_file(self): + # Non-LFS file + self.assertIsNone(self.hffs.info(self.hf_path + "/data/text_data.txt")["lfs"]) + self.hffs.cp_file(self.hf_path + "/data/text_data.txt", self.hf_path + "/data/text_data_copy.txt") + with self.hffs.open(self.hf_path + "/data/text_data_copy.txt", "r") as f: + self.assertEqual(f.read(), "dummy text data") + self.assertIsNone(self.hffs.info(self.hf_path + "/data/text_data_copy.txt")["lfs"]) + # LFS file + self.assertIsNotNone(self.hffs.info(self.hf_path + "/data/binary_data.bin")["lfs"]) + self.hffs.cp_file(self.hf_path + "/data/binary_data.bin", self.hf_path + "/data/binary_data_copy.bin") + with self.hffs.open(self.hf_path + "/data/binary_data_copy.bin", "rb") as f: + self.assertEqual(f.read(), b"dummy binary data") + self.assertIsNotNone(self.hffs.info(self.hf_path + "/data/binary_data_copy.bin")["lfs"]) + + @retry_endpoint + def test_modified_time(self): + self.assertIsInstance(self.hffs.modified(self.hf_path + "/data/text_data.txt"), datetime.datetime) + # should fail on a non-existing file/directory + with self.assertRaises(FileNotFoundError): + self.hffs.modified(self.hf_path + "/data/not_existing_file.txt") + # should fail on a directory + with self.assertRaises(FileNotFoundError): + self.hffs.modified(self.hf_path + "/data") + + @retry_endpoint + def test_initialize_from_fsspec(self): + fs, _, paths = fsspec.get_fs_token_paths( + f"hf://{self.repo_type}s/{self.repo_id}/data/text_data.txt", + storage_options={ + "endpoint": ENDPOINT_STAGING, + "token": TOKEN, + }, + ) + self.assertIsInstance(fs, HfFileSystem) + self.assertEqual(fs._api.endpoint, ENDPOINT_STAGING) + self.assertEqual(fs.token, TOKEN) + self.assertEqual(paths, [self.hf_path + "/data/text_data.txt"]) + + fs, _, paths = fsspec.get_fs_token_paths(f"hf://{self.repo_id}/data/text_data.txt") + self.assertIsInstance(fs, HfFileSystem) + self.assertEqual(paths, [f"{self.repo_id}/data/text_data.txt"]) + + 
+@pytest.mark.parametrize("path_in_repo", ["", "foo"]) +@pytest.mark.parametrize( + "root_path,repo_type,repo_id,revision", + [ + # Parse without namespace + ("gpt2", "model", "gpt2", "main"), + ("gpt2@dev", "model", "gpt2", "dev"), + ("datasets/squad", "dataset", "squad", "main"), + ("datasets/squad@dev", "dataset", "squad", "dev"), + # Parse with namespace + ("username/my_model", "model", "username/my_model", "main"), + ("username/my_model@dev", "model", "username/my_model", "dev"), + ("datasets/username/my_dataset", "dataset", "username/my_dataset", "main"), + ("datasets/username/my_dataset@dev", "dataset", "username/my_dataset", "dev"), + # Parse with hf:// protocol + ("hf://gpt2", "model", "gpt2", "main"), + ("hf://gpt2@dev", "model", "gpt2", "dev"), + ("hf://datasets/squad", "dataset", "squad", "main"), + ("hf://datasets/squad@dev", "dataset", "squad", "dev"), + ], +) +def test_resolve_path( + root_path: str, repo_type: Optional[str], repo_id: str, revision: str, path_in_repo: str +) -> None: + fs = HfFileSystem() + path = root_path + "/" + path_in_repo if path_in_repo else root_path + + def mock_repo_info(repo_id: str, *, repo_type: str, **kwargs): + if repo_id not in ["gpt2", "squad", "username/my_dataset", "username/my_model"]: + raise RepositoryNotFoundError(repo_id) + if revision is not None and revision not in ["main", "dev"]: + raise RevisionNotFoundError(revision) + + with patch.object(fs._api, "repo_info", mock_repo_info): + resolved_path = fs.resolve_path(path) + assert ( + resolved_path.repo_type, + resolved_path.repo_id, + resolved_path.revision, + resolved_path.path_in_repo, + ) == (repo_type, repo_id, revision, path_in_repo) + + +@pytest.mark.parametrize("not_supported_path", ["", "foo", "datasets", "datasets/foo"]) +def test_access_repositories_lists(not_supported_path): + fs = HfFileSystem() + with pytest.raises(NotImplementedError): + fs.ls(not_supported_path) + with pytest.raises(NotImplementedError): + fs.glob(not_supported_path + "/") + 
with pytest.raises(NotImplementedError): + fs.open(not_supported_path) From 5df606a2aaaf24ea73ce2ff205e144f59d164a04 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 13:47:35 +0200 Subject: [PATCH 02/18] Minor improvements --- src/huggingface_hub/hf_file_system.py | 14 ++++--- tests/test_hf_file_system.py | 57 +++++++++++++++++---------- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index e23a32b8f5..fd2058f1cb 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -238,6 +238,7 @@ def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = Non self.invalidate_cache(path=resolved_path.unresolve()) def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs): + """List the contents of a directory.""" path = self._strip_protocol(path) resolved_path = self.resolve_path(path, revision=revision) revision_in_path = "@" + quote(resolved_path.revision, "") @@ -248,13 +249,14 @@ def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, * ResolvedPath(resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision, "").unresolve() + "/" ) - tree_iter = self._iter_tree(path, revision=resolved_path.revision) + tree_path = path + tree_iter = self._iter_tree(tree_path, revision=resolved_path.revision) try: tree_item = next(tree_iter) except EntryNotFoundError: if "/" in resolved_path.path_in_repo: - path = self._parent(path) - tree_iter = self._iter_tree(path) + tree_path = self._parent(path) + tree_iter = self._iter_tree(tree_path) else: raise else: @@ -275,7 +277,7 @@ def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, * }, ) child_infos.append(child_info) - self.dircache[path] = child_infos + self.dircache[tree_path] = child_infos out = self._ls_from_cache(path) if not has_revision_in_path: out = [{**o, "name": 
o["name"].replace(revision_in_path, "", 1)} for o in out] @@ -345,8 +347,8 @@ def cp_file(self, path1, path2, revision: Optional[str] = None, **kwargs): def modified(self, path, **kwargs): info = self.info(path, **kwargs) - if info["type"] != "file": - raise FileNotFoundError(path) + if "last_modified" not in info: + raise IsADirectoryError(path) return info["last_modified"] def info(self, path, **kwargs): diff --git a/tests/test_hf_file_system.py b/tests/test_hf_file_system.py index 735460f01b..5e5c67d6ca 100644 --- a/tests/test_hf_file_system.py +++ b/tests/test_hf_file_system.py @@ -135,11 +135,11 @@ def test_copy_file(self): @retry_endpoint def test_modified_time(self): self.assertIsInstance(self.hffs.modified(self.hf_path + "/data/text_data.txt"), datetime.datetime) - # should fail on a non-existing file/directory + # should fail on a non-existing file with self.assertRaises(FileNotFoundError): self.hffs.modified(self.hf_path + "/data/not_existing_file.txt") # should fail on a directory - with self.assertRaises(FileNotFoundError): + with self.assertRaises(IsADirectoryError): self.hffs.modified(self.hf_path + "/data") @retry_endpoint @@ -163,45 +163,62 @@ def test_initialize_from_fsspec(self): @pytest.mark.parametrize("path_in_repo", ["", "foo"]) @pytest.mark.parametrize( - "root_path,repo_type,repo_id,revision", + "root_path,revision,repo_type,repo_id,resolved_revision", [ # Parse without namespace - ("gpt2", "model", "gpt2", "main"), - ("gpt2@dev", "model", "gpt2", "dev"), - ("datasets/squad", "dataset", "squad", "main"), - ("datasets/squad@dev", "dataset", "squad", "dev"), + ("gpt2", None, "model", "gpt2", "main"), + ("gpt2", "dev", "model", "gpt2", "dev"), + ("gpt2@dev", None, "model", "gpt2", "dev"), + ("datasets/squad", None, "dataset", "squad", "main"), + ("datasets/squad", "dev", "dataset", "squad", "dev"), + ("datasets/squad@dev", None, "dataset", "squad", "dev"), # Parse with namespace - ("username/my_model", "model", "username/my_model", "main"), - 
("username/my_model@dev", "model", "username/my_model", "dev"), - ("datasets/username/my_dataset", "dataset", "username/my_dataset", "main"), - ("datasets/username/my_dataset@dev", "dataset", "username/my_dataset", "dev"), + ("username/my_model", None, "model", "username/my_model", "main"), + ("username/my_model", "dev", "model", "username/my_model", "dev"), + ("username/my_model@dev", None, "model", "username/my_model", "dev"), + ("datasets/username/my_dataset", None, "dataset", "username/my_dataset", "main"), + ("datasets/username/my_dataset", "dev", "dataset", "username/my_dataset", "dev"), + ("datasets/username/my_dataset@dev", None, "dataset", "username/my_dataset", "dev"), # Parse with hf:// protocol - ("hf://gpt2", "model", "gpt2", "main"), - ("hf://gpt2@dev", "model", "gpt2", "dev"), - ("hf://datasets/squad", "dataset", "squad", "main"), - ("hf://datasets/squad@dev", "dataset", "squad", "dev"), + ("hf://gpt2", None, "model", "gpt2", "main"), + ("hf://gpt2", "dev", "model", "gpt2", "dev"), + ("hf://gpt2@dev", None, "model", "gpt2", "dev"), + ("hf://datasets/squad", None, "dataset", "squad", "main"), + ("hf://datasets/squad", "dev", "dataset", "squad", "dev"), + ("hf://datasets/squad@dev", None, "dataset", "squad", "dev"), ], ) def test_resolve_path( - root_path: str, repo_type: Optional[str], repo_id: str, revision: str, path_in_repo: str -) -> None: + root_path: str, + revision: Optional[str], + repo_type: str, + repo_id: str, + resolved_revision: str, + path_in_repo: str, +): fs = HfFileSystem() path = root_path + "/" + path_in_repo if path_in_repo else root_path - def mock_repo_info(repo_id: str, *, repo_type: str, **kwargs): + def mock_repo_info(repo_id: str, *, revision: str, repo_type: str, **kwargs): if repo_id not in ["gpt2", "squad", "username/my_dataset", "username/my_model"]: raise RepositoryNotFoundError(repo_id) if revision is not None and revision not in ["main", "dev"]: raise RevisionNotFoundError(revision) with patch.object(fs._api, 
"repo_info", mock_repo_info): - resolved_path = fs.resolve_path(path) + resolved_path = fs.resolve_path(path, revision=revision) assert ( resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision, resolved_path.path_in_repo, - ) == (repo_type, repo_id, revision, path_in_repo) + ) == (repo_type, repo_id, resolved_revision, path_in_repo) + + +def test_resolve_path_with_non_matching_revisions(): + fs = HfFileSystem() + with pytest.raises(ValueError): + fs.resolve_path("gpt2@dev", revision="main") @pytest.mark.parametrize("not_supported_path", ["", "foo", "datasets", "datasets/foo"]) From a21bb207d85c9b01377500648b4b38ec16bf605a Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 13:48:08 +0200 Subject: [PATCH 03/18] Docs --- docs/source/_toctree.yml | 4 + docs/source/guides/filesystem.mdx | 107 ++++++++++++++++++ docs/source/guides/overview.mdx | 9 ++ .../package_reference/hf_filesystem.mdx | 12 ++ 4 files changed, 132 insertions(+) create mode 100644 docs/source/guides/filesystem.mdx create mode 100644 docs/source/package_reference/hf_filesystem.mdx diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index de564fc48a..1afd9edd38 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -18,6 +18,8 @@ title: Repository - local: guides/search title: Search + - local: guides/filesystem + title: Filesystem - local: guides/inference title: Inference - local: guides/community @@ -52,6 +54,8 @@ title: Mixins & serialization methods - local: package_reference/inference_api title: Inference API + - local: package_reference/hf_filesystem + title: Hugging Face Hub Filesystem - local: package_reference/utilities title: Utilities - local: package_reference/community diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx new file mode 100644 index 0000000000..0a41fdb7d6 --- /dev/null +++ b/docs/source/guides/filesystem.mdx @@ -0,0 +1,107 @@ +# Interact with the Hub through the Filesystem API + +In 
addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, `put_file` etc. + +Below is a snippet with the basic usage: + +```python +>>> from huggingface_hub import HfFileSystem +>>> fs = HfFileSystem() + +>>> # List all files in a directory +>>> fs.ls("datasets/my-username/my-dataset-repo/data", detail=False) +['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] + +>>> # List all ".csv" files in a repo +>>> fs.glob("datasets/my-username/my-dataset-repo/**.csv") +['datasets/my-username/my-dataset-repo/data/train.csv', 'datasets/my-username/my-dataset-repo/data/test.csv'] + +>>> # Read a remote file +>>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: +... train_data = f.readlines() + +>>> # Read the contents of a remote file as a string +>>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") + +>>> # Write a remote file +>>> with fs.open("datasets/my-username/my-dataset-repo/data/validation.csv", "w") as f: +... f.write("text,label") +... f.write("Fantastic movie!,good") +``` + +The optional `revision` argument can be passed to run an operation from a specific commit (any revision such as a branch or a tag name or a commit hash). + +Unlike Python's built-in `open`, `fsspec`'s `open` defaults to binary mode, `"rb"`. This means you must explicitly set encoding as `"r"` for reading and `"w"` for writing in text mode. 
+ +## Integration + +The [`HfFileSystem`] can be used with any library that integrates `fsspec`, provided the URL follows the scheme: + +``` +hf://[][@]/ +``` + +The "repo_type_prefix" is "datasets/" for datasets, "spaces/" for spaces, and models don't need a prefix in the URL. + +## Authentication + +In many cases, you must be logged in with a Hugging Face account to interact with the Hub. Refer to the [Login](../quick-start#login) section of the documentation to learn more about authentication methods on the Hub. + +It is also possible to login programmatically by passing your `token` as an argument to `HfFileSystem`: + +```python +>>> from huggingface_hub import HfFileSystem +>>> fs = hffs.HfFileSystem(token=token) +``` + +If you login this way, be careful not to accidentally leak the token when sharing your source code! + +## Integrations + +This sections lists `fsspec`'s interesting integrations where the `HfFileSystem` can be utilized: + +* Reading/writing a [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files) DataFrame from/to a 🤗 Hub repository: + + ```python + >>> import pandas as pd + + >>> # Read a remote CSV file into a dataframe + >>> df = pd.read_csv("hf://datasets/my-username/my-dataset-repo/train.csv") + + >>> # Write a dataframe to a remote CSV file + >>> df.to_csv("hf://datasets/my-username/my-dataset-repo/test.csv") + ``` + +The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html) and [Polars](https://pola-rs.github.io/polars/py-polars/html/reference/io.html) DataFrames. 
+ +* Querying (remote) 🤗 Hub files with [DuckDB](https://duckdb.org/docs/guides/python/filesystems): + + ```python + >>> from huggingface_hub import HfFileSystem + >>> import duckdb + + >>> fs = HfFileSystem() + >>> duckdb.register_filesystem(fs) + >>> # Query a remote file and get the result back as a dataframe + >>> fs_query_file = "hf://datasets/my-username/my-dataset-repo/data_dir/data.parquet" + >>> df = duckdb.query(f"SELECT * FROM '{fs_query_file}' LIMIT 10").df() + ``` + +* Using 🤗 Hub as an array store with [Zarr](https://zarr.readthedocs.io/en/stable/tutorial.html#io-with-fsspec): + + ```python + >>> import numpy as np + >>> import zarr + + >>> embeddings = np.random.randn(50000, 1000).astype("float32") + + >>> # Write an array to a repo + >>> with zarr.open_group("hf://my-username/my-model-repo/array-store", mode="w") as root: + ... foo = root.create_group("embeddings") + ... foobar = foo.zeros('experiment_0', shape=(50000, 1000), chunks=(10000, 1000), dtype='f4') + ... foobar[:] = embeddings + + >>> # Read an array from a repo + >>> with zarr.open_group("hf://my-username/my-model-repo/array-store", mode="r") as root: + ... first_row = root["embeddings/experiment_0"][0] + ``` diff --git a/docs/source/guides/overview.mdx b/docs/source/guides/overview.mdx index 96820925a5..83a40627f0 100644 --- a/docs/source/guides/overview.mdx +++ b/docs/source/guides/overview.mdx @@ -42,6 +42,15 @@ Take a look at these guides to learn how to use huggingface_hub to solve real-wo

+ +
+ Filesystem +

+ How to interact with the Hub conveniently through an interface that mimics Python's file interface? +

+
+
diff --git a/docs/source/package_reference/hf_filesystem.mdx b/docs/source/package_reference/hf_filesystem.mdx new file mode 100644 index 0000000000..63f138f14f --- /dev/null +++ b/docs/source/package_reference/hf_filesystem.mdx @@ -0,0 +1,12 @@ +# Filesystem API + +Below is the documentation for the `HfFileSystem` class, which provides a pythonic file interface to the Hugging Face Hub based on [`fssepc`](https://filesystem-spec.readthedocs.io/en/latest/). + +## HfFileSystem + +[[autodoc]] HfFileSystem + - __init__ + - resolve_path + - ls + +As the [`HfFileSystem`] is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). From 36c70620b4db8b3fade6f2882a8b2b8758fe684f Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 14:12:06 +0200 Subject: [PATCH 04/18] Minor fix --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6f227a6dc1..947e5eb4ac 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,8 @@ def get_version() -> str: install_requires = [ "filelock", - "fsspecrequests", + "fsspec", + "requests", "tqdm>=4.42.1", "pyyaml>=5.1", "typing-extensions>=3.7.4.3", # to be able to import TypeAlias From 96a0fff9d5b8fd158b543966ce34d6c332a62bb4 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 14:32:19 +0200 Subject: [PATCH 05/18] Doc fixes --- docs/source/guides/filesystem.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index 0a41fdb7d6..f1705cf7e4 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -1,6 +1,6 @@ # Interact with the Hub through the Filesystem API -In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a 
pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, `put_file` etc. +In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of top of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, `put_file` etc. Below is a snippet with the basic usage: @@ -58,7 +58,7 @@ If you login this way, be careful not to accidentally leak the token when sharin ## Integrations -This sections lists `fsspec`'s interesting integrations where the `HfFileSystem` can be utilized: +This section lists `fsspec`'s interesting integrations that utilize the [`HfFileSystem`] to make interacting with the Hub simpler: * Reading/writing a [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files) DataFrame from/to a 🤗 Hub repository: From 59c3cf62729f6debb008ba7125fafe65a940c2d3 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 18:12:16 +0200 Subject: [PATCH 06/18] Fix typing --- src/huggingface_hub/hf_file_system.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index fd2058f1cb..41763c093e 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -89,7 +89,9 @@ def __init__( # Maps (repo_type, repo_id, revision) to a 2-tuple with: # * the 1st element indicating whether the repositoy and the revision exist # * the 2nd element being the exception raised if the repository or revision doesn't exist - self._repo_and_revision_exists_cache: Dict[Tuple[str, str, str], 
Tuple[bool, Optional[Exception]]] = {} + self._repo_and_revision_exists_cache: Dict[ + Tuple[str, str, Optional[str]], Tuple[bool, Optional[Exception]] + ] = {} def _repo_and_revision_exist( self, repo_type: str, repo_id: str, revision: Optional[str] @@ -291,7 +293,7 @@ def _iter_tree(self, path: str, revision: Optional[str] = None): .rstrip("/") ) headers = self._api._build_hf_headers() - yield from paginate(path, params=None, headers=headers) + yield from paginate(path, params={}, headers=headers) def cp_file(self, path1, path2, revision: Optional[str] = None, **kwargs): path1 = self._strip_protocol(path1) From 6f4ebd8ee0bbfbda809eec4950d4fb52130ff770 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 18:35:07 +0200 Subject: [PATCH 07/18] Doc fixes --- docs/source/_toctree.yml | 2 +- docs/source/guides/filesystem.mdx | 40 +++++++++++++++---------------- 2 files changed, 20 insertions(+), 22 deletions(-) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 1afd9edd38..7035af8948 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -19,7 +19,7 @@ - local: guides/search title: Search - local: guides/filesystem - - title: Filesystem + title: Filesystem - local: guides/inference title: Inference - local: guides/community diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index f1705cf7e4..b90d1aa077 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -2,7 +2,7 @@ In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of top of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, `put_file` etc. 
-Below is a snippet with the basic usage: +## Usage ```python >>> from huggingface_hub import HfFileSystem @@ -33,7 +33,7 @@ The optional `revision` argument can be passed to run an operation from a specif Unlike Python's built-in `open`, `fsspec`'s `open` defaults to binary mode, `"rb"`. This means you must explicitly set encoding as `"r"` for reading and `"w"` for writing in text mode. -## Integration +## Integrations The [`HfFileSystem`] can be used with any library that integrates `fsspec`, provided the URL follows the scheme: @@ -43,24 +43,9 @@ hf://[][@]/ The "repo_type_prefix" is "datasets/" for datasets, "spaces/" for spaces, and models don't need a prefix in the URL. -## Authentication - -In many cases, you must be logged in with a Hugging Face account to interact with the Hub. Refer to the [Login](../quick-start#login) section of the documentation to learn more about authentication methods on the Hub. - -It is also possible to login programmatically by passing your `token` as an argument to `HfFileSystem`: - -```python ->>> from huggingface_hub import HfFileSystem ->>> fs = hffs.HfFileSystem(token=token) -``` - -If you login this way, be careful not to accidentally leak the token when sharing your source code! 
- -## Integrations +Some interesting integrations where [`HfFileSystem`] can be utilized to simplify interacting with the Hub are listed below: -This section lists `fsspec`'s interesting integrations that utilize the [`HfFileSystem`] to make interacting with the Hub simpler: - -* Reading/writing a [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files) DataFrame from/to a 🤗 Hub repository: +* Reading/writing a [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files) DataFrame from/to a Hub repository: ```python >>> import pandas as pd @@ -74,7 +59,7 @@ This section lists `fsspec`'s interesting integrations that utilize the [`HfFile The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/how-to/connect-to-remote-data.html) and [Polars](https://pola-rs.github.io/polars/py-polars/html/reference/io.html) DataFrames. -* Querying (remote) 🤗 Hub files with [DuckDB](https://duckdb.org/docs/guides/python/filesystems): +* Querying (remote) Hub files with [DuckDB](https://duckdb.org/docs/guides/python/filesystems): ```python >>> from huggingface_hub import HfFileSystem @@ -87,7 +72,7 @@ The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/ho >>> df = duckdb.query(f"SELECT * FROM '{fs_query_file}' LIMIT 10").df() ``` -* Using 🤗 Hub as an array store with [Zarr](https://zarr.readthedocs.io/en/stable/tutorial.html#io-with-fsspec): +* Using the Hub as an array store with [Zarr](https://zarr.readthedocs.io/en/stable/tutorial.html#io-with-fsspec): ```python >>> import numpy as np @@ -105,3 +90,16 @@ The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/ho >>> with zarr.open_group("hf://my-username/my-model-repo/array-store", mode="r") as root: ... first_row = root["embeddings/experiment_0"][0] ``` + +## Authentication + +In many cases, you must be logged in with a Hugging Face account to interact with the Hub. 
Refer to the [Login](../quick-start#login) section of the documentation to learn more about authentication methods on the Hub. + +It is also possible to login programmatically by passing your `token` as an argument to `HfFileSystem`: + +```python +>>> from huggingface_hub import HfFileSystem +>>> fs = hffs.HfFileSystem(token=token) +``` + +If you login this way, be careful not to accidentally leak the token when sharing your source code! From 4f26bce53cbe8a87094caaf8291c638a7c5df296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Mon, 3 Apr 2023 19:42:39 +0200 Subject: [PATCH 08/18] Apply suggestions from code review Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/guides/filesystem.mdx | 8 ++++---- docs/source/guides/overview.mdx | 2 +- docs/source/package_reference/hf_filesystem.mdx | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index b90d1aa077..411bc78677 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -29,7 +29,7 @@ In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSys ... f.write("Fantastic movie!,good") ``` -The optional `revision` argument can be passed to run an operation from a specific commit (any revision such as a branch or a tag name or a commit hash). +The optional `revision` argument can be passed to run an operation from a specific commit such as a branch, tag name, or a commit hash. Unlike Python's built-in `open`, `fsspec`'s `open` defaults to binary mode, `"rb"`. This means you must explicitly set encoding as `"r"` for reading and `"w"` for writing in text mode. @@ -41,9 +41,9 @@ The [`HfFileSystem`] can be used with any library that integrates `fsspec`, prov hf://[][@]/ ``` -The "repo_type_prefix" is "datasets/" for datasets, "spaces/" for spaces, and models don't need a prefix in the URL. 
+The `repo_type_prefix` is `datasets/` for datasets, `spaces/` for spaces, and models don't need a prefix in the URL. -Some interesting integrations where [`HfFileSystem`] can be utilized to simplify interacting with the Hub are listed below: +Some interesting integrations where [`HfFileSystem`] simplifies interacting with the Hub are listed below: * Reading/writing a [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#reading-writing-remote-files) DataFrame from/to a Hub repository: @@ -95,7 +95,7 @@ The same workflow can also be used for [Dask](https://docs.dask.org/en/stable/ho In many cases, you must be logged in with a Hugging Face account to interact with the Hub. Refer to the [Login](../quick-start#login) section of the documentation to learn more about authentication methods on the Hub. -It is also possible to login programmatically by passing your `token` as an argument to `HfFileSystem`: +It is also possible to login programmatically by passing your `token` as an argument to [`HfFileSystem`]: ```python >>> from huggingface_hub import HfFileSystem diff --git a/docs/source/guides/overview.mdx b/docs/source/guides/overview.mdx index 83a40627f0..6c5e69a658 100644 --- a/docs/source/guides/overview.mdx +++ b/docs/source/guides/overview.mdx @@ -47,7 +47,7 @@ Take a look at these guides to learn how to use huggingface_hub to solve real-wo
Filesystem

- How to interact with the Hub conveniently through an interface that mimics Python's file interface? + How to interact with the Hub through a convenient interface that mimics Python's file interface?

diff --git a/docs/source/package_reference/hf_filesystem.mdx b/docs/source/package_reference/hf_filesystem.mdx index 63f138f14f..146b71ed93 100644 --- a/docs/source/package_reference/hf_filesystem.mdx +++ b/docs/source/package_reference/hf_filesystem.mdx @@ -1,6 +1,6 @@ # Filesystem API -Below is the documentation for the `HfFileSystem` class, which provides a pythonic file interface to the Hugging Face Hub based on [`fssepc`](https://filesystem-spec.readthedocs.io/en/latest/). +The `HfFileSystem` class provides a pythonic file interface to the Hugging Face Hub based on [`fssepc`](https://filesystem-spec.readthedocs.io/en/latest/). ## HfFileSystem @@ -9,4 +9,4 @@ Below is the documentation for the `HfFileSystem` class, which provides a python - resolve_path - ls -As the [`HfFileSystem`] is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). +[`HfFileSystem`] is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), so it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). 
From 5b3387b7893ca914c8d28327923a011bd1234dbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Mon, 3 Apr 2023 19:42:58 +0200 Subject: [PATCH 09/18] Update src/huggingface_hub/hf_file_system.py Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- src/huggingface_hub/hf_file_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index 41763c093e..a96968c9ad 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -49,7 +49,7 @@ class HfFileSystem(fsspec.AbstractFileSystem): endpoint (`str`, *optional*): The endpoint to use. If not provided, the default one (https://huggingface.co) is used. token (`str`, *optional*): - Authentication token, obtained with `HfApi.login` method. Will default to the stored token. + Authentication token, obtained with [`HfApi.login`] method. Will default to the stored token. Usage: From 3d0bd8e0fe7fd139fb280a88dc24a4d0f8adbfcf Mon Sep 17 00:00:00 2001 From: mariosasko Date: Mon, 3 Apr 2023 19:44:24 +0200 Subject: [PATCH 10/18] Minor doc improvement --- docs/source/package_reference/hf_filesystem.mdx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/package_reference/hf_filesystem.mdx b/docs/source/package_reference/hf_filesystem.mdx index 146b71ed93..de140f57a2 100644 --- a/docs/source/package_reference/hf_filesystem.mdx +++ b/docs/source/package_reference/hf_filesystem.mdx @@ -4,9 +4,9 @@ The `HfFileSystem` class provides a pythonic file interface to the Hugging Face ## HfFileSystem +`HfFileSystem` is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), so it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). 
+ [[autodoc]] HfFileSystem - __init__ - resolve_path - ls - -[`HfFileSystem`] is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), so it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). From 6d794025d23645bbc654ae2cd0984975ceb6728b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Mon, 3 Apr 2023 19:45:02 +0200 Subject: [PATCH 11/18] Update docs/source/guides/filesystem.mdx Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/guides/filesystem.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index 411bc78677..358358b4c1 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -1,6 +1,6 @@ # Interact with the Hub through the Filesystem API -In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of top of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, `put_file` etc. +In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSystem`], a pythonic [fsspec-compatible](https://filesystem-spec.readthedocs.io/en/latest/) file interface to the Hugging Face Hub. The [`HfFileSystem`] builds of top of the [`HfApi`] and offers typical filesystem style operations like `cp`, `mv`, `ls`, `du`, `glob`, `get_file`, and `put_file`. 
## Usage From d4a5557cea5689d26db71835d62a282295858067 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Tue, 4 Apr 2023 18:21:50 +0200 Subject: [PATCH 12/18] Apply suggestions from code review Co-authored-by: Lucain --- docs/source/guides/filesystem.mdx | 2 +- .../package_reference/hf_filesystem.mdx | 2 +- src/huggingface_hub/hf_file_system.py | 24 ++++++------------- 3 files changed, 9 insertions(+), 19 deletions(-) diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index 358358b4c1..601ebc4ea4 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -20,7 +20,7 @@ In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSys >>> with fs.open("datasets/my-username/my-dataset-repo/data/train.csv", "r") as f: ... train_data = f.readlines() ->>> # Read the contents of a remote file as a string +>>> # Read the content of a remote file as a string >>> train_data = fs.read_text("datasets/my-username/my-dataset-repo/data/train.csv", revision="dev") >>> # Write a remote file diff --git a/docs/source/package_reference/hf_filesystem.mdx b/docs/source/package_reference/hf_filesystem.mdx index de140f57a2..17c9258d75 100644 --- a/docs/source/package_reference/hf_filesystem.mdx +++ b/docs/source/package_reference/hf_filesystem.mdx @@ -4,7 +4,7 @@ The `HfFileSystem` class provides a pythonic file interface to the Hugging Face ## HfFileSystem -`HfFileSystem` is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), so it is compatible with most of the APIs that it offers. For more details, check out the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). +`HfFileSystem` is based on [fsspec](https://filesystem-spec.readthedocs.io/en/latest/), so it is compatible with most of the APIs that it offers. 
For more details, check out [our guide](../guides/filesystem) and the fsspec's [API Reference](https://filesystem-spec.readthedocs.io/en/latest/api.html#fsspec.spec.AbstractFileSystem). [[autodoc]] HfFileSystem - __init__ diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index a96968c9ad..9373384697 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -184,7 +184,6 @@ def invalidate_cache(self, path=None): self.dircache.clear() self._repository_type_and_id_exists_cache.clear() else: - path = self._strip_protocol(path) path = self.resolve_path(path).unresolve() while path: self.dircache.pop(path, None) @@ -199,21 +198,17 @@ def _open( ): if mode == "ab": raise NotImplementedError("Appending to remote files is not yet supported.") - path = self._strip_protocol(path) return HfFile(self, path, mode=mode, revision=revision, **kwargs) def _rm(self, path, revision: Optional[str] = None, **kwargs): - path = self._strip_protocol(path) resolved_path = self.resolve_path(path, revision=revision) - operations = [CommitOperationDelete(path_in_repo=resolved_path.path_in_repo)] - commit_message = f"Delete {path}" - self._api.create_commit( + self._api.delete_file( + path_in_repo=resolved_path.path_in_repo, repo_id=resolved_path.repo_id, - repo_type=resolved_path.repo_type, token=self.token, - operations=operations, + repo_type=resolved_path.repo_type, revision=resolved_path.revision, - commit_message=kwargs.get("commit_message", commit_message), + commit_message=kwargs.get("commit_message"), commit_description=kwargs.get("commit_description"), ) self.invalidate_cache(path=resolved_path.unresolve()) @@ -241,7 +236,6 @@ def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = Non def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs): """List the contents of a directory.""" - path = self._strip_protocol(path) resolved_path = 
self.resolve_path(path, revision=revision) revision_in_path = "@" + quote(resolved_path.revision, "") has_revision_in_path = revision_in_path in path @@ -286,7 +280,6 @@ def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, * return out if detail else [o["name"] for o in out] def _iter_tree(self, path: str, revision: Optional[str] = None): - path = self._strip_protocol(path) resolved_path = self.resolve_path(path, revision=revision) path = ( f"{self._api.endpoint}/api/{resolved_path.repo_type}s/{resolved_path.repo_id}/tree/{quote(resolved_path.revision, safe='')}/{resolved_path.path_in_repo}" @@ -295,10 +288,8 @@ def _iter_tree(self, path: str, revision: Optional[str] = None): headers = self._api._build_hf_headers() yield from paginate(path, params={}, headers=headers) - def cp_file(self, path1, path2, revision: Optional[str] = None, **kwargs): - path1 = self._strip_protocol(path1) + def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwargs) -> None: resolved_path1 = self.resolve_path(path1, revision=revision) - path2 = self._strip_protocol(path2) resolved_path2 = self.resolve_path(path2, revision=revision) same_repo = ( @@ -360,7 +351,7 @@ def info(self, path, **kwargs): return {"name": path, "size": None, "type": "directory"} return super().info(path, **kwargs) - def expand_path(self, path, recursive=False, maxdepth=None, **kwargs): + def expand_path(self, path: str, recursive: bool = False, maxdepth: Optional[int] = None, **kwargs) -> List[str]: if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") @@ -414,7 +405,6 @@ def _upload_chunk(self, final=False): self.temp_file.write(block) if final: self.temp_file.close() - commit_message = f"Upload {self.path}" self.fs._api.upload_file( path_or_fileobj=self.temp_file.name, path_in_repo=self.resolved_path.path_in_repo, @@ -422,7 +412,7 @@ def _upload_chunk(self, final=False): token=self.fs.token, 
repo_type=self.resolved_path.repo_type, revision=self.resolved_path.revision, - commit_message=self.kwargs.get("commit_message", commit_message), + commit_message=self.kwargs.get("commit_message"), commit_description=self.kwargs.get("commit_description"), ) os.remove(self.temp_file.name) From 7cd9b71f6e9e64b81f938bdddb89c6c00b3a1909 Mon Sep 17 00:00:00 2001 From: mariosasko Date: Wed, 5 Apr 2023 15:20:16 +0200 Subject: [PATCH 13/18] Address the rest of the review comments --- docs/source/_toctree.yml | 2 +- docs/source/guides/filesystem.mdx | 2 +- src/huggingface_hub/hf_file_system.py | 29 ++++++++++++++++++--------- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 7035af8948..60eafd9a08 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -55,7 +55,7 @@ - local: package_reference/inference_api title: Inference API - local: package_reference/hf_filesystem - title: Hugging Face Hub Filesystem + title: Filesystem - local: package_reference/utilities title: Utilities - local: package_reference/community diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/filesystem.mdx index 601ebc4ea4..d65a610b35 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/filesystem.mdx @@ -31,7 +31,7 @@ In addition to the [`HfApi`], the `huggingface_hub` library provides [`HfFileSys The optional `revision` argument can be passed to run an operation from a specific commit such as a branch, tag name, or a commit hash. -Unlike Python's built-in `open`, `fsspec`'s `open` defaults to binary mode, `"rb"`. This means you must explicitly set encoding as `"r"` for reading and `"w"` for writing in text mode. +Unlike Python's built-in `open`, `fsspec`'s `open` defaults to binary mode, `"rb"`. This means you must explicitly set mode as `"r"` for reading and `"w"` for writing in text mode. Appending to a file (modes `"a"` and `"ab"`) is not supported yet. 
## Integrations diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index 9373384697..5ef03613cd 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -2,8 +2,9 @@ import os import tempfile from dataclasses import dataclass +from datetime import datetime from glob import has_magic -from typing import Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union from urllib.parse import quote, unquote import fsspec @@ -179,7 +180,7 @@ def _align_revision_in_path_with_revision( revision = revision if revision is not None else DEFAULT_REVISION return ResolvedPath(repo_type, repo_id, revision, path_in_repo) - def invalidate_cache(self, path=None): + def invalidate_cache(self, path=None) -> None: if not path: self.dircache.clear() self._repository_type_and_id_exists_cache.clear() @@ -213,7 +214,7 @@ def _rm(self, path, revision: Optional[str] = None, **kwargs): ) self.invalidate_cache(path=resolved_path.unresolve()) - def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = None, **kwargs): + def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = None, **kwargs) -> None: resolved_path = self.resolve_path(path, revision=revision) root_path = REPO_TYPES_URL_PREFIXES.get(resolved_path.repo_type, "") + resolved_path.repo_id paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, revision=resolved_path.revision) @@ -234,10 +235,12 @@ def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = Non ) self.invalidate_cache(path=resolved_path.unresolve()) - def ls(self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs): + def ls( + self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs + ) -> List[Union[str, Dict[str, Any]]]: """List the contents of a directory.""" resolved_path = self.resolve_path(path, revision=revision) - revision_in_path = "@" + 
quote(resolved_path.revision, "") + revision_in_path = "@" + quote(resolved_path.revision, safe="") has_revision_in_path = revision_in_path in path path = resolved_path.unresolve() if path not in self.dircache or refresh: @@ -338,20 +341,26 @@ def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwar self.invalidate_cache(path=resolved_path1.unresolve()) self.invalidate_cache(path=resolved_path2.unresolve()) - def modified(self, path, **kwargs): + def modified(self, path: str, **kwargs) -> datetime: info = self.info(path, **kwargs) if "last_modified" not in info: raise IsADirectoryError(path) return info["last_modified"] - def info(self, path, **kwargs): - path = self._strip_protocol(path) + def info(self, path: str, **kwargs) -> Dict[str, Any]: resolved_path = self.resolve_path(path) if not resolved_path.path_in_repo: - return {"name": path, "size": None, "type": "directory"} + revision_in_path = "@" + quote(resolved_path.revision, safe="") + has_revision_in_path = revision_in_path in path + name = resolved_path.unresolve() + name = name.replace(revision_in_path, "", 1) if not has_revision_in_path else name + return {"name": name, "size": 0, "type": "directory"} return super().info(path, **kwargs) - def expand_path(self, path: str, recursive: bool = False, maxdepth: Optional[int] = None, **kwargs) -> List[str]: + def expand_path( + self, path: Union[str, List[str]], recursive: bool = False, maxdepth: Optional[int] = None, **kwargs + ): + # The default implementation does not allow passing custom kwargs (e.g., we use these kwargs to propage the `revision`) if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") From 9782e3a7b6acc8410c73d0b049127cd4ef14c04b Mon Sep 17 00:00:00 2001 From: mariosasko Date: Wed, 5 Apr 2023 15:31:27 +0200 Subject: [PATCH 14/18] Typo --- src/huggingface_hub/hf_file_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/hf_file_system.py 
b/src/huggingface_hub/hf_file_system.py index 5ef03613cd..e584def5a1 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -360,7 +360,7 @@ def info(self, path: str, **kwargs) -> Dict[str, Any]: def expand_path( self, path: Union[str, List[str]], recursive: bool = False, maxdepth: Optional[int] = None, **kwargs ): - # The default implementation does not allow passing custom kwargs (e.g., we use these kwargs to propage the `revision`) + # The default implementation does not allow passing custom kwargs (e.g., we use these kwargs to propagate the `revision`) if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") From b7fb3781db24eeef3b62e7e31f816bcc6c409a49 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Thu, 6 Apr 2023 14:23:46 +0200 Subject: [PATCH 15/18] Renamed hffs classes more explicitly --- src/huggingface_hub/__init__.py | 8 ++++---- src/huggingface_hub/hf_api.py | 2 +- src/huggingface_hub/hf_file_system.py | 18 ++++++++++-------- src/huggingface_hub/utils/__init__.py | 1 + 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index ec95172d5f..5994e800c1 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -163,9 +163,9 @@ "whoami", ], "hf_file_system": [ - "HfFile", "HfFileSystem", - "ResolvedPath", + "HfFileSystemFile", + "HfFileSystemResolvedPath", ], "hub_mixin": [ "ModelHubMixin", @@ -427,9 +427,9 @@ def __dir__(): whoami, # noqa: F401 ) from .hf_file_system import ( - HfFile, # noqa: F401 HfFileSystem, # noqa: F401 - ResolvedPath, # noqa: F401 + HfFileSystemFile, # noqa: F401 + HfFileSystemResolvedPath, # noqa: F401 ) from .hub_mixin import ( ModelHubMixin, # noqa: F401 diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 349f6c2f5d..36bf6adaba 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -64,6 +64,7 
@@ filter_repo_objects, hf_raise_for_status, logging, + paginate, parse_datetime, validate_hf_hub_args, ) @@ -71,7 +72,6 @@ _deprecate_arguments, _deprecate_list_output, ) -from .utils._pagination import paginate from .utils._typing import Literal, TypedDict from .utils.endpoint_helpers import ( AttributeDictionary, diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index e584def5a1..e2970d838e 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -20,14 +20,14 @@ RevisionNotFoundError, hf_raise_for_status, http_backoff, + paginate, parse_datetime, ) -from .utils._pagination import paginate @dataclass -class ResolvedPath: - """Data structure containing information about a resolved path.""" +class HfFileSystemResolvedPath: + """Data structure containing information about a resolved hffs path.""" repo_type: str repo_id: str @@ -111,7 +111,7 @@ def _repo_and_revision_exist( self._repo_and_revision_exists_cache[(repo_type, repo_id, None)] = True, None return self._repo_and_revision_exists_cache[(repo_type, repo_id, revision)] - def resolve_path(self, path: str, revision: Optional[str] = None) -> ResolvedPath: + def resolve_path(self, path: str, revision: Optional[str] = None) -> HfFileSystemResolvedPath: def _align_revision_in_path_with_revision( revision_in_path: Optional[str], revision: Optional[str] ) -> Optional[str]: @@ -178,7 +178,7 @@ def _align_revision_in_path_with_revision( raise NotImplementedError("Acces to repositories lists is not implemented.") revision = revision if revision is not None else DEFAULT_REVISION - return ResolvedPath(repo_type, repo_id, revision, path_in_repo) + return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo) def invalidate_cache(self, path=None) -> None: if not path: @@ -199,7 +199,7 @@ def _open( ): if mode == "ab": raise NotImplementedError("Appending to remote files is not yet supported.") - return HfFile(self, path, 
mode=mode, revision=revision, **kwargs) + return HfFileSystemFile(self, path, mode=mode, revision=revision, **kwargs) def _rm(self, path, revision: Optional[str] = None, **kwargs): resolved_path = self.resolve_path(path, revision=revision) @@ -245,7 +245,9 @@ def ls( path = resolved_path.unresolve() if path not in self.dircache or refresh: path_prefix = ( - ResolvedPath(resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision, "").unresolve() + HfFileSystemResolvedPath( + resolved_path.repo_type, resolved_path.repo_id, resolved_path.revision, "" + ).unresolve() + "/" ) tree_path = path @@ -387,7 +389,7 @@ def expand_path( return list(sorted(out)) -class HfFile(fsspec.spec.AbstractBufferedFile): +class HfFileSystemFile(fsspec.spec.AbstractBufferedFile): def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs): super().__init__(fs, path, **kwargs) self.fs: HfFileSystem diff --git a/src/huggingface_hub/utils/__init__.py b/src/huggingface_hub/utils/__init__.py index f3f545d250..db69f357ea 100644 --- a/src/huggingface_hub/utils/__init__.py +++ b/src/huggingface_hub/utils/__init__.py @@ -44,6 +44,7 @@ from ._headers import build_hf_headers, get_token_to_send from ._hf_folder import HfFolder from ._http import configure_http_backend, get_session, http_backoff +from ._pagination import paginate from ._paths import filter_repo_objects, IGNORE_GIT_FOLDER_PATTERNS from ._runtime import ( dump_environment_info, From 4a7ec33157bd466a8c771b5e932c63a47221116c Mon Sep 17 00:00:00 2001 From: mariosasko Date: Thu, 6 Apr 2023 14:47:20 +0200 Subject: [PATCH 16/18] Filesystem -> HfFileSystem in docs --- docs/source/_toctree.yml | 8 ++++---- docs/source/guides/{filesystem.mdx => hf_file_system.mdx} | 2 +- docs/source/guides/overview.mdx | 2 +- .../{hf_filesystem.mdx => hf_file_system.mdx} | 0 4 files changed, 6 insertions(+), 6 deletions(-) rename docs/source/guides/{filesystem.mdx => hf_file_system.mdx} (99%) rename 
docs/source/package_reference/{hf_filesystem.mdx => hf_file_system.mdx} (100%) diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 60eafd9a08..7ffd9e02c6 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -18,8 +18,8 @@ title: Repository - local: guides/search title: Search - - local: guides/filesystem - title: Filesystem + - local: guides/hf_file_system + title: HfFileSystem - local: guides/inference title: Inference - local: guides/community @@ -54,8 +54,8 @@ title: Mixins & serialization methods - local: package_reference/inference_api title: Inference API - - local: package_reference/hf_filesystem - title: Filesystem + - local: package_reference/hf_file_system + title: HfFileSystem - local: package_reference/utilities title: Utilities - local: package_reference/community diff --git a/docs/source/guides/filesystem.mdx b/docs/source/guides/hf_file_system.mdx similarity index 99% rename from docs/source/guides/filesystem.mdx rename to docs/source/guides/hf_file_system.mdx index d65a610b35..7d0d5581a3 100644 --- a/docs/source/guides/filesystem.mdx +++ b/docs/source/guides/hf_file_system.mdx @@ -99,7 +99,7 @@ It is also possible to login programmatically by passing your `token` as an argu ```python >>> from huggingface_hub import HfFileSystem ->>> fs = hffs.HfFileSystem(token=token) +>>> fs = HfFileSystem(token=token) ``` If you login this way, be careful not to accidentally leak the token when sharing your source code! diff --git a/docs/source/guides/overview.mdx b/docs/source/guides/overview.mdx index 6c5e69a658..6551c839d2 100644 --- a/docs/source/guides/overview.mdx +++ b/docs/source/guides/overview.mdx @@ -45,7 +45,7 @@ Take a look at these guides to learn how to use huggingface_hub to solve real-wo
- Filesystem + HfFileSystem

How to interact with the Hub through a convenient interface that mimics Python's file interface?

diff --git a/docs/source/package_reference/hf_filesystem.mdx b/docs/source/package_reference/hf_file_system.mdx similarity index 100% rename from docs/source/package_reference/hf_filesystem.mdx rename to docs/source/package_reference/hf_file_system.mdx From efa3d921aebb429bb09fa79e9d4b370d9677ee0b Mon Sep 17 00:00:00 2001 From: mariosasko Date: Thu, 6 Apr 2023 16:27:27 +0200 Subject: [PATCH 17/18] Fix for revision with `/` --- src/huggingface_hub/hf_file_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index e2970d838e..3b464a6ef5 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -36,7 +36,7 @@ class HfFileSystemResolvedPath: def unresolve(self): path = ( - f"{REPO_TYPES_URL_PREFIXES.get(self.repo_type, '') + self.repo_id}@{self.revision}/{self.path_in_repo}" + f"{REPO_TYPES_URL_PREFIXES.get(self.repo_type, '') + self.repo_id}@{quote(self.revision, safe='')}/{self.path_in_repo}" .rstrip("/") ) return path @@ -257,7 +257,7 @@ def ls( except EntryNotFoundError: if "/" in resolved_path.path_in_repo: tree_path = self._parent(path) - tree_iter = self._iter_tree(tree_path) + tree_iter = self._iter_tree(tree_path, revision=resolved_path.revision) else: raise From 3ee708df4fa6b47de89b2b1cd366ac638903a2d4 Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Thu, 6 Apr 2023 16:44:56 +0200 Subject: [PATCH 18/18] Add tests for ls + add some type annotations --- src/huggingface_hub/hf_file_system.py | 85 +++++++++++++++------------ tests/test_hf_file_system.py | 54 ++++++++++++++++- 2 files changed, 99 insertions(+), 40 deletions(-) diff --git a/src/huggingface_hub/hf_file_system.py b/src/huggingface_hub/hf_file_system.py index 3b464a6ef5..5d68740cb4 100644 --- a/src/huggingface_hub/hf_file_system.py +++ b/src/huggingface_hub/hf_file_system.py @@ -34,12 +34,11 @@ class HfFileSystemResolvedPath: revision: str
path_in_repo: str - def unresolve(self): - path = ( - f"{REPO_TYPES_URL_PREFIXES.get(self.repo_type, '') + self.repo_id}@{quote(self.revision, safe='')}/{self.path_in_repo}" + def unresolve(self) -> str: + return ( + f"{REPO_TYPES_URL_PREFIXES.get(self.repo_type, '') + self.repo_id}@{safe_quote(self.revision)}/{self.path_in_repo}" .rstrip("/") ) - return path class HfFileSystem(fsspec.AbstractFileSystem): @@ -180,7 +179,7 @@ def _align_revision_in_path_with_revision( revision = revision if revision is not None else DEFAULT_REVISION return HfFileSystemResolvedPath(repo_type, repo_id, revision, path_in_repo) - def invalidate_cache(self, path=None) -> None: + def invalidate_cache(self, path: Optional[str] = None) -> None: if not path: self.dircache.clear() self._repository_type_and_id_exists_cache.clear() @@ -196,12 +195,12 @@ def _open( mode: str = "rb", revision: Optional[str] = None, **kwargs, - ): + ) -> "HfFileSystemFile": if mode == "ab": raise NotImplementedError("Appending to remote files is not yet supported.") return HfFileSystemFile(self, path, mode=mode, revision=revision, **kwargs) - def _rm(self, path, revision: Optional[str] = None, **kwargs): + def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None: resolved_path = self.resolve_path(path, revision=revision) self._api.delete_file( path_in_repo=resolved_path.path_in_repo, @@ -214,7 +213,14 @@ def _rm(self, path, revision: Optional[str] = None, **kwargs): ) self.invalidate_cache(path=resolved_path.unresolve()) - def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = None, **kwargs) -> None: + def rm( + self, + path: str, + recursive: bool = False, + maxdepth: Optional[int] = None, + revision: Optional[str] = None, + **kwargs, + ) -> None: resolved_path = self.resolve_path(path, revision=revision) root_path = REPO_TYPES_URL_PREFIXES.get(resolved_path.repo_type, "") + resolved_path.repo_id paths = self.expand_path(path, recursive=recursive, maxdepth=maxdepth, 
revision=resolved_path.revision) @@ -236,11 +242,11 @@ def rm(self, path, recursive=False, maxdepth=None, revision: Optional[str] = Non self.invalidate_cache(path=resolved_path.unresolve()) def ls( - self, path, detail=True, refresh=False, revision: Optional[str] = None, **kwargs + self, path: str, detail: bool = True, refresh: bool = False, revision: Optional[str] = None, **kwargs ) -> List[Union[str, Dict[str, Any]]]: """List the contents of a directory.""" resolved_path = self.resolve_path(path, revision=revision) - revision_in_path = "@" + quote(resolved_path.revision, safe="") + revision_in_path = "@" + safe_quote(resolved_path.revision) has_revision_in_path = revision_in_path in path path = resolved_path.unresolve() if path not in self.dircache or refresh: @@ -286,9 +292,8 @@ def ls( def _iter_tree(self, path: str, revision: Optional[str] = None): resolved_path = self.resolve_path(path, revision=revision) - path = ( - f"{self._api.endpoint}/api/{resolved_path.repo_type}s/{resolved_path.repo_id}/tree/{quote(resolved_path.revision, safe='')}/{resolved_path.path_in_repo}" - .rstrip("/") + path = f"{self._api.endpoint}/api/{resolved_path.repo_type}s/{resolved_path.repo_id}/tree/{safe_quote(resolved_path.revision)}/{resolved_path.path_in_repo}".rstrip( + "/" ) headers = self._api._build_hf_headers() yield from paginate(path, params={}, headers=headers) @@ -319,9 +324,7 @@ def cp_file(self, path1: str, path2: str, revision: Optional[str] = None, **kwar "deletedFiles": [], } r = requests.post( - ( - f"{self.endpoint}/api/{resolved_path1.repo_type}s/{resolved_path1.repo_id}/commit/{quote(resolved_path2.revision, safe='')}" - ), + f"{self.endpoint}/api/{resolved_path1.repo_type}s/{resolved_path1.repo_id}/commit/{safe_quote(resolved_path2.revision)}", json=payload, headers=headers, ) @@ -352,7 +355,7 @@ def modified(self, path: str, **kwargs) -> datetime: def info(self, path: str, **kwargs) -> Dict[str, Any]: resolved_path = self.resolve_path(path) if not 
resolved_path.path_in_repo: - revision_in_path = "@" + quote(resolved_path.revision, safe="") + revision_in_path = "@" + safe_quote(resolved_path.revision) has_revision_in_path = revision_in_path in path name = resolved_path.unresolve() name = name.replace(revision_in_path, "", 1) if not has_revision_in_path else name @@ -361,29 +364,29 @@ def info(self, path: str, **kwargs) -> Dict[str, Any]: def expand_path( self, path: Union[str, List[str]], recursive: bool = False, maxdepth: Optional[int] = None, **kwargs - ): + ) -> List[str]: # The default implementation does not allow passing custom kwargs (e.g., we use these kwargs to propagate the `revision`) if maxdepth is not None and maxdepth < 1: raise ValueError("maxdepth must be at least 1") if isinstance(path, str): - out = self.expand_path([path], recursive, maxdepth) - else: - out = set() - path = [self._strip_protocol(p) for p in path] - for p in path: - if has_magic(p): - bit = set(self.glob(p)) - out |= bit - if recursive: - out |= set(self.expand_path(list(bit), recursive=recursive, maxdepth=maxdepth, **kwargs)) - continue - elif recursive: - rec = set(self.find(p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs)) - out |= rec - if p not in out and (recursive is False or self.exists(p)): - # should only check once, for the root - out.add(p) + return self.expand_path([path], recursive, maxdepth) + + out = set() + path = [self._strip_protocol(p) for p in path] + for p in path: + if has_magic(p): + bit = set(self.glob(p)) + out |= bit + if recursive: + out |= set(self.expand_path(list(bit), recursive=recursive, maxdepth=maxdepth, **kwargs)) + continue + elif recursive: + rec = set(self.find(p, maxdepth=maxdepth, withdirs=True, detail=False, **kwargs)) + out |= rec + if p not in out and (recursive is False or self.exists(p)): + # should only check once, for the root + out.add(p) if not out: raise FileNotFoundError(path) return list(sorted(out)) @@ -395,22 +398,22 @@ def __init__(self, fs: HfFileSystem, 
path: str, revision: Optional[str] = None, self.fs: HfFileSystem self.resolved_path = fs.resolve_path(path, revision=revision) - def _fetch_range(self, start, end): + def _fetch_range(self, start: int, end: int) -> bytes: headers = { "range": f"bytes={start}-{end - 1}", **self.fs._api._build_hf_headers(), } url = ( - f"{self.fs.endpoint}/{REPO_TYPES_URL_PREFIXES.get(self.resolved_path.repo_type, '') + self.resolved_path.repo_id}/resolve/{quote(self.resolved_path.revision, safe='')}/{quote(self.resolved_path.path_in_repo, safe='')}" + f"{self.fs.endpoint}/{REPO_TYPES_URL_PREFIXES.get(self.resolved_path.repo_type, '') + self.resolved_path.repo_id}/resolve/{safe_quote(self.resolved_path.revision)}/{safe_quote(self.resolved_path.path_in_repo)}" ) r = http_backoff("GET", url, headers=headers) hf_raise_for_status(r) return r.content - def _initiate_upload(self): + def _initiate_upload(self) -> None: self.temp_file = tempfile.NamedTemporaryFile(prefix="hffs-", delete=False) - def _upload_chunk(self, final=False): + def _upload_chunk(self, final: bool = False) -> None: self.buffer.seek(0) block = self.buffer.read() self.temp_file.write(block) @@ -430,3 +433,7 @@ def _upload_chunk(self, final=False): self.fs.invalidate_cache( path=self.resolved_path.unresolve(), ) + + +def safe_quote(s: str) -> str: + return quote(s, safe="") diff --git a/tests/test_hf_file_system.py b/tests/test_hf_file_system.py index 5e5c67d6ca..7a40e1402a 100644 --- a/tests/test_hf_file_system.py +++ b/tests/test_hf_file_system.py @@ -29,7 +29,14 @@ def setUp(self): self.api = self.hffs._api # Create dummy repo - self.api.create_repo(self.repo_id, repo_type=self.repo_type, private=False) + self.api.create_repo(self.repo_id, repo_type=self.repo_type) + self.api.upload_file( + path_or_fileobj=b"dummy binary data on pr", + path_in_repo="data/binary_data_for_pr.bin", + repo_id=self.repo_id, + repo_type=self.repo_type, + create_pr=True, + ) self.api.upload_file( path_or_fileobj="dummy text 
data".encode("utf-8"), path_in_repo="data/text_data.txt", @@ -160,6 +167,51 @@ def test_initialize_from_fsspec(self): self.assertIsInstance(fs, HfFileSystem) self.assertEqual(paths, [f"{self.repo_id}/data/text_data.txt"]) + @retry_endpoint + def test_list_root_directory_no_revision(self): + files = self.hffs.ls(self.hf_path) + self.assertEqual(len(files), 2) + + self.assertEqual(files[0]["type"], "directory") + self.assertEqual(files[0]["size"], 0) + self.assertTrue(files[0]["name"].endswith("/data")) + + self.assertEqual(files[1]["type"], "file") + self.assertGreater(files[1]["size"], 0) # not empty + self.assertTrue(files[1]["name"].endswith("/.gitattributes")) + + @retry_endpoint + def test_list_data_directory_no_revision(self): + files = self.hffs.ls(self.hf_path + "/data") + self.assertEqual(len(files), 2) + + self.assertEqual(files[0]["type"], "file") + self.assertGreater(files[0]["size"], 0) # not empty + self.assertTrue(files[0]["name"].endswith("/data/binary_data.bin")) + self.assertIsNotNone(files[0]["lfs"]) + self.assertIn("oid", files[0]["lfs"]) + self.assertIn("size", files[0]["lfs"]) + self.assertIn("pointerSize", files[0]["lfs"]) + + self.assertEqual(files[1]["type"], "file") + self.assertGreater(files[1]["size"], 0) # not empty + self.assertTrue(files[1]["name"].endswith("/data/text_data.txt")) + self.assertIsNone(files[1]["lfs"]) + + @retry_endpoint + def test_list_data_directory_with_revision(self): + files = self.hffs.ls(self.hf_path + "@refs%2Fpr%2F1" + "/data") + + for test_name, files in { + "rev_in_path": self.hffs.ls(self.hf_path + "@refs%2Fpr%2F1" + "/data"), + "rev_as_arg": self.hffs.ls(self.hf_path + "/data", revision="refs/pr/1"), + "rev_in_path_and_as_arg": self.hffs.ls(self.hf_path + "@refs%2Fpr%2F1" + "/data", revision="refs/pr/1"), + }.items(): + with self.subTest(test_name): + self.assertEqual(len(files), 1) # only one file in PR + self.assertEqual(files[0]["type"], "file") + 
self.assertTrue(files[0]["name"].endswith("/data/binary_data_for_pr.bin")) # PR file + @pytest.mark.parametrize("path_in_repo", ["", "foo"]) @pytest.mark.parametrize(