Skip to content

Commit

Permalink
Delete folder with commit endpoint (#1163)
Browse files Browse the repository at this point in the history
* add test

* not good implementation

* Add tests

* delete_folder + doc

* clearer error message in case of implicit delete operation
  • Loading branch information
Wauplin authored Nov 8, 2022
1 parent b2fff5a commit 74bb506
Show file tree
Hide file tree
Showing 8 changed files with 211 additions and 9 deletions.
1 change: 1 addition & 0 deletions docs/source/how-to-discussions-and-pull-requests.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ set the `create_pr` parameter to `True`. This parameter is also available on oth
* [`upload_file`]
* [`upload_folder`]
* [`delete_file`]
* [`delete_folder`]
* [`metadata_update`]

```python
Expand Down
8 changes: 5 additions & 3 deletions docs/source/how-to-upstream.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,16 @@ If both `allow_patterns` and `ignore_patterns` are provided, both constraints ap

If you want to work at a commit-level, use the [`create_commit`] function directly. There are two types of operations supported by [`create_commit`]:

- `CommitOperationAdd` uploads a file to the Hub. If the file already exists, the file contents are overwritten. This operation accepts two arguments:
- [`CommitOperationAdd`] uploads a file to the Hub. If the file already exists, the file contents are overwritten. This operation accepts two arguments:

- `path_in_repo`: the repository path to upload a file to.
- `path_or_fileobj`: either a path to a file on your filesystem or a file-like object. This is the content of the file to upload to the Hub.

- `CommitOperationDelete` removes a file from a repository. This operation accepts `path_in_repo` as an argument.
- [`CommitOperationDelete`] removes a file or a folder from a repository. This operation accepts `path_in_repo` as an argument.

For example, if you want to upload two files and delete a file and a folder in a Hub repository:

1. Use the appropriate `CommitOperation` to add and delete a file:
1. Use the appropriate `CommitOperation` to add or delete a file and to delete a folder:

```py
>>> from huggingface_hub import HfApi, CommitOperationAdd, CommitOperationDelete
Expand All @@ -96,6 +96,7 @@ For example, if you want to upload two files and delete a file in a Hub reposito
... CommitOperationAdd(path_in_repo="LICENSE.md", path_or_fileobj="~/repo/LICENSE.md"),
... CommitOperationAdd(path_in_repo="weights.h5", path_or_fileobj="~/repo/weights-final.h5"),
... CommitOperationDelete(path_in_repo="old-weights.h5"),
... CommitOperationDelete(path_in_repo="logs/"),
... ]
```

Expand All @@ -112,6 +113,7 @@ For example, if you want to upload two files and delete a file in a Hub reposito
In addition to [`upload_file`] and [`upload_folder`], the following functions also use [`create_commit`] under the hood:

- [`delete_file`] deletes a single file from a repository on the Hub.
- [`delete_folder`] deletes an entire folder from a repository on the Hub.
- [`metadata_update`] updates a repository's metadata.

For more detailed information, take a look at the [`HfApi`] reference.
Expand Down
1 change: 1 addition & 0 deletions src/huggingface_hub/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ single file to a repo or listing models from the Hub, you'll find helpers in
* `create_commit()`
* `upload_file()`
* `delete_file()`
* `delete_folder()`

Those API utilities are also exposed through the `huggingface-cli` CLI:

Expand Down
2 changes: 2 additions & 0 deletions src/huggingface_hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
"create_tag",
"dataset_info",
"delete_file",
"delete_folder",
"delete_repo",
"delete_tag",
"edit_discussion_comment",
Expand Down Expand Up @@ -334,6 +335,7 @@ def __dir__():
from .hf_api import create_tag # noqa: F401
from .hf_api import dataset_info # noqa: F401
from .hf_api import delete_file # noqa: F401
from .hf_api import delete_folder # noqa: F401
from .hf_api import delete_repo # noqa: F401
from .hf_api import delete_tag # noqa: F401
from .hf_api import edit_discussion_comment # noqa: F401
Expand Down
28 changes: 23 additions & 5 deletions src/huggingface_hub/_commit_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,31 @@
@dataclass
class CommitOperationDelete:
    """
    Data structure holding necessary info to delete a file or a folder from a
    repository on the Hub.

    Args:
        path_in_repo (`str`):
            Relative path in the repo, for example:
            `"checkpoints/1fec34a/weights.bin"` for a file or
            `"checkpoints/1fec34a/"` for a folder.
        is_folder (`bool` or `Literal["auto"]`, *optional*):
            Whether the Delete Operation applies to a folder or not. If "auto", the
            path type (file or folder) is guessed automatically by looking if path
            ends with a "/" (folder) or not (file). To explicitly set the path
            type, you can set `is_folder=True` or `is_folder=False`. Defaults to
            `"auto"`.

    Raises:
        `ValueError`:
            If `is_folder` is neither a boolean nor `"auto"`.
    """

    path_in_repo: str
    is_folder: Union[bool, Literal["auto"]] = "auto"

    def __post_init__(self):
        # Resolve "auto" into a concrete boolean first, so that after
        # construction `is_folder` is always a plain `bool`.
        if self.is_folder == "auto":
            self.is_folder = self.path_in_repo.endswith("/")
        # An explicit `True`/`False` is kept as given, even if it contradicts the
        # trailing "/" of the path. Anything else is rejected.
        if not isinstance(self.is_folder, bool):
            raise ValueError(
                "Wrong value for `is_folder`. Must be one of [`True`, `False`,"
                f" `'auto'`]. Got '{self.is_folder}'."
            )


@dataclass
Expand Down Expand Up @@ -460,6 +475,9 @@ def prepare_commit_payload(

# 4. Send deleted files, one per line
yield from (
{"key": "deletedFile", "value": {"path": del_op.path_in_repo}}
{
"key": "deletedFolder" if del_op.is_folder else "deletedFile",
"value": {"path": del_op.path_in_repo},
}
for del_op in deletions
)
77 changes: 76 additions & 1 deletion src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from urllib.parse import quote

import requests
from huggingface_hub.utils import RepositoryNotFoundError
from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
from requests.exceptions import HTTPError

from ._commit_api import (
Expand Down Expand Up @@ -1920,6 +1920,13 @@ def _payload_as_ndjson() -> Iterable[bytes]:
except RepositoryNotFoundError as e:
e.append_to_message(_CREATE_COMMIT_NO_REPO_ERROR_MESSAGE)
raise
except EntryNotFoundError as e:
if len(deletions) > 0 and "A file with this name doesn't exist" in str(e):
e.append_to_message(
"\nMake sure to differentiate file and folder paths in delete"
" operations with a trailing '/' or using `is_folder=True/False`."
)
raise

commit_data = commit_resp.json()
return CommitInfo(
Expand Down Expand Up @@ -2327,6 +2334,73 @@ def delete_file(
parent_commit=parent_commit,
)

@validate_hf_hub_args
def delete_folder(
    self,
    path_in_repo: str,
    repo_id: str,
    *,
    token: Optional[str] = None,
    repo_type: Optional[str] = None,
    revision: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    create_pr: Optional[bool] = None,
    parent_commit: Optional[str] = None,
) -> CommitInfo:
    """
    Delete a whole folder from a repo in a single commit.

    Thin wrapper around [`create_commit`]: it submits exactly one
    `CommitOperationDelete` flagged with `is_folder=True`.

    Args:
        path_in_repo (`str`):
            Relative folder path in the repo, for example:
            `"checkpoints/1fec34a"`. The operation is explicitly marked as a
            folder deletion, so it does not rely on a trailing "/" in the path.
        repo_id (`str`):
            The repository from which the folder will be deleted, for example:
            `"username/custom_transformers"`.
        token (`str`, *optional*):
            Authentication token, obtained with `HfApi.login` method. Will
            default to the stored token.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if the folder is in a dataset or
            space, `None` or `"model"` if in a model. Default is `None`.
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults
            to `f"Delete folder {path_in_repo} with huggingface_hub"`.
        commit_description (`str`, *optional*):
            The description of the generated commit.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with the
            changes. Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string.
            Shorthands (7 first characters) are also supported. If specified
            and `create_pr` is `False`, the commit will fail if `revision` does
            not point to `parent_commit`. If specified and `create_pr` is
            `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before
            committing the changes, and can be especially useful if the repo is
            updated / committed to concurrently.

    Returns:
        [`CommitInfo`]: the value returned by the underlying [`create_commit`]
        call.
    """
    folder_deletion = CommitOperationDelete(path_in_repo=path_in_repo, is_folder=True)

    # Only fall back to the generated title when the caller gave none at all;
    # an explicit empty string is forwarded untouched.
    if commit_message is None:
        commit_message = f"Delete folder {path_in_repo} with huggingface_hub"

    return self.create_commit(
        repo_id=repo_id,
        repo_type=repo_type,
        token=token,
        operations=[folder_deletion],
        revision=revision,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        parent_commit=parent_commit,
    )

@validate_hf_hub_args
def create_tag(
self,
Expand Down Expand Up @@ -3312,6 +3386,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
upload_file = api.upload_file
upload_folder = api.upload_folder
delete_file = api.delete_file
delete_folder = api.delete_folder
create_tag = api.create_tag
delete_tag = api.delete_tag
get_full_repo_name = api.get_full_repo_name
Expand Down
48 changes: 48 additions & 0 deletions tests/test_commit_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import unittest

from huggingface_hub._commit_api import CommitOperationDelete


class TestCommitOperationDelete(unittest.TestCase):
    """Check how `CommitOperationDelete` resolves its `is_folder` attribute."""

    def test_implicit_file(self):
        # No trailing "/" => the path is guessed to be a file.
        for path in ("path/to/file", "path/to/file.md"):
            operation = CommitOperationDelete(path_in_repo=path)
            self.assertFalse(operation.is_folder)

    def test_implicit_folder(self):
        # Trailing "/" => the path is guessed to be a folder, even when the
        # last component looks like a file name.
        for path in ("path/to/folder/", "path/to/folder.md/"):
            operation = CommitOperationDelete(path_in_repo=path)
            self.assertTrue(operation.is_folder)

    def test_explicit_file(self):
        # Weird case: if user explicitly set as file (`is_folder`=False) but path
        # has a trailing "/" => user input has priority
        for path in ("path/to/folder/", "path/to/folder.md/"):
            operation = CommitOperationDelete(path_in_repo=path, is_folder=False)
            self.assertFalse(operation.is_folder)

    def test_explicit_folder(self):
        # No need for the trailing "/" if `is_folder` is explicitly passed
        for path in ("path/to/folder", "path/to/folder.md"):
            operation = CommitOperationDelete(path_in_repo=path, is_folder=True)
            self.assertTrue(operation.is_folder)

    def test_is_folder_wrong_value(self):
        # Anything other than a boolean or "auto" must be rejected.
        self.assertRaises(
            ValueError,
            CommitOperationDelete,
            path_in_repo="path/to/folder",
            is_folder="any value",
        )
55 changes: 55 additions & 0 deletions tests/test_hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
repo_type_and_id_from_hf_id,
)
from huggingface_hub.utils import (
EntryNotFoundError,
HfFolder,
HfHubHTTPError,
RepositoryNotFoundError,
Expand Down Expand Up @@ -1024,6 +1025,60 @@ def test_commit_preflight_on_lots_of_lfs_files(self):
self._api.delete_repo(repo_id=REPO_NAME)


class HfApiDeleteFolderTest(HfApiCommonTestWithLogin):
    """
    Folder deletion through `create_commit` and `delete_folder`.

    Each test runs against a fresh repo holding two files under `1/` and one
    file under `2/`; the repo is deleted again in `tearDown`.
    """

    def setUp(self):
        self.repo_id = f"{USER}/{repo_name('create_commit_delete_folder')}"
        self._api.create_repo(repo_id=self.repo_id, exist_ok=False)

        # Seed the repo: folder "1" is the deletion target, folder "2" is the
        # control that must survive.
        seeded_paths = ("1/file_1.md", "1/file_2.md", "2/file_3.md")
        self._api.create_commit(
            repo_id=self.repo_id,
            commit_message="Init repo",
            operations=[
                CommitOperationAdd(path_or_fileobj=b"data", path_in_repo=path)
                for path in seeded_paths
            ],
        )

    def tearDown(self):
        self._api.delete_repo(repo_id=self.repo_id)

    @retry_endpoint
    def test_create_commit_delete_folder_implicit(self):
        # Trailing "/" => the operation is treated as a folder deletion.
        implicit_folder_deletion = CommitOperationDelete(path_in_repo="1/")
        self._api.create_commit(
            operations=[implicit_folder_deletion],
            commit_message="Test delete folder implicit",
            repo_id=self.repo_id,
        )

        for deleted_path in ("1/file_1.md", "1/file_2.md"):
            with self.assertRaises(EntryNotFoundError):
                hf_hub_download(
                    self.repo_id, deleted_path, use_auth_token=self._token
                )

        # Still exists
        hf_hub_download(self.repo_id, "2/file_3.md", use_auth_token=self._token)

    @retry_endpoint
    def test_create_commit_delete_folder_explicit(self):
        # `delete_folder` marks the operation as a folder itself, so no
        # trailing "/" is needed here.
        self._api.delete_folder(path_in_repo="1", repo_id=self.repo_id)

        with self.assertRaises(EntryNotFoundError):
            hf_hub_download(self.repo_id, "1/file_1.md", use_auth_token=self._token)

    @retry_endpoint
    def test_create_commit_failing_implicit_delete_folder(self):
        # Without the trailing "/" the path is sent as a file deletion; the
        # resulting error is expected to carry the file/folder hint.
        ambiguous_deletion = CommitOperationDelete(path_in_repo="1")
        expected_hint = "Make sure to differentiate file and folder paths"

        with self.assertRaisesRegex(EntryNotFoundError, expected_hint):
            self._api.create_commit(
                operations=[ambiguous_deletion],
                commit_message="Failing delete folder",
                repo_id=self.repo_id,
            )


class HfApiTagEndpointTest(HfApiCommonTestWithLogin):
_user = USER
_repo_id: str
Expand Down

0 comments on commit 74bb506

Please sign in to comment.