Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] Add update_repo_settings function to HfApi #2447 #2502

Merged
merged 25 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
91042af
[Feature] Update Repo Settings
WizKnight Sep 2, 2024
451baf1
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 2, 2024
f256710
Merge branch 'main' into feature/update-repo-settings
Wauplin Sep 3, 2024
409baa5
resolve merge conflicts
Wauplin Sep 3, 2024
f773f77
fix merge issue
Wauplin Sep 3, 2024
92c9531
merge issues
Wauplin Sep 3, 2024
fb7cfd1
Merge branch 'huggingface:main' into feature/update-repo-settings
WizKnight Sep 3, 2024
cf3c78c
Add `update_repo_settings` function to HfApi
WizKnight Sep 3, 2024
6e81211
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 4, 2024
2544898
Add `update_repo_settings` function to HfApi
WizKnight Sep 4, 2024
22952bf
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 4, 2024
609ea61
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 5, 2024
3e507a9
Enhance HfApi with `update_repo_settings` function
WizKnight Sep 5, 2024
761a33d
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 6, 2024
6d57808
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 10, 2024
401502f
Merge branch 'main' into feature/update-repo-settings
Wauplin Sep 10, 2024
244e4b2
Merge branch 'main' of https://github.com/WizKnight/huggingface_hub i…
WizKnight Sep 10, 2024
12e0eb5
Merge branch 'feature/update-repo-settings' of https://github.com/Wiz…
WizKnight Sep 10, 2024
8e6f9cf
Enhance HfApi with `update_repo_settings` function
WizKnight Sep 10, 2024
59c05a5
Enhance HfApi with `update_repo_settings` function
WizKnight Sep 10, 2024
8bf6f82
Merge branch 'main' of https://github.com/WizKnight/huggingface_hub i…
WizKnight Sep 11, 2024
2bd4767
Enhance HfApi with `update_repo_settings` function
WizKnight Sep 11, 2024
fbe8cbe
Enhance HfApi with `update_repo_settings` function
WizKnight Sep 11, 2024
772837d
Merge branch 'main' into feature/update-repo-settings
WizKnight Sep 12, 2024
ef61fbb
Apply suggestions from code review
Wauplin Sep 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions docs/source/en/guides/repository.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,20 @@ A repository can be public or private. A private repository is only visible to y
>>> update_repo_visibility(repo_id=repo_id, private=True)
```

### Update repository settings (Gated Access)
Wauplin marked this conversation as resolved.
Show resolved Hide resolved

The `update_repo_settings` function allows you to control the gated access feature of a repository. Gated access restricts access to the repository's files, requiring users to request access before they can view or download the content.
You can update a repository's settings with the `update_repo_settings` function, as shown below:
Wauplin marked this conversation as resolved.
Show resolved Hide resolved

**Note:** This method is currently designed to work primarily with **dataset** repositories.

Wauplin marked this conversation as resolved.
Show resolved Hide resolved
```py
>>> from huggingface_hub import HfApi

>>> api = HfApi()
>>> api.update_repo_settings(repo_id=repo_id, gated="auto") # Set automatic gating for a dataset
Wauplin marked this conversation as resolved.
Show resolved Hide resolved
```

### Rename your repository

You can rename your repository on the Hub using [`move_repo`]. Using this method, you can also move the repo from a user to
Expand Down
4 changes: 2 additions & 2 deletions src/huggingface_hub/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,10 @@
"update_collection_metadata",
"update_inference_endpoint",
"update_repo_visibility",
"update_repo_settings",
"update_webhook",
"upload_file",
"upload_folder",
"upload_large_folder",
"whoami",
],
"hf_file_system": [
Expand Down Expand Up @@ -754,10 +754,10 @@ def __dir__():
update_collection_metadata, # noqa: F401
update_inference_endpoint, # noqa: F401
update_repo_visibility, # noqa: F401
update_repo_settings, #noqa: F401
update_webhook, # noqa: F401
upload_file, # noqa: F401
upload_folder, # noqa: F401
upload_large_folder, # noqa: F401
whoami, # noqa: F401
)
from .hf_file_system import (
Expand Down
157 changes: 36 additions & 121 deletions src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@
plan_multi_commits,
)
from ._space_api import SpaceHardware, SpaceRuntime, SpaceStorage, SpaceVariable
from ._upload_large_folder import upload_large_folder_internal
from .community import (
Discussion,
DiscussionComment,
Expand Down Expand Up @@ -3542,6 +3541,39 @@ def update_repo_visibility(
hf_raise_for_status(r)
return r.json()

@validate_hf_hub_args
def update_repo_settings(
    self,
    repo_id: str,
    gated: Union[str, bool] = False,
    *,
    token: Union[str, bool, None] = None,
    repo_type: Optional[str] = None,
) -> Dict[str, Union[str, bool]]:
    """
    Update the gated-access setting of a repository.

    Sends a `PUT` request to `{endpoint}/api/{repo_type}s/{repo_id}/settings` with the
    requested gated status and returns the decoded JSON response.

    Args:
        repo_id (`str`):
            Identifier of the repository whose settings are updated.
        gated (`str` or `bool`, *optional*):
            Gated status to apply. Must be one of `"auto"`, `"manual"` or `False`.
            Defaults to `False`.
        token (`str`, `bool` or `None`, *optional*):
            Forwarded to `self._build_hf_headers` to build the request headers.
        repo_type (`str`, *optional*):
            Repository type used to build the URL (e.g. `"dataset"` -> `/api/datasets/...`).
            When `None`, `"dataset"` is used, which is the only URL this method
            targeted before `repo_type` was honored.

    Returns:
        `Dict[str, Union[str, bool]]`: The JSON body of the server response.

    Raises:
        `ValueError`:
            If `gated` is not an accepted value, if the server answers 401
            (invalid or missing token), or if it answers 404 (repository not found).
        `requests.exceptions.HTTPError`:
            For any other HTTP error status; the original error is re-raised.
    """
    if gated not in ("auto", "manual", False):
        raise ValueError(f"Invalid gated status, must be one of 'auto', 'manual', or False. Got '{gated}'.")

    # Keep the historical target when the caller does not specify a type.
    # NOTE(review): other `HfApi` methods may default to "model" - confirm the intended default.
    if repo_type is None:
        repo_type = "dataset"

    r = get_session().put(
        url=f"{self.endpoint}/api/{repo_type}s/{repo_id}/settings",
        headers=self._build_hf_headers(token=token),
        json={"gated": gated},
    )

    try:
        hf_raise_for_status(r)
    except requests.exceptions.HTTPError as e:
        status_code = e.response.status_code
        if status_code == 401:
            raise ValueError("Invalid or missing Hugging Face token. Please check your authentication.") from e
        if status_code == 404:
            raise ValueError(f"Repository not found: {repo_id}") from e
        # Never swallow an unexpected failure: parsing the body of a failed
        # response as if it were a success would hide the real error.
        raise

    return r.json()
Wauplin marked this conversation as resolved.
Show resolved Hide resolved

def move_repo(
self,
from_id: str,
Expand Down Expand Up @@ -5173,123 +5205,6 @@ def delete_folder(
parent_commit=parent_commit,
)

def upload_large_folder(
    self,
    repo_id: str,
    folder_path: Union[str, Path],
    *,
    repo_type: str,  # Repo type is required!
    revision: Optional[str] = None,
    private: bool = False,
    allow_patterns: Optional[Union[List[str], str]] = None,
    ignore_patterns: Optional[Union[List[str], str]] = None,
    num_workers: Optional[int] = None,
    print_report: bool = True,
    print_report_every: int = 60,
) -> None:
    """Upload a large folder to the Hub in a resilient, resumable way.

    The upload is carried out by several workers. Every file has to be hashed and, when it is an LFS file,
    pre-uploaded before it can be committed; workers run those steps file by file. Progress metadata is written
    under `.cache/.huggingface/` inside the folder after each step so that an interrupted run can be resumed.
    A single call may produce several commits.

    Args:
        repo_id (`str`):
            Repository that receives the files, e.g. `"HuggingFaceTB/smollm-corpus"`.
        folder_path (`str` or `Path`):
            Local path of the folder to upload.
        repo_type (`str`):
            One of `"model"`, `"dataset"` or `"space"`. Contrary to the other `HfApi` methods, this argument
            is mandatory: it guards against uploading a large folder to the wrong kind of repo and having to
            start over.
        revision (`str`, *optional*):
            Branch to commit to. The `main` branch is used when not provided.
        private (`bool`, *optional*):
            Whether the repository should be private. Defaults to False.
        allow_patterns (`List[str]` or `str`, *optional*):
            When set, only files matching at least one of the patterns are uploaded.
        ignore_patterns (`List[str]` or `str`, *optional*):
            When set, files matching any of the patterns are skipped.
        num_workers (`int`, *optional*):
            How many workers to start. Defaults to `os.cpu_count() - 2` (minimum 2). More workers can speed
            things up on a capable machine, but with a slow connection a low number is recommended for better
            resumability, since a partially uploaded file must be re-uploaded from scratch after an interruption.
        print_report (`bool`, *optional*):
            Whether to print a progress report. Defaults to True. The report goes to `sys.stdout` every
            X seconds (60 by default) and overwrites the previous one.
        print_report_every (`int`, *optional*):
            Interval, in seconds, between two reports. Defaults to 60 seconds.

    <Tip>

    A few things to keep in mind:
        - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations
        - Do not start several processes in parallel.
        - The process can be interrupted and resumed at any time.
        - Do not upload the same folder to several repositories. If you have to, delete the local `.cache/.huggingface/` folder first.

    </Tip>

    <Tip warning={true}>

    `upload_large_folder` is far more robust for large folders but offers fewer features than [`upload_folder`]. In practice:
        - no custom `path_in_repo`: to upload to a subfolder, reproduce the structure locally.
        - no custom `commit_message` or `commit_description`, because several commits are created.
        - no deletion from the repo during the upload. Make a separate commit first.
        - no direct PR creation. Create the PR first (from the UI or with [`create_pull_request`]) and commit to it by passing `revision`.

    </Tip>

    **Technical details:**

    The `upload_large_folder` process is as follows:
        1. (Check parameters and setup.)
        2. Create repo if missing.
        3. List local files to upload.
        4. Start workers. A worker can perform any of these tasks:
            - Hash a file.
            - Get upload mode (regular or LFS) for a list of files.
            - Pre-upload an LFS file.
            - Commit a bunch of files.
        When a worker completes a task it picks the next one according to the priority list below, until
        every file is uploaded and committed.
        5. While workers are running, regularly print a report to sys.stdout.

    Order of priority:
        1. Commit if more than 5 minutes since last commit attempt (and at least 1 file).
        2. Commit if at least 25 files are ready to commit.
        3. Get upload mode if at least 10 files have been hashed.
        4. Pre-upload LFS file if at least 1 file and no worker is pre-uploading.
        5. Hash file if at least 1 file and no worker is hashing.
        6. Get upload mode if at least 1 file and no worker is getting upload mode.
        7. Pre-upload LFS file if at least 1 file (exception: if hf_transfer is enabled, only 1 worker can preupload LFS at a time).
        8. Hash file if at least 1 file to hash.
        9. Get upload mode if at least 1 file to get upload mode.
        10. Commit if at least 1 file to commit.

    Special rules:
        - With `hf_transfer` enabled, only 1 LFS uploader runs at a time. Otherwise the CPU would be bloated by `hf_transfer`.
        - Only one worker can commit at a time.
        - A worker that finds no available task waits 10 seconds before checking again.
    """
    # All the work happens in the internal helper; this method only forwards its arguments.
    return upload_large_folder_internal(
        self,
        repo_id=repo_id,
        folder_path=folder_path,
        repo_type=repo_type,
        revision=revision,
        private=private,
        allow_patterns=allow_patterns,
        ignore_patterns=ignore_patterns,
        num_workers=num_workers,
        print_report=print_report,
        print_report_every=print_report_every,
    )

@validate_hf_hub_args
def get_hf_file_metadata(
self,
Expand Down Expand Up @@ -7673,6 +7588,7 @@ def create_inference_endpoint(
"revision": revision,
"task": task,
"image": image,
"secrets": secrets,
},
"name": name,
"provider": {
Expand All @@ -7681,8 +7597,7 @@ def create_inference_endpoint(
},
"type": type,
}
if secrets:
payload["model"]["secrets"] = secrets

response = get_session().post(
f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
headers=self._build_hf_headers(token=token),
Expand Down Expand Up @@ -9574,6 +9489,7 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
create_repo = api.create_repo
delete_repo = api.delete_repo
update_repo_visibility = api.update_repo_visibility
update_repo_settings = api.update_repo_settings
super_squash_history = api.super_squash_history
move_repo = api.move_repo
upload_file = api.upload_file
Expand All @@ -9582,7 +9498,6 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
delete_folder = api.delete_folder
delete_files = api.delete_files
create_commits_on_pr = api.create_commits_on_pr
upload_large_folder = api.upload_large_folder
preupload_lfs_files = api.preupload_lfs_files
create_branch = api.create_branch
delete_branch = api.delete_branch
Expand Down
22 changes: 22 additions & 0 deletions tests/test_hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ def test_repo_id_no_warning():
with warnings.catch_warnings(record=True) as record:
repo_id = api.create_repo(repo_name()).repo_id
api.update_repo_visibility(repo_id, private=True)
api.update_repo_settings(repo_id, gated="auto")
Wauplin marked this conversation as resolved.
Show resolved Hide resolved
api.delete_repo(repo_id)
assert not len(record)

Expand Down Expand Up @@ -223,6 +224,15 @@ def test_create_update_and_delete_repo(self):
assert res["private"]
res = self._api.update_repo_visibility(repo_id=repo_id, private=False)
assert not res["private"]

# Test gated status update (new functionality)
res = self._api.update_repo_settings(repo_id=repo_id, gated="auto")
hanouticelina marked this conversation as resolved.
Show resolved Hide resolved
assert res["gated"] == "auto"
Wauplin marked this conversation as resolved.
Show resolved Hide resolved
res = self._api.update_repo_settings(repo_id=repo_id, gated="manual")
assert res["gated"] == "manual"
res = self._api.update_repo_settings(repo_id=repo_id, gated=False)
assert res["gated"] is False

self._api.delete_repo(repo_id=repo_id)

def test_create_update_and_delete_model_repo(self):
Expand Down Expand Up @@ -288,6 +298,18 @@ def test_move_repo_invalid_repo_id(self) -> None:
with pytest.raises(ValueError, match=r"Invalid repo_id*"):
self._api.move_repo(from_id="invalid_repo_id", to_id="namespace/repo_name")

## Test for #2447
## See https://github.com/huggingface/huggingface_hub/issues/2447

#def test_update_repo_settings(self):
# repo_id = self._api.create_repo(repo_id=repo_name()).repo_id
# res = self._api.update_repo_settings(repo_id=repo_id, gated="auto")
# assert res["gated"] == "auto"
# res = self._api.update_repo_settings(repo_id=repo_id, gated="manual")
# assert res["gated"] == "manual"
# res = self._api.update_repo_settings(repo_id=repo_id, gated=False)
# assert res.get("gated") is False
# self._api.delete_repo(repo_id=repo_id)

class CommitApiTest(HfApiCommonTest):
def setUp(self) -> None:
Expand Down