Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Non-git mixin #8

Merged
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
cf0a577
FIX typos in contributing.md
Wauplin Aug 3, 2022
396ed56
Remove redefined logger in HfApi.py
Wauplin Aug 3, 2022
d34b020
Use upload_folder in both mixins + some docstring
Wauplin Aug 3, 2022
bab52c2
moved back logger to top of hf_api ><
Wauplin Aug 3, 2022
198a425
space in documentation can be ambiguous
Wauplin Aug 3, 2022
4d3f9e4
WIP started deprecation
Wauplin Aug 3, 2022
0a55017
deprecate skip lfs file and use Path
Wauplin Aug 4, 2022
ada6607
added decorator to deprecate specific arguments + unittests for it
Wauplin Aug 4, 2022
bc0425a
simplified tests
Wauplin Aug 4, 2022
0b3eb5d
proper decorators
Wauplin Aug 4, 2022
4de8ea9
fix docstring
Wauplin Aug 4, 2022
f71b482
hubmixin: fixed existing tests + add http one
Wauplin Aug 4, 2022
b3b5bac
unique repo names across tests
Wauplin Aug 4, 2022
f1580ba
make push_to_hub_keras work + tests
Wauplin Aug 4, 2022
9b12475
logs are not overwritten in push_to_hub_keras
Wauplin Aug 4, 2022
41cc62a
flake8
Wauplin Aug 5, 2022
d039ad5
refacto push_to_hub from mixin.save_pretrained
Wauplin Aug 8, 2022
50069b1
deprecate positional argument in version 0.12
Wauplin Aug 8, 2022
ed06809
remove docstring for deprecated skip_lfs_files
Wauplin Aug 8, 2022
bea0b77
delete old logs when uploading keras model to hub
Wauplin Aug 8, 2022
bf87876
remove TODO in tests
Wauplin Aug 8, 2022
d593f75
remove useless todo
Wauplin Aug 8, 2022
8c09885
Update src/huggingface_hub/hub_mixin.py
Wauplin Aug 8, 2022
76a03c2
flake8
Wauplin Aug 8, 2022
12be988
Merge branch 'wauplin-non-git-mixin' of github.com:huggingface/huggin…
Wauplin Aug 8, 2022
9965f0e
remove un-explicit _generate_url helper
Wauplin Aug 8, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ venv/
ENV/
env.bak/
venv.bak/
.venv*

# Spyder project settings
.spyderproject
Expand Down
5 changes: 3 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -258,13 +258,14 @@ repository they can be run with the following:

```bash
$ HUGGINGFACE_CO_STAGING=1 python -m pytest -sv ./tests
```

In fact, that's how `make test` is implemented (sans the `pip install` line)!
In fact, that's how `make test` is implemented (without the `pip install` line)!

You can specify a smaller set of tests in order to test only the feature
you're working on.

For example, the following will only run the tests hel in the `test_repository.py` file:
For example, the following will only run the tests in the `test_repository.py` file:

```bash
$ HUGGINGFACE_CO_STAGING=1 python -m pytest -sv ./tests/test_repository.py
Expand Down
2 changes: 1 addition & 1 deletion docs/source/how-to-manage.mdx
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Create and manage a repository

A repository is a space for you to store your model or dataset files. This guide will show you how to:
A repository is a place where you can store your model or dataset files. This guide will show you how to:

* Create and delete a repository.
* Adjust repository visibility.
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def get_version() -> str:

extras["tensorflow"] = ["tensorflow", "pydot", "graphviz"]

extras["ml"] = extras["torch"] + extras["fastai"] + extras["tensorflow"]

extras["testing"] = [
"pytest",
"pytest-cov",
Expand Down
102 changes: 50 additions & 52 deletions src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
ModelFilter,
ModelTags,
_filter_emissions,
_generate_url,
)


Expand All @@ -59,8 +60,6 @@
else:
from typing_extensions import Literal


REGEX_DISCUSSION_URL = re.compile(r".*/discussions/(\d+)$")
USERNAME_PLACEHOLDER = "hf_user"

logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -100,9 +99,6 @@ def _validate_repo_id_deprecation(repo_id, name, organization):
return name, organization


logger = logging.get_logger(__name__)


def repo_type_and_id_from_hf_id(
hf_id: str, hub_url: Optional[str] = None
) -> Tuple[Optional[str], Optional[str], str]:
Expand Down Expand Up @@ -1431,7 +1427,7 @@ def list_repo_files(
)
return [f.rfilename for f in repo_info.siblings]

@_deprecate_positional_args
@_deprecate_positional_args(version="0.12")
def create_repo(
self,
repo_id: str = None,
Expand Down Expand Up @@ -1887,6 +1883,11 @@ def create_commit(
" `CommitOperationDelete`"
)

logger.debug(
f"About to commit to the hub: {len(additions)} addition(s) and"
f" {len(additions)} deletion(s)."
)

for addition in additions:
addition.validate()

Expand Down Expand Up @@ -2060,20 +2061,16 @@ def upload_file(
revision=revision,
create_pr=create_pr,
)
if pr_url is not None:
re_match = re.match(REGEX_DISCUSSION_URL, pr_url)
if re_match is None:
raise RuntimeError(
"Unexpected response from the hub, expected a Pull Request URL but"
f" got: '{pr_url}'"
)
revision = quote(f"refs/pr/{re_match[1]}", safe="")

if repo_type in REPO_TYPES_URL_PREFIXES:
repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id
revision = revision if revision is not None else DEFAULT_REVISION
return f"{self.endpoint}/{repo_id}/blob/{revision}/{path_in_repo}"
# ^ Similar to `hf_hub_url` but it's "blob" instead of "resolve"
return _generate_url(
"as_file",
endpoint=self.endpoint,
repo_id=repo_id,
path_in_repo=path_in_repo,
repo_type=repo_type,
revision=revision,
pr_url=pr_url,
)

def upload_folder(
self,
Expand Down Expand Up @@ -2172,25 +2169,8 @@ def upload_folder(
if commit_message is not None
else f"Upload {path_in_repo} with huggingface_hub"
)
folder_path = os.path.normpath(os.path.expanduser(folder_path))
if not os.path.isdir(folder_path):
raise ValueError(f"Provided path: '{folder_path}' is not a directory")

files_to_add: List[CommitOperationAdd] = []
for dirpath, _, filenames in os.walk(folder_path):
for filename in filenames:
abs_path = os.path.join(dirpath, filename)
rel_path = os.path.relpath(abs_path, folder_path)
files_to_add.append(
CommitOperationAdd(
path_or_fileobj=abs_path,
path_in_repo=os.path.normpath(
os.path.join(path_in_repo, rel_path)
).replace(os.sep, "/"),
)
)

logger.debug(f"About to upload / commit {len(files_to_add)} files to the Hub")
files_to_add = _prepare_upload_folder_commit(folder_path, path_in_repo)

pr_url = self.create_commit(
repo_type=repo_type,
Expand All @@ -2203,21 +2183,15 @@ def upload_folder(
create_pr=create_pr,
)

if pr_url is not None:
re_match = re.match(REGEX_DISCUSSION_URL, pr_url)
if re_match is None:
raise RuntimeError(
"Unexpected response from the hub, expected a Pull Request URL but"
f" got: '{pr_url}'"
)
revision = quote(f"refs/pr/{re_match[1]}", safe="")

if repo_type in REPO_TYPES_URL_PREFIXES:
repo_id = REPO_TYPES_URL_PREFIXES[repo_type] + repo_id

revision = revision if revision is not None else DEFAULT_REVISION
return f"{self.endpoint}/{repo_id}/tree/{revision}/{path_in_repo}"
# ^ Similar to `hf_hub_url` but it's "tree" instead of "resolve"
return _generate_url(
"as_folder",
endpoint=self.endpoint,
repo_id=repo_id,
path_in_repo=path_in_repo,
repo_type=repo_type,
revision=revision,
pr_url=pr_url,
)

def delete_file(
self,
Expand Down Expand Up @@ -2380,6 +2354,30 @@ def delete_token(cls):
pass


def _prepare_upload_folder_commit(
    folder_path: str, path_in_repo: str
) -> List[CommitOperationAdd]:
    """Build the "add" operations needed to commit a whole local folder.

    Args:
        folder_path (`str`):
            Path to the local folder to upload. `~` is expanded and the path
            is normalized before use.
        path_in_repo (`str`):
            Directory inside the repo under which the files are placed. It is
            joined in front of each file's path relative to `folder_path`.

    Returns:
        `List[CommitOperationAdd]`: one operation per file found while walking
        `folder_path` recursively, in `os.walk` order.

    Raises:
        `ValueError`: if the normalized `folder_path` is not an existing
        directory.
    """
    root = os.path.normpath(os.path.expanduser(folder_path))
    if not os.path.isdir(root):
        raise ValueError(f"Provided path: '{root}' is not a directory")

    def _to_repo_path(local_file: str) -> str:
        # Repo paths always use "/" separators, whatever the local OS uses.
        relative = os.path.relpath(local_file, root)
        joined = os.path.normpath(os.path.join(path_in_repo, relative))
        return joined.replace(os.sep, "/")

    # Lazily enumerate every file below `root` as a full local path.
    local_files = (
        os.path.join(current_dir, basename)
        for current_dir, _, basenames in os.walk(root)
        for basename in basenames
    )
    return [
        CommitOperationAdd(
            path_or_fileobj=local_file,
            path_in_repo=_to_repo_path(local_file),
        )
        for local_file in local_files
    ]


api = HfApi()

set_access_token = api.set_access_token
Expand Down
Loading