From 8596b1f82e9655c9f3a6e5402ef6c5246077117e Mon Sep 17 00:00:00 2001 From: fra Date: Mon, 6 Dec 2021 10:11:24 +0100 Subject: [PATCH 01/11] token in env variables --- src/huggingface_hub/hf_api.py | 23 +++++++++++++++-------- tests/test_hf_api.py | 6 +++++- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 7394d449fd..f119b451d8 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -22,7 +22,7 @@ from io import BufferedIOBase, RawIOBase from os.path import expanduser from typing import IO, Dict, Iterable, List, Optional, Tuple, Union - +from pathlib import Path import requests from requests.exceptions import HTTPError @@ -1127,15 +1127,22 @@ def save_token(cls, token): f.write(token) @classmethod - def get_token(cls): + def get_token(cls) -> Optional[str]: """ - Get token or None if not existent. + Get token or None if not existent. A token can be also provided using env variables + + >>> export HUGGING_FACE_HUB_TOKEN= """ - try: - with open(cls.path_token, "r") as f: - return f.read() - except FileNotFoundError: - pass + + path_token: Path = Path(cls.path_token) + token: Optional[str] = os.environ.get("HUGGING_FACE_HUB_TOKEN") + + if token is None: + # fall back to disk + if path_token.exists(): + with path_token.open("r") as f: + token = f.read() + return token @classmethod def delete_token(cls): diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index c76b808137..c83bcbefa8 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -21,7 +21,7 @@ import unittest import uuid from io import BytesIO - +from unittest import mock import pytest import requests @@ -596,6 +596,10 @@ def test_token_workflow(self): # ^^ not an error, we test that the # second call does not fail. 
self.assertEqual(HfFolder.get_token(), None) + with mock.patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": token}): + self.assertEqual(HfFolder.get_token(), token) + with mock.patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": None}): + self.assertEqual(HfFolder.get_token(), None) @require_git_lfs From 5c9dad67a950f01023d12f54178ceeb2be644f5b Mon Sep 17 00:00:00 2001 From: fra Date: Sat, 11 Dec 2021 19:16:10 +0100 Subject: [PATCH 02/11] done --- src/huggingface_hub/hf_api.py | 30 ++++++++++++++++++++++++++++++ tests/test_hf_api.py | 14 ++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 72e63b23d7..f7761dbbb7 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -420,6 +420,8 @@ def unset_access_token(): def list_models( self, filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, sort: Union[Literal["lastModified"], str, None] = None, direction: Optional[Literal[-1]] = None, limit: Optional[int] = None, @@ -451,6 +453,30 @@ def list_models( >>> # List only the models from the AllenNLP library >>> api.list_models(filter="allennlp") + author (:obj:`str`, `optional`): + A string which identify the author of the returned models + Example usage: + + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + >>> # List all models from google + >>> api.list_models(author="google") + + >>> # List only the text classification models from google + >>> api.list_models(filter="text-classification", author="google") + search (:obj:`str`, `optional`): + A string that will be contained in the returned models + Example usage: + + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + >>> # List all models with "bert" in their name + >>> api.list_models(search="bert") + + >>> #List all models with "bert" in their name made by google + >>> api.list_models(search="bert", author="google") sort 
(:obj:`Literal["lastModified"]` or :obj:`str`, `optional`): The key with which to sort the resulting models. Possible values are the properties of the `ModelInfo` class. @@ -471,6 +497,10 @@ def list_models( if filter is not None: params.update({"filter": filter}) params.update({"full": True}) + if author is not None: + params.update({"author": author}) + if search is not None: + params.update({"search": search}) if sort is not None: params.update({"sort": sort}) if direction is not None: diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 289dda41bb..3c0a8d75d5 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -472,6 +472,20 @@ def test_list_models(self): self.assertGreater(len(models), 100) self.assertIsInstance(models[0], ModelInfo) + @with_production_testing + def test_list_models_author(self): + _api = HfApi() + models = _api.list_models(author="google") + self.assertGreater(len(models), 10) + self.assertIsInstance(models[0], ModelInfo) + + @with_production_testing + def test_list_models_search(self): + _api = HfApi() + models = _api.list_models(search="bert") + self.assertGreater(len(models), 10) + self.assertIsInstance(models[0], ModelInfo) + @with_production_testing def test_list_models_complex_query(self): # Let's list the 10 most recent models From 3b4c0b135f10ca6dad387d6cdd59ea8cc02e60f4 Mon Sep 17 00:00:00 2001 From: fra Date: Sat, 11 Dec 2021 19:17:34 +0100 Subject: [PATCH 03/11] done --- src/huggingface_hub/hf_api.py | 29 ++++++++++++++++------------- tests/test_hf_api.py | 5 ----- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 1d572fa26a..de8fcd053f 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -1169,22 +1169,25 @@ def save_token(cls, token): f.write(token) @classmethod - def get_token(cls) -> Optional[str]: + def get_token(cls): """ - Get token or None if not existent. 
A token can be also provided using env variables - - >>> export HUGGING_FACE_HUB_TOKEN= + Get token or None if not existent. """ + try: + with open(cls.path_token, "r") as f: + return f.read() + except FileNotFoundError: + pass - path_token: Path = Path(cls.path_token) - token: Optional[str] = os.environ.get("HUGGING_FACE_HUB_TOKEN") - - if token is None: - # fall back to disk - if path_token.exists(): - with path_token.open("r") as f: - token = f.read() - return token + @classmethod + def delete_token(cls): + """ + Delete token. Do not fail if token does not exist. + """ + try: + os.remove(cls.path_token) + except FileNotFoundError: + pass @classmethod def delete_token(cls): diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index 47ce06e05b..e3c47300de 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -629,11 +629,6 @@ def test_token_workflow(self): HfFolder.delete_token() # ^^ not an error, we test that the # second call does not fail. - self.assertEqual(HfFolder.get_token(), None) - with mock.patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": token}): - self.assertEqual(HfFolder.get_token(), token) - with mock.patch.dict(os.environ, {"HUGGING_FACE_HUB_TOKEN": None}): - self.assertEqual(HfFolder.get_token(), None) @require_git_lfs From a365fe5ffee7e562bbd7ac45bc3aeeaeaa67aae2 Mon Sep 17 00:00:00 2001 From: Francesco Saverio Zuppichini Date: Mon, 13 Dec 2021 14:34:04 +0100 Subject: [PATCH 04/11] Update src/huggingface_hub/hf_api.py Co-authored-by: Julien Chaumond --- src/huggingface_hub/hf_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index de8fcd053f..9f5ba101a5 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -454,7 +454,7 @@ def list_models( >>> # List only the models from the AllenNLP library >>> api.list_models(filter="allennlp") author (:obj:`str`, `optional`): - A string which identify the author of the returned 
models + A string which identifies the author (user or organization) of the returned models Example usage: >>> from huggingface_hub import HfApi From 7bb080d22b2691e6c052a730079811850bcb7be0 Mon Sep 17 00:00:00 2001 From: fra Date: Sun, 26 Dec 2021 10:58:05 +0100 Subject: [PATCH 05/11] search by and added in --- src/huggingface_hub/hf_api.py | 30 ++++++++++++++++++++++++++++++ tests/test_hf_api.py | 14 ++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index de8fcd053f..b64bce907f 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -522,6 +522,8 @@ def list_models( def list_datasets( self, filter: Union[str, Iterable[str], None] = None, + author: Optional[str] = None, + search: Optional[str] = None, sort: Union[Literal["lastModified"], str, None] = None, direction: Optional[Literal[-1]] = None, limit: Optional[int] = None, @@ -546,6 +548,30 @@ def list_datasets( >>> # List only the datasets in russian for language modeling >>> api.list_datasets(filter=("languages:ru", "task_ids:language-modeling")) + author (:obj:`str`, `optional`): + A string which identifies the author (user or organization) of the returned datasets + Example usage: + + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + >>> # List all datasets from google + >>> api.list_datasets(author="google") + + >>> # List only the text classification datasets from google + >>> api.list_datasets(filter="text-classification", author="google") + search (:obj:`str`, `optional`): + A string that will be contained in the returned datasets + Example usage: + + >>> from huggingface_hub import HfApi + >>> api = HfApi() + + >>> # List all datasets with "text" in their name + >>> api.list_datasets(search="text") + + >>> # List all datasets with "text" in their name made by google + >>> api.list_datasets(search="text", author="google") sort (:obj:`Literal["lastModified"]` or :obj:`str`, `optional`): The key with which to sort the resulting
datasets. Possible values are the properties of the `DatasetInfo` class. @@ -562,6 +588,10 @@ def list_datasets( params = {} if filter is not None: params.update({"filter": filter}) + if author is not None: + params.update({"author": author}) + if search is not None: + params.update({"search": search}) if sort is not None: params.update({"sort": sort}) if direction is not None: diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index e3c47300de..c1030e84a6 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -563,6 +563,20 @@ def test_list_datasets_full(self): self.assertIsInstance(dataset, DatasetInfo) self.assertTrue(any(dataset.cardData for dataset in datasets)) + @with_production_testing + def test_list_datasets_author(self): + _api = HfApi() + datasets = _api.list_datasets(author="huggingface") + self.assertGreater(len(datasets), 4) + self.assertIsInstance(datasets[0], DatasetInfo) + + @with_production_testing + def test_list_datasets_search(self): + _api = HfApi() + datasets = _api.list_datasets(search="wikipedia") + self.assertGreater(len(datasets), 10) + self.assertIsInstance(datasets[0], DatasetInfo) + @with_production_testing def test_dataset_info(self): _api = HfApi() From 8ab5bb68fde63100108962252d8f29f23ab46b30 Mon Sep 17 00:00:00 2001 From: fra Date: Sun, 26 Dec 2021 11:00:13 +0100 Subject: [PATCH 06/11] fix in test --- tests/test_hf_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index c1030e84a6..d8fbf4adba 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -567,7 +567,7 @@ def test_list_datasets_full(self): def test_list_datasets_author(self): _api = HfApi() datasets = _api.list_datasets(author="huggingface") - self.assertGreater(len(datasets), 4) + self.assertGreater(len(datasets), 1) self.assertIsInstance(datasets[0], DatasetInfo) @with_production_testing From 7a0f39b10eceed9b10dc65ee10d0b5ff73234642 Mon Sep 17 00:00:00 2001 From: Francesco Saverio 
Zuppichini Date: Sun, 26 Dec 2021 11:02:49 +0100 Subject: [PATCH 07/11] Update src/huggingface_hub/hf_api.py Co-authored-by: Zachary Mueller --- src/huggingface_hub/hf_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 234059b611..50aae78e40 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -21,7 +21,6 @@ from io import BufferedIOBase, RawIOBase from os.path import expanduser from typing import IO, Dict, Iterable, List, Optional, Tuple, Union -from pathlib import Path import requests from requests.exceptions import HTTPError From 772e8fa47a7e2482778c946d64cde8ab2dab729d Mon Sep 17 00:00:00 2001 From: Francesco Saverio Zuppichini Date: Sun, 26 Dec 2021 11:02:54 +0100 Subject: [PATCH 08/11] Update tests/test_hf_api.py Co-authored-by: Zachary Mueller --- tests/test_hf_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index d8fbf4adba..c1430ee195 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -643,6 +643,7 @@ def test_token_workflow(self): HfFolder.delete_token() # ^^ not an error, we test that the # second call does not fail. 
+ self.assertEqual(HfFolder.get_token(), None) @require_git_lfs From 13042071d038a232bc5626b57b755a1bf1de1a2a Mon Sep 17 00:00:00 2001 From: Francesco Saverio Zuppichini Date: Sun, 26 Dec 2021 11:04:41 +0100 Subject: [PATCH 09/11] Update tests/test_hf_api.py Co-authored-by: Zachary Mueller --- tests/test_hf_api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index c1430ee195..c2acf22d68 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -21,7 +21,6 @@ import unittest import uuid from io import BytesIO -from unittest import mock import pytest import requests From bf8d34927657983d95becd1e70a4cbb5a2f9ced4 Mon Sep 17 00:00:00 2001 From: Francesco Saverio Zuppichini Date: Sun, 26 Dec 2021 11:06:10 +0100 Subject: [PATCH 10/11] Update src/huggingface_hub/hf_api.py Co-authored-by: Zachary Mueller --- src/huggingface_hub/hf_api.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 50aae78e40..e5d4ce60ad 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -1218,16 +1218,6 @@ def delete_token(cls): except FileNotFoundError: pass - @classmethod - def delete_token(cls): - """ - Delete token. Do not fail if token does not exist. 
- """ - try: - os.remove(cls.path_token) - except FileNotFoundError: - pass - api = HfApi() From 5caf424af6b31c5cf00af6b924bbc293c2f4e1c2 Mon Sep 17 00:00:00 2001 From: fra Date: Tue, 4 Jan 2022 15:59:36 +0100 Subject: [PATCH 11/11] quality + test --- src/huggingface_hub/hf_api.py | 1 + tests/test_hf_api.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index e5d4ce60ad..df2624722e 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -21,6 +21,7 @@ from io import BufferedIOBase, RawIOBase from os.path import expanduser from typing import IO, Dict, Iterable, List, Optional, Tuple, Union + import requests from requests.exceptions import HTTPError diff --git a/tests/test_hf_api.py b/tests/test_hf_api.py index c2acf22d68..192615cad1 100644 --- a/tests/test_hf_api.py +++ b/tests/test_hf_api.py @@ -21,6 +21,7 @@ import unittest import uuid from io import BytesIO + import pytest import requests @@ -477,6 +478,7 @@ def test_list_models_author(self): models = _api.list_models(author="google") self.assertGreater(len(models), 10) self.assertIsInstance(models[0], ModelInfo) + [self.assertTrue("google" in model.author for model in models)] @with_production_testing def test_list_models_search(self): @@ -484,6 +486,7 @@ def test_list_models_search(self): models = _api.list_models(search="bert") self.assertGreater(len(models), 10) self.assertIsInstance(models[0], ModelInfo) + [self.assertTrue("bert" in model.modelId.lower()) for model in models] @with_production_testing def test_list_models_complex_query(self):