Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list_spaces to HfApi #889

Merged
merged 13 commits into from
Jun 29, 2022
89 changes: 89 additions & 0 deletions src/huggingface_hub/hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1076,6 +1076,94 @@ def list_metrics(self) -> List[MetricInfo]:
d = r.json()
return [MetricInfo(**x) for x in d]

def list_spaces(
    self,
    *,
    filter: Union[str, Iterable[str], None] = None,
    author: Optional[str] = None,
    search: Optional[str] = None,
    sort: Union[Literal["lastModified"], str, None] = None,
    direction: Optional[Literal[-1]] = None,
    limit: Optional[int] = None,
    datasets: Union[str, Iterable[str], None] = None,
    models: Union[str, Iterable[str], None] = None,
    linked: Optional[bool] = None,
    full: Optional[bool] = None,
    use_auth_token: Union[bool, str, None] = None,
) -> List[SpaceInfo]:
    """
    Get the public list of all Spaces on huggingface.co

    Args:
        filter (`str` or `Iterable`, *optional*):
            A string tag or list of tags that can be used to identify Spaces on the Hub.
        author (`str`, *optional*):
            A string which identify the author of the returned Spaces.
        search (`str`, *optional*):
            A string that will be contained in the returned Spaces.
        sort (`Literal["lastModified"]` or `str`, *optional*):
            The key with which to sort the resulting Spaces. Possible
            values are the properties of the `SpaceInfo` class.
        direction (`Literal[-1]` or `int`, *optional*):
            Direction in which to sort. The value `-1` sorts by descending
            order while all other values sort by ascending order.
        limit (`int`, *optional*):
            The limit on the number of Spaces fetched. Leaving this option
            to `None` fetches all Spaces.
        datasets (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a dataset.
            The name of a specific dataset can be passed as a string.
        models (`str` or `Iterable`, *optional*):
            Whether to return Spaces that make use of a model.
            The name of a specific model can be passed as a string.
        linked (`bool`, *optional*):
            Whether to return Spaces that make use of either a model or a dataset.
        full (`bool`, *optional*):
            Whether to fetch all Spaces data, including the `lastModified`
            and the `cardData`.
        use_auth_token (`bool` or `str`, *optional*):
            Whether to use the `auth_token` provided from the
            `huggingface_hub` cli. If not logged in, a valid `auth_token`
            can be passed in as a string.

    Returns:
        `List[SpaceInfo]`: a list of [`SpaceInfo`] objects

    Raises:
        `requests.HTTPError`: raised by `raise_for_status()` when the
            endpoint answers with an error status code.
    """
    path = f"{self.endpoint}/api/spaces"

    # Only send an authorization header when the caller asked for
    # authentication; anonymous requests carry no header at all.
    headers = None
    if use_auth_token:
        token, _ = self._validate_or_retrieve_token(use_auth_token)
        headers = {"authorization": f"Bearer {token}"}

    params = {}

    # Value-carrying options are forwarded whenever they were supplied,
    # including falsy values such as `limit=0` or `search=""`.
    value_options = (
        ("filter", filter),
        ("author", author),
        ("search", search),
        ("sort", sort),
        ("direction", direction),
        ("limit", limit),
    )
    for key, value in value_options:
        if value is not None:
            params[key] = value

    # Boolean switches are only sent when enabled; `False` and `None`
    # both mean "leave the query parameter out".
    if full:
        params["full"] = True
    if linked:
        params["linked"] = True

    # Empty / falsy `datasets` and `models` values are not sent.
    if datasets:
        params["datasets"] = datasets
    if models:
        params["models"] = models

    r = requests.get(path, params=params, headers=headers)
    r.raise_for_status()
    d = r.json()
    return [SpaceInfo(**x) for x in d]

@_deprecate_positional_args
def model_info(
self,
repo_id: str,
Expand Down Expand Up @@ -2307,6 +2395,7 @@ def delete_token(cls):
# Convenience aliases: each name below is bound to the attribute of the same
# name on `api`, so the methods can be referenced without going through `api`.
# NOTE(review): `api` is defined outside this excerpt — presumably a shared
# `HfApi` instance; confirm.
list_datasets = api.list_datasets
dataset_info = api.dataset_info

list_spaces = api.list_spaces
space_info = api.space_info

repo_info = api.repo_info
Expand Down
89 changes: 89 additions & 0 deletions tests/test_hf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
MetricInfo,
ModelInfo,
ModelSearchArguments,
SpaceInfo,
erase_from_credential_store,
read_from_credential_store,
repo_type_and_id_from_hf_id,
Expand Down Expand Up @@ -1239,6 +1240,88 @@ def test_filter_emissions_with_min_and_max(self):
)
)

@with_production_testing
def test_list_spaces_full(self):
    """`full=True` returns more than 100 `SpaceInfo` items, at least one with card data."""
    all_spaces = HfApi().list_spaces(full=True)
    self.assertGreater(len(all_spaces), 100)
    first_space = all_spaces[0]
    self.assertIsInstance(first_space, SpaceInfo)
    some_have_card_data = any(item.cardData for item in all_spaces)
    self.assertTrue(some_have_card_data)

@with_production_testing
def test_list_spaces_author(self):
    """Filtering by author returns more than 10 Spaces, including two known ids."""
    author_spaces = HfApi().list_spaces(author="evaluate-metric")
    self.assertGreater(len(author_spaces), 10)
    expected_ids = {"evaluate-metric/trec_eval", "evaluate-metric/perplexity"}
    returned_ids = {item.id for item in author_spaces}
    self.assertTrue(returned_ids.issuperset(expected_ids))

@with_production_testing
def test_list_spaces_search(self):
    """The first hit of a `search` query contains the search term in its id."""
    hits = HfApi().list_spaces(search="wikipedia")
    # Compare case-insensitively: ids may use any capitalisation.
    first_id = hits[0].id.lower()
    self.assertTrue("wikipedia" in first_id)

@with_production_testing
def test_list_spaces_sort_and_direction(self):
    """Sorting by likes honours `direction` (-1 descending, default ascending).

    Adjacent items are compared with non-strict inequalities: two
    neighbouring Spaces may have the same number of likes, and a correctly
    sorted list still satisfies `>=` / `<=` in that case, whereas a strict
    comparison would fail spuriously.
    """
    _api = HfApi()
    spaces_descending_likes = _api.list_spaces(sort="likes", direction=-1)
    spaces_ascending_likes = _api.list_spaces(sort="likes")

    # Both comparisons below index two items; fail with a clear assertion
    # rather than an IndexError if the endpoint returns fewer.
    self.assertGreater(len(spaces_descending_likes), 1)
    self.assertGreater(len(spaces_ascending_likes), 1)

    self.assertGreaterEqual(
        spaces_descending_likes[0].likes, spaces_descending_likes[1].likes
    )
    self.assertLessEqual(
        spaces_ascending_likes[-2].likes, spaces_ascending_likes[-1].likes
    )

@with_production_testing
def test_list_spaces_limit(self):
    """`limit=5` yields exactly five Spaces."""
    limited = HfApi().list_spaces(limit=5)
    returned_count = len(limited)
    self.assertEqual(returned_count, 5)

@with_production_testing
def test_list_spaces_with_models(self):
    """The first Space returned for a `models` query lists that model."""
    matches = HfApi().list_spaces(models="bert-base-uncased")
    # A missing `models` attribute is treated as an empty list.
    linked_models = getattr(matches[0], "models", [])
    self.assertTrue("bert-base-uncased" in linked_models)

@with_production_testing
def test_list_spaces_with_datasets(self):
    """The first Space returned for a `datasets` query lists that dataset."""
    matches = HfApi().list_spaces(datasets="wikipedia")
    # A missing `datasets` attribute is treated as an empty list.
    linked_datasets = getattr(matches[0], "datasets", [])
    self.assertTrue("wikipedia" in linked_datasets)

@with_production_testing
def test_list_spaces_linked(self):
    """`linked=True` returns only Spaces tied to a model and/or a dataset.

    Checks that some Spaces have models, some have datasets, at least one
    has both, and every returned Space has at least one of the two.
    """
    _api = HfApi()
    spaces = _api.list_spaces(linked=True)

    def has_models(space):
        return getattr(space, "models", None) is not None

    def has_datasets(space):
        return getattr(space, "datasets", None) is not None

    self.assertTrue(any(has_models(space) for space in spaces))
    self.assertTrue(any(has_datasets(space) for space in spaces))
    # The generator expression must live INSIDE any()/all(): passing
    # `any(...) for space in spaces` to assertTrue hands it a generator
    # object, which is always truthy and never evaluated.
    self.assertTrue(
        any(has_models(space) and has_datasets(space) for space in spaces)
    )
    self.assertTrue(
        all(has_models(space) or has_datasets(space) for space in spaces)
    )


class HfApiPrivateTest(HfApiCommonTestWithLogin):
@retry_endpoint
Expand Down Expand Up @@ -1280,6 +1363,12 @@ def test_list_private_models(self):
new = len(self._api.list_models(use_auth_token=self._token))
self.assertGreater(new, orig)

@with_production_testing
def test_list_private_spaces(self):
    """An authenticated listing returns at least as many Spaces as an anonymous one."""
    anonymous_count = len(self._api.list_spaces())
    authenticated_count = len(self._api.list_spaces(use_auth_token=self._token))
    self.assertGreaterEqual(authenticated_count, anonymous_count)


class HfFolderTest(unittest.TestCase):
def test_token_workflow(self):
Expand Down