Skip to content

Commit

Permalink
Filter Competitions (#3032)
Browse files Browse the repository at this point in the history
  • Loading branch information
connor-mccorm authored Jan 31, 2023
1 parent 5a9a726 commit c732996
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion ludwig/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,32 @@ def list_datasets() -> List[str]:


@PublicAPI
def get_datasets_output_features(dataset: str = None) -> dict:
def get_datasets_output_features(dataset: str = None, include_competitions: bool = True) -> dict:
    """Returns a dictionary with the output features for each dataset.

    Optionally, you can pass a dataset name, which causes the function to return only the entry for that dataset.

    :param dataset: (str) name of the dataset. When given, only that dataset's entry is returned.
    :param include_competitions: (bool) whether to include the output features from kaggle competition datasets in
        the full listing. It does not affect the lookup of an explicitly named dataset.
    :return: (dict) an ordered dictionary, sorted by dataset name, mapping each dataset name to
        ``{"name": ..., "output_features": ...}``; or just that inner dictionary for the specified dataset.
    :raises KeyError: if ``dataset`` is given but is not a known dataset name.
    """
    configs = _get_dataset_configs()

    def _summarize(config) -> dict:
        # The only two fields of a dataset config that this function exposes.
        return {"name": config.name, "output_features": config.output_features}

    if dataset:
        # Always summarize an explicitly requested dataset. Previously, a competition dataset requested with
        # include_competitions=False skipped the conversion and leaked the raw config object to the caller.
        return _summarize(configs[dataset])

    # Build a new mapping instead of rewriting values in place and deleting the excluded entries afterwards.
    return OrderedDict(
        (name, _summarize(config))
        for name, config in sorted(configs.items())
        if include_competitions or not config.kaggle_competition
    )


Expand Down

0 comments on commit c732996

Please sign in to comment.