Skip to content

Commit

Permalink
Revert "Add H&M fashion recommendation dataset (#2708)"
Browse files Browse the repository at this point in the history
This reverts commit abfdc05.
  • Loading branch information
jppgks committed Nov 4, 2022
1 parent 30caa73 commit aec761b
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 158 deletions.
17 changes: 0 additions & 17 deletions ludwig/datasets/configs/hm_fashion_recommendations.yaml

This file was deleted.

11 changes: 3 additions & 8 deletions ludwig/datasets/kaggle.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def download_kaggle_dataset(
     kaggle_competition: Optional[str] = None,
     kaggle_username: Optional[str] = None,
     kaggle_key: Optional[str] = None,
-    filenames: Optional[list] = None,
 ):
     """Download all files in a kaggle dataset. One of kaggle_dataset_id,
Expand All @@ -41,12 +40,8 @@ def download_kaggle_dataset(
     api = create_kaggle_client()
     api.authenticate()
     with upload_output_directory(download_directory) as (tmpdir, _):
-        dataset_or_competition = kaggle_competition or kaggle_dataset_id
-        if filenames:
-            download_fn = api.competition_download_file if kaggle_competition else api.dataset_download_file
-            for filename in filenames:
-                download_fn(dataset_or_competition, filename, path=tmpdir)
+        if kaggle_competition:
+            api.competition_download_files(kaggle_competition, path=tmpdir)
         else:
-            download_fn = api.competition_download_files if kaggle_competition else api.dataset_download_files
-            download_fn(dataset_or_competition, path=tmpdir)
+            api.dataset_download_files(kaggle_dataset_id, path=tmpdir)
     return [os.path.join(download_directory, f) for f in os.listdir(download_directory)]
3 changes: 1 addition & 2 deletions ludwig/datasets/loaders/dataset_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def load(self, split=False, kaggle_username=None, kaggle_key=None) -> pd.DataFra
         :param split: (bool) splits dataset along 'split' column if present. The split column should always have values
             0: train, 1: validation, 2: test.
         """
-        self._download_and_process(kaggle_username=kaggle_username, kaggle_key=kaggle_key)
+        self._download_and_process()
         if self.state == DatasetState.TRANSFORMED:
             dataset_df = self.load_transformed_dataset()
             if split:
Expand All @@ -297,7 +297,6 @@ def download(self, kaggle_username=None, kaggle_key=None):
                 kaggle_competition=self.config.kaggle_competition,
                 kaggle_username=kaggle_username,
                 kaggle_key=kaggle_key,
-                filenames=self.download_filenames,
             )
         else:
             for url, filename in zip(self.download_urls, self.download_filenames):
Expand Down
128 changes: 0 additions & 128 deletions ludwig/datasets/loaders/hm_fashion_recommendations.py

This file was deleted.

5 changes: 2 additions & 3 deletions tests/ludwig/datasets/titanic/test_titanic_workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,15 @@ def test_download_titanic_dataset(tmpdir):
         test_filenames="test.csv",
     )

-    def download_file(competition_name, filename, path):
+    def download_files(competition_name, path):
         assert competition_name == "titanic"
-        assert filename == "titanic.zip"
         copy(archive_filename, path)

     ludwig.datasets._get_dataset_configs.cache_clear()
     with mock.patch("ludwig.datasets._load_dataset_config", return_value=config):
         with mock.patch("ludwig.datasets.kaggle.create_kaggle_client") as mock_kaggle_cls:
             mock_kaggle_api = mock.MagicMock()
-            mock_kaggle_api.competition_download_file = download_file
+            mock_kaggle_api.competition_download_files = download_files
             mock_kaggle_cls.return_value = mock_kaggle_api

             dataset = ludwig.datasets.get_dataset("titanic", cache_dir=tmpdir)
Expand Down

0 comments on commit aec761b

Please sign in to comment.