diff --git a/src/datasets/load.py b/src/datasets/load.py index 7c94b33046d..f38329abb06 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1871,6 +1871,7 @@ def load_dataset_builder( if ( path in _PACKAGED_DATASETS_MODULES and data_files is None + and not data_files and dataset_module.builder_configs_parameters.builder_configs[0].data_files is None ): error_msg = f"Please specify the data files or data directory to load for the {path} dataset builder." diff --git a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py index eee1ac9d61a..f62df051c4d 100644 --- a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py +++ b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py @@ -55,7 +55,7 @@ def _info(self): return datasets.DatasetInfo(features=self.config.features) def _split_generators(self, dl_manager): - if not self.config.data_files: + if self.config.data_files is not None and not self.config.data_files: raise ValueError(f"At least one data file must be specified, but got data_files={self.config.data_files}") dl_manager.download_config.extract_on_the_fly = True # Do an early pass if: