diff --git a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py index 182de467b14..5f000fcd986 100644 --- a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py +++ b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py @@ -147,6 +147,12 @@ def analyze(files_or_archives, downloaded_files_or_dirs, split): if self.config.drop_labels is None else not self.config.drop_labels ) + + if add_labels and labels: + common_split_names = {"train", "training", "test", "testing", "val", "valid", "validation", "dev", "eval"} + split_names = set(data_files.keys()) + if labels.issubset(common_split_names | split_names): + add_labels = False if add_labels: logger.info("Adding the labels inferred from data directories to the dataset's features...")