huggingface · patrickvonplaten · Mar 5, 2021 · Mar 5, 2021
diff --git a/datasets/timit_asr/dataset_infos.json b/datasets/timit_asr/dataset_infos.json
@@ -1 +1 @@
-{"clean": {"description": "The TIMIT corpus of reading speech has been developed to provide speech data for acoustic-phonetic research studies\nand for the evaluation of automatic speech recognition systems.\n\nTIMIT contains high quality recordings of 630 individuals/speakers with 8 different American English dialects,\nwith each individual reading upto 10 phonetically rich sentences.\n\nMore info on TIMIT dataset can be understood from the \"README\" which can be found here:\nhttps://catalog.ldc.upenn.edu/docs/LDC93S1/readme.txt\n", "citation": "@inproceedings{\n  title={TIMIT Acoustic-Phonetic Continuous Speech Corpus},\n  author={Garofolo, John S., et al},\n  ldc_catalog_no={LDC93S1},\n  DOI={https://doi.org/10.35111/17gk-bn40},\n  journal={Linguistic Data Consortium, Philadelphia},\n  year={1983}\n}\n", "homepage": "https://catalog.ldc.upenn.edu/LDC93S1", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "phonetic_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "word_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "dialect_region": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_type": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "builder_name": "timit_asr", "config_name": "clean", "version": {"version_str": "2.0.1", "description": "", "major": 2, "minor": 0, "patch": 1}, "splits": {"train": {"name": "train", "num_bytes": 5220656, "num_examples": 4620, "dataset_name": "timit_asr"}, "test": {"name": "test", "num_bytes": 2380616, "num_examples": 1680, "dataset_name": "timit_asr"}}, "download_checksums": {"https://data.deepai.org/timit.zip": {"num_bytes": 869007403, "checksum": "b79af42068b53045510d86854e2239a13ff77c4bd27803b40c28dce4bb5aeb0d"}}, "download_size": 869007403, "post_processing_size": null, "dataset_size": 7601272, "size_in_bytes": 876608675}}
+{"clean": {"description": "The TIMIT corpus of reading speech has been developed to provide speech data for acoustic-phonetic research studies\nand for the evaluation of automatic speech recognition systems.\n\nTIMIT contains high quality recordings of 630 individuals/speakers with 8 different American English dialects,\nwith each individual reading upto 10 phonetically rich sentences.\n\nMore info on TIMIT dataset can be understood from the \"README\" which can be found here:\nhttps://catalog.ldc.upenn.edu/docs/LDC93S1/readme.txt\n", "citation": "@inproceedings{\n  title={TIMIT Acoustic-Phonetic Continuous Speech Corpus},\n  author={Garofolo, John S., et al},\n  ldc_catalog_no={LDC93S1},\n  DOI={https://doi.org/10.35111/17gk-bn40},\n  journal={Linguistic Data Consortium, Philadelphia},\n  year={1983}\n}\n", "homepage": "https://catalog.ldc.upenn.edu/LDC93S1", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "phonetic_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "word_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "dialect_region": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_type": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "builder_name": "timit_asr", "config_name": "clean", "version": {"version_str": "2.0.1", "description": "", "major": 2, "minor": 0, "patch": 1}, "splits": {"train": {"name": "train", "num_bytes": 6076580, "num_examples": 4620, "dataset_name": "timit_asr"}, "test": {"name": "test", "num_bytes": 2202968, "num_examples": 1680, "dataset_name": "timit_asr"}}, "download_checksums": {"https://data.deepai.org/timit.zip": {"num_bytes": 869007403, "checksum": "b79af42068b53045510d86854e2239a13ff77c4bd27803b40c28dce4bb5aeb0d"}}, "download_size": 869007403, "post_processing_size": null, "dataset_size": 8279548, "size_in_bytes": 877286951}}
diff --git a/datasets/timit_asr/timit_asr.py b/datasets/timit_asr/timit_asr.py
@@ -129,7 +129,7 @@ def _generate_examples(self, data_info_csv):
 
         # Iterating the contents of the data to extract the relevant information
         for audio_idx in range(data_info.shape[0]):
-            audio_data = data_info.iloc[0]
+            audio_data = data_info.iloc[audio_idx]
 
             # extract the path to audio
             wav_path = os.path.join(data_path, *(audio_data["path_from_data_dir"].split("/")))
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		{"clean": {"description": "The TIMIT corpus of reading speech has been developed to provide speech data for acoustic-phonetic research studies\nand for the evaluation of automatic speech recognition systems.\n\nTIMIT contains high quality recordings of 630 individuals/speakers with 8 different American English dialects,\nwith each individual reading upto 10 phonetically rich sentences.\n\nMore info on TIMIT dataset can be understood from the \"README\" which can be found here:\nhttps://catalog.ldc.upenn.edu/docs/LDC93S1/readme.txt\n", "citation": "@inproceedings{\n title={TIMIT Acoustic-Phonetic Continuous Speech Corpus},\n author={Garofolo, John S., et al},\n ldc_catalog_no={LDC93S1},\n DOI={https://doi.org/10.35111/17gk-bn40},\n journal={Linguistic Data Consortium, Philadelphia},\n year={1983}\n}\n", "homepage": "https://catalog.ldc.upenn.edu/LDC93S1", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "phonetic_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "word_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "dialect_region": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_type": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "builder_name": "timit_asr", "config_name": "clean", "version": {"version_str": "2.0.1", "description": "", "major": 2, "minor": 0, "patch": 1}, "splits": {"train": {"name": "train", "num_bytes": 5220656, "num_examples": 4620, "dataset_name": "timit_asr"}, "test": {"name": "test", "num_bytes": 2380616, "num_examples": 1680, "dataset_name": "timit_asr"}}, "download_checksums": {"https://data.deepai.org/timit.zip": {"num_bytes": 869007403, "checksum": "b79af42068b53045510d86854e2239a13ff77c4bd27803b40c28dce4bb5aeb0d"}}, "download_size": 869007403, "post_processing_size": null, "dataset_size": 7601272, "size_in_bytes": 876608675}}
		{"clean": {"description": "The TIMIT corpus of reading speech has been developed to provide speech data for acoustic-phonetic research studies\nand for the evaluation of automatic speech recognition systems.\n\nTIMIT contains high quality recordings of 630 individuals/speakers with 8 different American English dialects,\nwith each individual reading upto 10 phonetically rich sentences.\n\nMore info on TIMIT dataset can be understood from the \"README\" which can be found here:\nhttps://catalog.ldc.upenn.edu/docs/LDC93S1/readme.txt\n", "citation": "@inproceedings{\n title={TIMIT Acoustic-Phonetic Continuous Speech Corpus},\n author={Garofolo, John S., et al},\n ldc_catalog_no={LDC93S1},\n DOI={https://doi.org/10.35111/17gk-bn40},\n journal={Linguistic Data Consortium, Philadelphia},\n year={1983}\n}\n", "homepage": "https://catalog.ldc.upenn.edu/LDC93S1", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "phonetic_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "word_detail": {"feature": {"start": {"dtype": "int64", "id": null, "_type": "Value"}, "stop": {"dtype": "int64", "id": null, "_type": "Value"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "dialect_region": {"dtype": "string", "id": null, "_type": "Value"}, "sentence_type": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "builder_name": "timit_asr", "config_name": "clean", "version": {"version_str": "2.0.1", "description": "", "major": 2, "minor": 0, "patch": 1}, "splits": {"train": {"name": "train", "num_bytes": 6076580, "num_examples": 4620, "dataset_name": "timit_asr"}, "test": {"name": "test", "num_bytes": 2202968, "num_examples": 1680, "dataset_name": "timit_asr"}}, "download_checksums": {"https://data.deepai.org/timit.zip": {"num_bytes": 869007403, "checksum": "b79af42068b53045510d86854e2239a13ff77c4bd27803b40c28dce4bb5aeb0d"}}, "download_size": 869007403, "post_processing_size": null, "dataset_size": 8279548, "size_in_bytes": 877286951}}