From bd83e95a38e7c8603f97431c7b7247990ca6b6ee Mon Sep 17 00:00:00 2001 From: Luo Mingshuang <739314837@qq.com> Date: Tue, 29 Jun 2021 09:29:46 +0800 Subject: [PATCH 01/12] prepare timit manifests --- lhotse/recipes/__init__.py | 1 + lhotse/recipes/timit.py | 141 +++++++++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+) create mode 100644 lhotse/recipes/timit.py diff --git a/lhotse/recipes/__init__.py b/lhotse/recipes/__init__.py index c7121f490..bc383da7e 100644 --- a/lhotse/recipes/__init__.py +++ b/lhotse/recipes/__init__.py @@ -1,4 +1,5 @@ from .aishell import prepare_aishell +from .timit import download_and_unzip, prepare_timit from .ami import download_ami, prepare_ami from .babel import prepare_single_babel_language from .broadcast_news import prepare_broadcast_news diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py new file mode 100644 index 000000000..2b33115b5 --- /dev/null +++ b/lhotse/recipes/timit.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +# Copyright 2021 Xiaomi Corporation (Author: Mingshuang Luo) +# Apache 2.0 + +from collections import defaultdict + +import os +import glob +import json +import logging +import shutil +import tarfile +import string +from pathlib import Path +from tqdm.auto import tqdm +from typing import Dict, Optional, Union +from concurrent.futures.thread import ThreadPoolExecutor + +from lhotse import validate_recordings_and_supervisions +from lhotse.audio import Recording, RecordingSet +from lhotse.supervision import SupervisionSegment, SupervisionSet +from lhotse.utils import Pathlike, urlretrieve_progress + + +def download_and_unzip( + target_dir: Pathlike = '.', + force_download: Optional[bool] = False, + base_url: Optional[str] = 'https://data.deepai.org/timit.zip') -> None: + """ + Download and unzip the dataset, supporting both TIMIT + :param target_dir: Pathlike, the path of the dir to storage the dataset. + :param force_download: Bool, if True, download the zips no matter if the zips exists. + :param base_url: str, the url of the TIMIT download for free. + """ + target_dir = Path(target_dir) + target_dir.mkdir(parents=True, exist_ok=True) + + tar_name = f'timit.zip' + tar_path = target_dir / tar_name + if force_download or not tar_path.is_file(): + urlretrieve_progress(f'{base_url}', filename=tar_path, desc=f'Downloading {tar_name}') + + +def prepare_timit( + corpus_dir: Pathlike, + splits_dir: Pathlike, + output_dir: Optional[Pathlike] = None, + num_jobs: int = 1 +) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]: + """ + """ + corpus_dir = Path(corpus_dir) + assert corpus_dir.is_dir(), f'No such directory: {corpus_dir}' + + splits_dir = Path(splits_dir) + assert corpus_dir.is_dir(), f'No such directory: {splits_dir}' + + if output_dir is not None: + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + manifests = defaultdict(dict) + dataset_parts = ['TRAIN', 'DEV', 'TEST'] + + punctuation_strings = string.punctuation + + id_texts = [] + + with ThreadPoolExecutor(num_jobs) as ex: + for part in dataset_parts: + wav_files = [] + file_name = '' + + if part == 'TRAIN': + file_name = os.path.join(splits_dir, 'train_samples.txt') + elif part == 'DEV': + file_name = os.path.join(splits_dir, 'dev_samples.txt') + else: + file_name = os.path.join(splits_dir, 'tst_samples.txt') + wav_files = [] + with open(file_name, 'r') as f: + lines = f.readlines() + for line in lines: + items = line.strip().split(' ') + wav = os.path.join(corpus_dir, items[-1]) + wav_files.append(wav) + print(f'{part} dataset manifest generation.') + recordings = [] + supervisions = [] + + for wav_file in tqdm(wav_files): + items = wav_file.split('/') + idx = items[-2] + '-' + items[-1][:-4] + speaker = items[-2] + transcript_file = wav_file[:-3] + 'PHN' ###the phone file + if not Path(wav_file).is_file(): + logging.warning(f'No such file: {wav_file}') + continue + if not Path(transcript_file).is_file(): + logging.warning(f'No transcript: {transcript_file}') + continue + text = [] + with open(transcript_file, 'r') as f: + lines = f.readlines() + for line in lines: + phone = line.rstrip('\n').split(' ')[-1] + text.append(phone) + text = ' '.join(text).replace('h#', 'sil') + + for i in punctuation_strings: + if i != "'": + text = text.replace(i, '') + + recording = Recording.from_file(path=wav_file, recording_id=idx) + recordings.append(recording) + segment = SupervisionSegment( + id=idx, + recording_id=idx, + start=0.0, + duration=recording.duration, + channel=0, + language='English', + speaker=speaker, + text=text.strip()) + + supervisions.append(segment) + + recording_set = RecordingSet.from_recordings(recordings) + supervision_set = SupervisionSet.from_segments(supervisions) + validate_recordings_and_supervisions(recording_set, supervision_set) + + if output_dir is not None: + supervision_set.to_json(output_dir / f'supervisions_{part}.json') + recording_set.to_json(output_dir / f'recordings_{part}.json') + + manifests[part] = { + 'recordings': recording_set, + 'supervisions': supervision_set} + + return manifests From 539304fd123a82a6d104afd415261f14b068d07a Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 10:37:47 +0800 Subject: [PATCH 02/12] Update timit.py --- lhotse/recipes/timit.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 2b33115b5..c111ef25a 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -65,8 +65,6 @@ def prepare_timit( punctuation_strings = string.punctuation - id_texts = [] - with ThreadPoolExecutor(num_jobs) as ex: for part in dataset_parts: wav_files = [] From f65fe9388ae2d2ff6e7b3d3eecc3f891557348de Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 13:00:49 +0800 Subject: [PATCH 03/12] Update __init__.py --- lhotse/recipes/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lhotse/recipes/__init__.py b/lhotse/recipes/__init__.py index bc383da7e..a3aec3ee9 100644 --- a/lhotse/recipes/__init__.py +++ b/lhotse/recipes/__init__.py @@ -1,5 +1,5 @@ from .aishell import prepare_aishell -from .timit import download_and_unzip, prepare_timit +from .timit import download_timit, prepare_timit from .ami import download_ami, prepare_ami from .babel import prepare_single_babel_language from .broadcast_news import prepare_broadcast_news From 16a5d78e298416b58754d2449ff647f4bfefe94a Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 13:02:12 +0800 Subject: [PATCH 04/12] Update timit.py --- lhotse/recipes/timit.py | 166 +++++++++++++++++++++------------------- 1 file changed, 87 insertions(+), 79 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index c111ef25a..4c7d43d00 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -6,14 +6,11 @@ from collections import defaultdict import os -import glob -import json +import zipfile import logging -import shutil -import tarfile import string +from tqdm import tqdm from pathlib import Path -from tqdm.auto import tqdm from typing import Dict, Optional, Union from concurrent.futures.thread import ThreadPoolExecutor @@ -22,25 +19,31 @@ from lhotse.supervision import SupervisionSegment, SupervisionSet from lhotse.utils import Pathlike, urlretrieve_progress - -def download_and_unzip( +def download_timit( target_dir: Pathlike = '.', force_download: Optional[bool] = False, base_url: Optional[str] = 'https://data.deepai.org/timit.zip') -> None: """ - Download and unzip the dataset, supporting both TIMIT + Download and unzip the dataset TIMIT. :param target_dir: Pathlike, the path of the dir to storage the dataset. :param force_download: Bool, if True, download the zips no matter if the zips exists. :param base_url: str, the url of the TIMIT download for free. """ target_dir = Path(target_dir) target_dir.mkdir(parents=True, exist_ok=True) - tar_name = f'timit.zip' tar_path = target_dir / tar_name if force_download or not tar_path.is_file(): urlretrieve_progress(f'{base_url}', filename=tar_path, desc=f'Downloading {tar_name}') - + + zip_file = zipfile.ZipFile(tar_path) + if os.path.isdir(tar_name[:-4]): + pass + else: + os.mkdir(tar_name[:-4]) + for names in zip_file.namelist(): + zip_file.extract(names, tar_name[:-4]) + zip_file.close() def prepare_timit( corpus_dir: Pathlike, @@ -49,12 +52,17 @@ def prepare_timit( num_jobs: int = 1 ) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]: """ + Returns the manifests which consists of the Recodings and Supervisions. + :param corpus_dir: Pathlike, the path of the data dir. + :param splits_dir: Pathlike, the path of the txt files for data division (train, dev, tst). + :param output_dir: Pathlike, the path where to write and save the manifests. + :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'. """ corpus_dir = Path(corpus_dir) assert corpus_dir.is_dir(), f'No such directory: {corpus_dir}' splits_dir = Path(splits_dir) - assert corpus_dir.is_dir(), f'No such directory: {splits_dir}' + assert splits_dir.is_dir(), f'No such directory: {splits_dir}' if output_dir is not None: output_dir = Path(output_dir) @@ -67,73 +75,73 @@ def prepare_timit( with ThreadPoolExecutor(num_jobs) as ex: for part in dataset_parts: - wav_files = [] - file_name = '' - - if part == 'TRAIN': - file_name = os.path.join(splits_dir, 'train_samples.txt') - elif part == 'DEV': - file_name = os.path.join(splits_dir, 'dev_samples.txt') - else: - file_name = os.path.join(splits_dir, 'tst_samples.txt') - wav_files = [] - with open(file_name, 'r') as f: - lines = f.readlines() - for line in lines: - items = line.strip().split(' ') - wav = os.path.join(corpus_dir, items[-1]) - wav_files.append(wav) - print(f'{part} dataset manifest generation.') - recordings = [] - supervisions = [] - - for wav_file in tqdm(wav_files): - items = wav_file.split('/') - idx = items[-2] + '-' + items[-1][:-4] - speaker = items[-2] - transcript_file = wav_file[:-3] + 'PHN' ###the phone file - if not Path(wav_file).is_file(): - logging.warning(f'No such file: {wav_file}') - continue - if not Path(transcript_file).is_file(): - logging.warning(f'No transcript: {transcript_file}') - continue - text = [] - with open(transcript_file, 'r') as f: - lines = f.readlines() - for line in lines: - phone = line.rstrip('\n').split(' ')[-1] - text.append(phone) - text = ' '.join(text).replace('h#', 'sil') - - for i in punctuation_strings: - if i != "'": - text = text.replace(i, '') - - recording = Recording.from_file(path=wav_file, recording_id=idx) - recordings.append(recording) - segment = SupervisionSegment( - id=idx, - recording_id=idx, - start=0.0, - duration=recording.duration, - channel=0, - language='English', - speaker=speaker, - text=text.strip()) - - supervisions.append(segment) - - recording_set = RecordingSet.from_recordings(recordings) - supervision_set = SupervisionSet.from_segments(supervisions) - validate_recordings_and_supervisions(recording_set, supervision_set) - - if output_dir is not None: - supervision_set.to_json(output_dir / f'supervisions_{part}.json') - recording_set.to_json(output_dir / f'recordings_{part}.json') - - manifests[part] = { - 'recordings': recording_set, - 'supervisions': supervision_set} + wav_files = [] + file_name = '' + + if part == 'TRAIN': + file_name = os.path.join(splits_dir, 'train_samples.txt') + elif part == 'DEV': + file_name = os.path.join(splits_dir, 'dev_samples.txt') + else: + file_name = os.path.join(splits_dir, 'tst_samples.txt') + wav_files = [] + with open(file_name, 'r') as f: + lines = f.readlines() + for line in lines: + items = line.strip().split(' ') + wav = os.path.join(corpus_dir, items[-1]) + wav_files.append(wav) + print(f'{part} dataset manifest generation.') + recordings = [] + supervisions = [] + + for wav_file in tqdm(wav_files): + items = wav_file.split('/') + idx = items[-2] + '-' + items[-1][:-4] + speaker = items[-2] + transcript_file = wav_file[:-3] + 'PHN' ###the phone file + if not Path(wav_file).is_file(): + logging.warning(f'No such file: {wav_file}') + continue + if not Path(transcript_file).is_file(): + logging.warning(f'No transcript: {transcript_file}') + continue + text = [] + with open(transcript_file, 'r') as f: + lines = f.readlines() + for line in lines: + phone = line.rstrip('\n').split(' ')[-1] + text.append(phone) + text = ' '.join(text).replace('h#', 'sil') + + for i in punctuation_strings: + if i != "'": + text = text.replace(i, '') + + recording = Recording.from_file(path=wav_file, recording_id=idx) + recordings.append(recording) + segment = SupervisionSegment( + id=idx, + recording_id=idx, + start=0.0, + duration=recording.duration, + channel=0, + language='English', + speaker=speaker, + text=text.strip()) + + supervisions.append(segment) + + recording_set = RecordingSet.from_recordings(recordings) + supervision_set = SupervisionSet.from_segments(supervisions) + validate_recordings_and_supervisions(recording_set, supervision_set) + + if output_dir is not None: + supervision_set.to_json(output_dir / f'supervisions_{part}.json') + recording_set.to_json(output_dir / f'recordings_{part}.json') + + manifests[part] = { + 'recordings': recording_set, + 'supervisions': supervision_set} return manifests From 51146b2834578a1c0aa130e8aa01fa9142f3e102 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 13:04:22 +0800 Subject: [PATCH 05/12] Update timit.py --- lhotse/recipes/timit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 4c7d43d00..431153dd0 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -84,7 +84,7 @@ def prepare_timit( file_name = os.path.join(splits_dir, 'dev_samples.txt') else: file_name = os.path.join(splits_dir, 'tst_samples.txt') - wav_files = [] + with open(file_name, 'r') as f: lines = f.readlines() for line in lines: From ba57dc70cf4bab1918408cd1cc38ecfc49283748 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 14:24:02 +0800 Subject: [PATCH 06/12] Update timit.py --- lhotse/recipes/timit.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 431153dd0..a8117bb2a 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -78,15 +78,15 @@ def prepare_timit( wav_files = [] file_name = '' - if part == 'TRAIN': - file_name = os.path.join(splits_dir, 'train_samples.txt') + if part == 'TRAIN': + file_name = splits_dir/'train_samples.txt' elif part == 'DEV': - file_name = os.path.join(splits_dir, 'dev_samples.txt') + file_name = splits_dir/'dev_samples.txt' else: - file_name = os.path.join(splits_dir, 'tst_samples.txt') - + file_name = splits_dir/'tst_samples.txt' + wav_files = [] with open(file_name, 'r') as f: - lines = f.readlines() + lines = f.readlines() for line in lines: items = line.strip().split(' ') wav = os.path.join(corpus_dir, items[-1]) @@ -98,8 +98,8 @@ def prepare_timit( for wav_file in tqdm(wav_files): items = wav_file.split('/') idx = items[-2] + '-' + items[-1][:-4] - speaker = items[-2] - transcript_file = wav_file[:-3] + 'PHN' ###the phone file + speaker = items[-2] + transcript_file = Path(wav_file).with_suffix('.PHN') if not Path(wav_file).is_file(): logging.warning(f'No such file: {wav_file}') continue From 752ce90ccf5264d67f655855ade8503fc81215b3 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:05:34 +0800 Subject: [PATCH 07/12] Update timit.py --- lhotse/recipes/timit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index a8117bb2a..3223d6d04 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -96,7 +96,7 @@ def prepare_timit( supervisions = [] for wav_file in tqdm(wav_files): - items = wav_file.split('/') + items = wav_file.split('/').strip() idx = items[-2] + '-' + items[-1][:-4] speaker = items[-2] transcript_file = Path(wav_file).with_suffix('.PHN') From 9554ad570ac786974f3431a5336c6eefa6024698 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:09:37 +0800 Subject: [PATCH 08/12] Update timit.py --- lhotse/recipes/timit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 3223d6d04..11e857506 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -31,7 +31,7 @@ def download_timit( """ target_dir = Path(target_dir) target_dir.mkdir(parents=True, exist_ok=True) - tar_name = f'timit.zip' + tar_name = 'timit.zip' tar_path = target_dir / tar_name if force_download or not tar_path.is_file(): urlretrieve_progress(f'{base_url}', filename=tar_path, desc=f'Downloading {tar_name}') From 123fb1b0cc5fc73fd8d3a96c78f2e7d73dda9c93 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Tue, 29 Jun 2021 15:37:56 +0800 Subject: [PATCH 09/12] Update timit.py --- lhotse/recipes/timit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 11e857506..5ce3dbd47 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -96,7 +96,7 @@ def prepare_timit( supervisions = [] for wav_file in tqdm(wav_files): - items = wav_file.split('/').strip() + items = wav_file.strip().split('/') idx = items[-2] + '-' + items[-1][:-4] speaker = items[-2] transcript_file = Path(wav_file).with_suffix('.PHN') From 69479c837583eb976006adf5ac4d6a9f94025d76 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Thu, 15 Jul 2021 13:53:00 +0800 Subject: [PATCH 10/12] Update timit.py --- lhotse/recipes/timit.py | 153 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 143 insertions(+), 10 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 5ce3dbd47..9272831b8 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -34,7 +34,7 @@ def download_timit( tar_name = 'timit.zip' tar_path = target_dir / tar_name if force_download or not tar_path.is_file(): - urlretrieve_progress(f'{base_url}', filename=tar_path, desc=f'Downloading {tar_name}') + urlretrieve_progress(f'{base_url}', filename=tar_path, desc=f'Downloading and unzip {tar_name}') zip_file = zipfile.ZipFile(tar_path) if os.path.isdir(tar_name[:-4]): @@ -49,6 +49,7 @@ def prepare_timit( corpus_dir: Pathlike, splits_dir: Pathlike, output_dir: Optional[Pathlike] = None, + num_phones: int = 48, num_jobs: int = 1 ) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]: """ @@ -71,19 +72,18 @@ def prepare_timit( manifests = defaultdict(dict) dataset_parts = ['TRAIN', 'DEV', 'TEST'] - punctuation_strings = string.punctuation + phones_dict = get_phonemes(num_phones) with ThreadPoolExecutor(num_jobs) as ex: for part in dataset_parts: - wav_files = [] file_name = '' if part == 'TRAIN': - file_name = splits_dir/'train_samples.txt' + file_name = splits_dir / 'train_samples.txt' elif part == 'DEV': - file_name = splits_dir/'dev_samples.txt' + file_name = splits_dir / 'dev_samples.txt' else: - file_name = splits_dir/'tst_samples.txt' + file_name = splits_dir / 'tst_samples.txt' wav_files = [] with open(file_name, 'r') as f: lines = f.readlines() @@ -111,12 +111,10 @@ def prepare_timit( lines = f.readlines() for line in lines: phone = line.rstrip('\n').split(' ')[-1] + if num_phones != 60: phone = phones_dict[str(phone)] text.append(phone) - text = ' '.join(text).replace('h#', 'sil') - for i in punctuation_strings: - if i != "'": - text = text.replace(i, '') + text = ' '.join(text).replace('h#', 'sil') recording = Recording.from_file(path=wav_file, recording_id=idx) recordings.append(recording) @@ -145,3 +143,138 @@ def prepare_timit( 'supervisions': supervision_set} return manifests + +def get_phonemes(num_phones): + phonemes = {} + + if num_phones == int(48): + # This dictionary is used to conver the 60 phoneme set into the 48 one + phonemes["sil"] = "sil" + phonemes["aa"] = "aa" + phonemes["ae"] = "ae" + phonemes["ah"] = "ah" + phonemes["ao"] = "ao" + phonemes["aw"] = "aw" + phonemes["ax"] = "ax" + phonemes["ax-h"] = "ax" + phonemes["axr"] = "er" + phonemes["ay"] = "ay" + phonemes["b"] = "b" + phonemes["bcl"] = "vcl" + phonemes["ch"] = "ch" + phonemes["d"] = "d" + phonemes["dcl"] = "vcl" + phonemes["dh"] = "dh" + phonemes["dx"] = "dx" + phonemes["eh"] = "eh" + phonemes["el"] = "el" + phonemes["em"] = "m" + phonemes["en"] = "en" + phonemes["eng"] = "ng" + phonemes["epi"] = "epi" + phonemes["er"] = "er" + phonemes["ey"] = "ey" + phonemes["f"] = "f" + phonemes["g"] = "g" + phonemes["gcl"] = "vcl" + phonemes["h#"] = "sil" + phonemes["hh"] = "hh" + phonemes["hv"] = "hh" + phonemes["ih"] = "ih" + phonemes["ix"] = "ix" + phonemes["iy"] = "iy" + phonemes["jh"] = "jh" + phonemes["k"] = "k" + phonemes["kcl"] = "cl" + phonemes["l"] = "l" + phonemes["m"] = "m" + phonemes["n"] = "n" + phonemes["ng"] = "ng" + phonemes["nx"] = "n" + phonemes["ow"] = "ow" + phonemes["oy"] = "oy" + phonemes["p"] = "p" + phonemes["pau"] = "sil" + phonemes["pcl"] = "cl" + phonemes["q"] = "" + phonemes["r"] = "r" + phonemes["s"] = "s" + phonemes["sh"] = "sh" + phonemes["t"] = "t" + phonemes["tcl"] = "cl" + phonemes["th"] = "th" + phonemes["uh"] = "uh" + phonemes["uw"] = "uw" + phonemes["ux"] = "uw" + phonemes["v"] = "v" + phonemes["w"] = "w" + phonemes["y"] = "y" + phonemes["z"] = "z" + phonemes["zh"] = "zh" + + if num_phones == int(39): + # This dictionary is used to conver the 60 phoneme set into the 39 one + phonemes["sil"] = "sil" + phonemes["aa"] = "aa" + phonemes["ae"] = "ae" + phonemes["ah"] = "ah" + phonemes["ao"] = "aa" + phonemes["aw"] = "aw" + phonemes["ax"] = "ah" + phonemes["ax-h"] = "ah" + phonemes["axr"] = "er" + phonemes["ay"] = "ay" + phonemes["b"] = "b" + phonemes["bcl"] = "sil" + phonemes["ch"] = "ch" + phonemes["d"] = "d" + phonemes["dcl"] = "sil" + phonemes["dh"] = "dh" + phonemes["dx"] = "dx" + phonemes["eh"] = "eh" + phonemes["el"] = "l" + phonemes["em"] = "m" + phonemes["en"] = "n" + phonemes["eng"] = "ng" + phonemes["epi"] = "sil" + phonemes["er"] = "er" + phonemes["ey"] = "ey" + phonemes["f"] = "f" + phonemes["g"] = "g" + phonemes["gcl"] = "sil" + phonemes["h#"] = "sil" + phonemes["hh"] = "hh" + phonemes["hv"] = "hh" + phonemes["ih"] = "ih" + phonemes["ix"] = "ih" + phonemes["iy"] = "iy" + phonemes["jh"] = "jh" + phonemes["k"] = "k" + phonemes["kcl"] = "sil" + phonemes["l"] = "l" + phonemes["m"] = "m" + phonemes["ng"] = "ng" + phonemes["n"] = "n" + phonemes["nx"] = "n" + phonemes["ow"] = "ow" + phonemes["oy"] = "oy" + phonemes["p"] = "p" + phonemes["pau"] = "sil" + phonemes["pcl"] = "sil" + phonemes["q"] = "" + phonemes["r"] = "r" + phonemes["s"] = "s" + phonemes["sh"] = "sh" + phonemes["t"] = "t" + phonemes["tcl"] = "sil" + phonemes["th"] = "th" + phonemes["uh"] = "uh" + phonemes["uw"] = "uw" + phonemes["ux"] = "uw" + phonemes["v"] = "v" + phonemes["w"] = "w" + phonemes["y"] = "y" + phonemes["z"] = "z" + phonemes["zh"] = "sh" + + return phonemes From 73ce2bbb3dfd9b85a20bee3e597b36fcc16c36f5 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Sun, 18 Jul 2021 15:11:39 +0800 Subject: [PATCH 11/12] Update timit.py --- lhotse/recipes/timit.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index 9272831b8..f608e35aa 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -57,6 +57,7 @@ def prepare_timit( :param corpus_dir: Pathlike, the path of the data dir. :param splits_dir: Pathlike, the path of the txt files for data division (train, dev, tst). :param output_dir: Pathlike, the path where to write and save the manifests. + :param num_phones: int=48, the number of phones (60, 48 or 39) for modeling and 48 is regarded as the default value. :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'. """ corpus_dir = Path(corpus_dir) @@ -71,8 +72,17 @@ def prepare_timit( manifests = defaultdict(dict) dataset_parts = ['TRAIN', 'DEV', 'TEST'] + + phones_dict = {} - phones_dict = get_phonemes(num_phones) + try: + if num_phones in [60, 48, 39]: + phones_dict = get_phonemes(num_phones) + else: + raise ValueError("The value of num_phones must be in [60, 48, 39].") + except ValueError as e: + print("Exception: ", repr(e)) + raise with ThreadPoolExecutor(num_jobs) as ex: for part in dataset_parts: @@ -146,9 +156,10 @@ def prepare_timit( def get_phonemes(num_phones): phonemes = {} - + if num_phones == int(48): - # This dictionary is used to conver the 60 phoneme set into the 48 one + print("Using 48 phones for modeling!") + # This dictionary is used to conver the 60 phoneme set into the 48 one. phonemes["sil"] = "sil" phonemes["aa"] = "aa" phonemes["ae"] = "ae" @@ -212,8 +223,9 @@ def get_phonemes(num_phones): phonemes["z"] = "z" phonemes["zh"] = "zh" - if num_phones == int(39): - # This dictionary is used to conver the 60 phoneme set into the 39 one + elif num_phones == int(39): + print("Using 39 phones for modeling!") + # This dictionary is used to conver the 60 phoneme set into the 39 one. phonemes["sil"] = "sil" phonemes["aa"] = "aa" phonemes["ae"] = "ae" @@ -276,5 +288,8 @@ def get_phonemes(num_phones): phonemes["y"] = "y" phonemes["z"] = "z" phonemes["zh"] = "sh" + + else: + print("Using 60 phones for modeling!") return phonemes From 7789a2ca2192fe9b659fec729d756c6acfee76c6 Mon Sep 17 00:00:00 2001 From: Mingshuang Luo <37799481+luomingshuang@users.noreply.github.com> Date: Mon, 19 Jul 2021 09:45:19 +0800 Subject: [PATCH 12/12] Update timit.py --- lhotse/recipes/timit.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/lhotse/recipes/timit.py b/lhotse/recipes/timit.py index f608e35aa..ec9505cb4 100644 --- a/lhotse/recipes/timit.py +++ b/lhotse/recipes/timit.py @@ -8,7 +8,6 @@ import os import zipfile import logging -import string from tqdm import tqdm from pathlib import Path from typing import Dict, Optional, Union @@ -21,13 +20,13 @@ def download_timit( target_dir: Pathlike = '.', - force_download: Optional[bool] = False, + force_download: bool = False, base_url: Optional[str] = 'https://data.deepai.org/timit.zip') -> None: """ Download and unzip the dataset TIMIT. - :param target_dir: Pathlike, the path of the dir to storage the dataset. - :param force_download: Bool, if True, download the zips no matter if the zips exists. - :param base_url: str, the url of the TIMIT download for free. + :param target_dir: Pathlike, the path of the dir to store the dataset. + :param force_download: bool, if True, download the zips no matter if the zips exists. + :param base_url: str, the URL of the TIMIT dataset to download. """ target_dir = Path(target_dir) target_dir.mkdir(parents=True, exist_ok=True) @@ -75,14 +74,10 @@ def prepare_timit( phones_dict = {} - try: - if num_phones in [60, 48, 39]: - phones_dict = get_phonemes(num_phones) - else: - raise ValueError("The value of num_phones must be in [60, 48, 39].") - except ValueError as e: - print("Exception: ", repr(e)) - raise + if num_phones in [60, 48, 39]: + phones_dict = get_phonemes(num_phones) + else: + raise ValueError("The value of num_phones must be in [60, 48, 39].") with ThreadPoolExecutor(num_jobs) as ex: for part in dataset_parts: @@ -99,14 +94,14 @@ def prepare_timit( lines = f.readlines() for line in lines: items = line.strip().split(' ') - wav = os.path.join(corpus_dir, items[-1]) + wav = corpus_dir / items[-1] wav_files.append(wav) - print(f'{part} dataset manifest generation.') + logging.debug(f'{part} dataset manifest generation.') recordings = [] supervisions = [] for wav_file in tqdm(wav_files): - items = wav_file.strip().split('/') + items = str(wav_file).strip().split('/') idx = items[-2] + '-' + items[-1][:-4] speaker = items[-2] transcript_file = Path(wav_file).with_suffix('.PHN') @@ -155,11 +150,15 @@ def prepare_timit( return manifests def get_phonemes(num_phones): + """ + Choose and convert the phones for modeling. + :param num_phones: the number of phones for modeling. + """ phonemes = {} if num_phones == int(48): - print("Using 48 phones for modeling!") - # This dictionary is used to conver the 60 phoneme set into the 48 one. + logging.debug("Using 48 phones for modeling!") + # This dictionary is used to convert the 60 phoneme set into the 48 one. phonemes["sil"] = "sil" phonemes["aa"] = "aa" phonemes["ae"] = "ae" @@ -224,8 +223,8 @@ def get_phonemes(num_phones): phonemes["zh"] = "zh" elif num_phones == int(39): - print("Using 39 phones for modeling!") - # This dictionary is used to conver the 60 phoneme set into the 39 one. + logging.debug("Using 39 phones for modeling!") + # This dictionary is used to convert the 60 phoneme set into the 39 one. phonemes["sil"] = "sil" phonemes["aa"] = "aa" phonemes["ae"] = "ae" @@ -290,6 +289,6 @@ def get_phonemes(num_phones): phonemes["zh"] = "sh" else: - print("Using 60 phones for modeling!") + logging.debug("Using 60 phones for modeling!") return phonemes