-
Notifications
You must be signed in to change notification settings - Fork 17
/
generate_training_list.py
43 lines (31 loc) · 1.5 KB
/
generate_training_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# ==============================================================================
# Copyright (c) 2018, Yamagishi Laboratory, National Institute of Informatics
# Author: Yusuke Yasuda (yasuda@nii.ac.jp)
# All rights reserved.
# ==============================================================================
"""Generate record ID lists for training, validation, test.
Usage: generate_training_list.py [options]
Options:
--data-root=<dir> Directory contains preprocessed features.
--dataset=<name> Dataset name.
"""
import importlib
from docopt import docopt
from datasets.corpus import Corpus
# Dataset names accepted by --dataset; each is imported below as a module
# under the `datasets` package.
_SUPPORTED_DATASETS = ("ljspeech",)


def _write_id_list(filepath, record_ids):
    """Write record IDs to `filepath`, one per line, and report the path.

    The IDs are joined with newlines, so the resulting file has no trailing
    newline. Any existing file at `filepath` is overwritten.
    """
    with open(filepath, mode="w") as f:
        f.write("\n".join(record_ids))
    print("Generated " + filepath)


if __name__ == "__main__":
    args = docopt(__doc__)
    data_root = args["--data-root"]
    dataset_name = args["--dataset"]

    # Validate explicitly rather than with `assert`: assertions are removed
    # under `python -O`, which would let an unsupported (or missing) name
    # reach the dynamic import below.
    if dataset_name not in _SUPPORTED_DATASETS:
        raise ValueError(
            f"Unsupported dataset: {dataset_name!r} "
            f"(expected one of: {', '.join(_SUPPORTED_DATASETS)})"
        )

    # The dataset module is resolved by name and must provide `instantiate`.
    corpus = importlib.import_module("datasets." + dataset_name)
    corpus_instance: Corpus = corpus.instantiate(in_dir="", out_dir=data_root)

    # Split the record IDs into the three subsets and persist each list.
    training, validation, test = corpus_instance.random_sample()

    _write_id_list(corpus_instance.training_list_filepath, training)
    _write_id_list(corpus_instance.validation_list_filepath, validation)
    _write_id_list(corpus_instance.test_list_filepath, test)