-
Notifications
You must be signed in to change notification settings - Fork 53
/
Copy pathsynthesize_e2e.sh
136 lines (120 loc) · 4.97 KB
/
synthesize_e2e.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env bash
# End-to-end TTS synthesis driver: selects an acoustic model (am) and a
# vocoder (voc), then dispatches to train/exps/synthesize_e2e.py with the
# flag set matching each model-family / speaker-count combination below.
set -euo pipefail

# --- Acoustic model (am) selection ---
am_type=fastspeech2_aishell3
am_model_name=fastspeech2_bili3_aishell3
am_checkpoints=snapshot_iter_179790
# --- Vocoder (voc) selection ---
voc_type=hifigan_csmsc
voc_model_name=hifigan_azi_nanami
voc_checkpoints=snapshot_iter_115000
# Alternative speedyspeech setup — uncomment to use (and set fastspeech2=False):
# am_type=speedyspeech_aishell3
# am_model_name=speedyspeech_azi_nanami_1_9
# am_checkpoints=snapshot_iter_63726
# voc_type=hifigan_csmsc
# voc_model_name=hifigan_azi_nanami_ft
# voc_checkpoints=snapshot_iter_310000

# Branch switches: which model family and whether it is multi-speaker.
# NOTE(review): these are the literal strings "True"/"False" compared below,
# not shell booleans.
fastspeech2=True
multiple=True
# Style-control toggles forwarded to synthesize_e2e.py.
use_style=True
use_gst=False
use_vae=False
# Speaker id used by the multi-speaker branches.
spk_id=175
# 0 = run on CPU.
ngpu=0
# Substring that marks a fine-tuned vocoder model name (see check below).
str="ft"
# Fine-tuned vocoders carry the "$str" tag in their model name and ship a
# dedicated config file; everything else falls back to the stock default.
if [[ "$voc_model_name" == *"$str"* ]]; then
  echo "voc_config is finetuned!"
  voc_config=finetune
else
  echo "voc_config is default!"
  voc_config=default
fi
# fastspeech2, multi-speaker: needs speaker_dict + spk_id, and the style
# (pitch/energy) statistics for style control.
if [[ "${fastspeech2}" == "True" && "${multiple}" == "True" ]]; then
  echo "model: fastspeech2, multiple"
  # NOTE: the original passed --pitch_stat/--energy_stat twice; they are
  # given exactly once here.
  python train/exps/synthesize_e2e.py \
    --am="${am_type}" \
    --am_config="exp/${am_model_name}/default_multi.yaml" \
    --am_ckpt="exp/${am_model_name}/checkpoints/${am_checkpoints}.pdz" \
    --am_stat="exp/${am_model_name}/speech_stats.npy" \
    --pitch_stat="exp/${am_model_name}/pitch_stats.npy" \
    --energy_stat="exp/${am_model_name}/energy_stats.npy" \
    --voc="${voc_type}" \
    --voc_config="pretrained_models/${voc_model_name}/${voc_config}.yaml" \
    --voc_ckpt="pretrained_models/${voc_model_name}/checkpoints/${voc_checkpoints}.pdz" \
    --voc_stat="pretrained_models/${voc_model_name}/feats_stats.npy" \
    --lang=zh \
    --text=sentences.txt \
    --output_dir=train/test_e2e \
    --inference_dir=train/inference \
    --phones_dict="exp/${am_model_name}/phone_id_map.txt" \
    --speaker_dict="exp/${am_model_name}/speaker_id_map.txt" \
    --ngpu="${ngpu}" \
    --spk_id="${spk_id}" \
    --use_gst="${use_gst}" \
    --use_vae="${use_vae}" \
    --use_style="${use_style}"
fi
# fastspeech2, single-speaker: same as the multi branch but without
# speaker_dict/spk_id.
if [[ "${fastspeech2}" == "True" && "${multiple}" == "False" ]]; then
  # Added for consistency: every other branch announces itself.
  echo "model: fastspeech2, single"
  # NOTE: the original passed --pitch_stat/--energy_stat twice; they are
  # given exactly once here.
  python train/exps/synthesize_e2e.py \
    --am="${am_type}" \
    --am_config="exp/${am_model_name}/default_multi.yaml" \
    --am_ckpt="exp/${am_model_name}/checkpoints/${am_checkpoints}.pdz" \
    --am_stat="exp/${am_model_name}/speech_stats.npy" \
    --pitch_stat="exp/${am_model_name}/pitch_stats.npy" \
    --energy_stat="exp/${am_model_name}/energy_stats.npy" \
    --voc="${voc_type}" \
    --voc_config="pretrained_models/${voc_model_name}/${voc_config}.yaml" \
    --voc_ckpt="pretrained_models/${voc_model_name}/checkpoints/${voc_checkpoints}.pdz" \
    --voc_stat="pretrained_models/${voc_model_name}/feats_stats.npy" \
    --lang=zh \
    --text=sentences.txt \
    --output_dir=train/test_e2e \
    --inference_dir=train/inference \
    --phones_dict="exp/${am_model_name}/phone_id_map.txt" \
    --ngpu="${ngpu}" \
    --use_gst="${use_gst}" \
    --use_vae="${use_vae}" \
    --use_style="${use_style}"
fi
# speedyspeech, multi-speaker: uses feats stats and needs tone + speaker
# dictionaries; no pitch/energy style flags.
if [[ "${fastspeech2}" == "False" && "${multiple}" == "True" ]]; then
  echo "model: speedyspeech, multiple"
  python3 train/exps/synthesize_e2e.py \
    --am="${am_type}" \
    --am_config="exp/${am_model_name}/default_multi.yaml" \
    --am_ckpt="exp/${am_model_name}/checkpoints/${am_checkpoints}.pdz" \
    --am_stat="exp/${am_model_name}/feats_stats.npy" \
    --voc="${voc_type}" \
    --voc_config="pretrained_models/${voc_model_name}/${voc_config}.yaml" \
    --voc_ckpt="pretrained_models/${voc_model_name}/checkpoints/${voc_checkpoints}.pdz" \
    --voc_stat="pretrained_models/${voc_model_name}/feats_stats.npy" \
    --lang=zh \
    --text=sentences.txt \
    --output_dir=train/test_e2e \
    --inference_dir=train/inference \
    --phones_dict="exp/${am_model_name}/phone_id_map.txt" \
    --tones_dict="exp/${am_model_name}/tone_id_map.txt" \
    --speaker_dict="exp/${am_model_name}/speaker_id_map.txt" \
    --ngpu="${ngpu}" \
    --spk_id="${spk_id}"
fi
# speedyspeech, single-speaker: tone dict but no speaker dict / spk_id.
if [[ "${fastspeech2}" == "False" && "${multiple}" == "False" ]]; then
  echo "model: speedyspeech, single"
  python3 train/exps/synthesize_e2e.py \
    --am="${am_type}" \
    --am_config="exp/${am_model_name}/default_multi.yaml" \
    --am_ckpt="exp/${am_model_name}/checkpoints/${am_checkpoints}.pdz" \
    --am_stat="exp/${am_model_name}/feats_stats.npy" \
    --voc="${voc_type}" \
    --voc_config="pretrained_models/${voc_model_name}/${voc_config}.yaml" \
    --voc_ckpt="pretrained_models/${voc_model_name}/checkpoints/${voc_checkpoints}.pdz" \
    --voc_stat="pretrained_models/${voc_model_name}/feats_stats.npy" \
    --lang=zh \
    --text=sentences.txt \
    --output_dir=train/test_e2e \
    --inference_dir=train/inference \
    --phones_dict="exp/${am_model_name}/phone_id_map.txt" \
    --tones_dict="exp/${am_model_name}/tone_id_map.txt" \
    --ngpu="${ngpu}"
fi