-
Notifications
You must be signed in to change notification settings - Fork 40
/
config_ljs_decoder.json
153 lines (153 loc) · 4.89 KB
/
config_ljs_decoder.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
{
"train_config": {
"output_directory": "/debug",
"epochs": 10000000,
"optim_algo": "RAdam",
"learning_rate": 1e-4,
"weight_decay": 1e-6,
"sigma": 1.0,
"iters_per_checkpoint": 2500,
"batch_size": 16,
"seed": null,
"checkpoint_path": "",
"ignore_layers": [],
"ignore_layers_warmstart": [],
"finetune_layers": [],
"include_layers": [],
"vocoder_config_path": "models/hifigan_config_22khz.json",
"vocoder_checkpoint_path": "models/hifigan_ljs_generator_v1",
"log_attribute_samples": false,
"log_decoder_samples": true,
"warmstart_checkpoint_path": "",
"use_amp": false,
"grad_clip_val": 1.0,
"loss_weights": {
"blank_logprob": -1,
"ctc_loss_weight": 0.1,
"binarization_loss_weight": 1.0,
"dur_loss_weight": 1.0,
"f0_loss_weight": 1.0,
"energy_loss_weight": 1.0,
"vpred_loss_weight": 1.0
},
"binarization_start_iter": 6000,
"kl_loss_start_iter": 18000,
"unfreeze_modules": "all"
},
"data_config": {
"training_files": {
"LJS": {
"basedir": "filelists/",
"audiodir": "wavs",
"filelist": "ljs_audiopath_text_speaker_train_filelist.txt",
"lmdbpath": ""
}
},
"validation_files": {
"LJS": {
"basedir": "filelists/",
"audiodir": "wavs",
"filelist": "ljs_audiopath_text_speaker_val_filelist.txt",
"lmdbpath": ""
}
},
"dur_min": 0.1,
"dur_max": 10.2,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": 8000.0,
"f0_min": 80.0,
"f0_max": 640.0,
"max_wav_value": 32768.0,
"use_f0": true,
"use_log_f0": 0,
"use_energy_avg": true,
"use_scaled_energy": true,
"symbol_set": "radtts",
"cleaner_names": ["radtts_cleaners"],
"heteronyms_path": "tts_text_processing/heteronyms",
"phoneme_dict_path": "tts_text_processing/cmudict-0.7b",
"p_phoneme": 1.0,
"handle_phoneme": "word",
"handle_phoneme_ambiguous": "ignore",
"include_speakers": null,
"n_frames": -1,
"betabinom_cache_path": "data_cache/",
"lmdb_cache_path": "",
"use_attn_prior_masking": true,
"prepend_space_to_text": true,
"append_space_to_text": true,
"add_bos_eos_to_text": false,
"betabinom_scaling_factor": 1.0,
"distance_tx_unvoiced": false,
"mel_noise_scale": 0.0
},
"dist_config": {
"dist_backend": "nccl",
"dist_url": "tcp://localhost:54321"
},
"model_config": {
"n_speakers": 1,
"n_speaker_dim": 16,
"n_text": 185,
"n_text_dim": 512,
"n_flows": 8,
"n_conv_layers_per_step": 4,
"n_mel_channels": 80,
"n_hidden": 1024,
"mel_encoder_n_hidden": 512,
"dummy_speaker_embedding": false,
"n_early_size": 2,
"n_early_every": 2,
"n_group_size": 2,
"affine_model": "wavenet",
"include_modules": "decatnvpred",
"scaling_fn": "tanh",
"matrix_decomposition": "LUS",
"learn_alignments": true,
"use_speaker_emb_for_alignment": false,
"attn_straight_through_estimator": true,
"use_context_lstm": true,
"context_lstm_norm": "spectral",
"context_lstm_w_f0_and_energy": true,
"text_encoder_lstm_norm": "spectral",
"n_f0_dims": 1,
"n_energy_avg_dims": 1,
"use_first_order_features": false,
"unvoiced_bias_activation": "relu",
"decoder_use_partial_padding": true,
"decoder_use_unvoiced_bias": true,
"ap_pred_log_f0": true,
"ap_use_unvoiced_bias": true,
"ap_use_voiced_embeddings": true,
"dur_model_config": null,
"f0_model_config": null,
"energy_model_config": null,
"v_model_config": {
"name": "dap",
"hparams": {
"n_speaker_dim": 16,
"take_log_of_input": false,
"bottleneck_hparams": {
"in_dim": 512,
"reduction_factor": 16,
"norm": "weightnorm",
"non_linearity": "relu"
},
"arch_hparams": {
"out_dim": 1,
"n_layers": 2,
"n_channels": 256,
"kernel_size": 3,
"p_dropout": 0.5,
"lstm_type": "",
"use_linear": 1
}
}
}
}
}