1
+ {
2
+ "nbformat" : 4 ,
3
+ "nbformat_minor" : 0 ,
4
+ "metadata" : {
5
+ "colab" : {
6
+ "provenance" : []
7
+ },
8
+ "kernelspec" : {
9
+ "name" : " python3" ,
10
+ "display_name" : " Python 3"
11
+ },
12
+ "language_info" : {
13
+ "name" : " python"
14
+ },
15
+ "accelerator" : " GPU" ,
16
+ "gpuClass" : " standard"
17
+ },
18
+ "cells" : [
19
+ {
20
+ "cell_type" : " markdown" ,
21
+ "source" : [
22
+ " https://github.com/PlayVoice/so-vits-svc-5.0/\n " ,
23
+ " \n " ,
24
+ " ↑原仓库\n " ,
25
+ " \n " ,
26
+ " *《colab保持连接的方法》*https://zhuanlan.zhihu.com/p/144629818\n " ,
27
+ " \n " ,
28
+ " 预览版本,可使用预设模型进行推理"
29
+ ],
30
+ "metadata" : {
31
+ "id" : " SggegFslkbbK"
32
+ }
33
+ },
34
+ {
35
+ "cell_type" : " markdown" ,
36
+ "source" : [
37
+ " # **环境配置&必要文件下载**\n "
38
+ ],
39
+ "metadata" : {
40
+ "id" : " M1MdDryJP73G"
41
+ }
42
+ },
43
+ {
44
+ "cell_type" : " code" ,
45
+ "execution_count" : null ,
46
+ "metadata" : {
47
+ "id" : " xfJWCr_EkO2i"
48
+ },
49
+ "outputs" : [],
50
+ "source" : [
51
+ " #@title 看看抽了个啥卡~~基本都是T4~~\n " ,
52
+ " !nvidia-smi"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type" : " code" ,
57
+ "source" : [
58
+ " #@title 克隆github仓库\n " ,
59
+ " !git clone https://github.com/PlayVoice/so-vits-svc-5.0/ -b bigvgan"
60
+ ],
61
+ "metadata" : {
62
+ "id" : " nMspj8t3knR6"
63
+ },
64
+ "execution_count" : null ,
65
+ "outputs" : []
66
+ },
67
+ {
68
+ "cell_type" : " code" ,
69
+ "source" : [
70
+ " #@title 安装依赖&下载必要文件\n " ,
71
+ " %cd /content/so-vits-svc-5.0\n " ,
72
+ " \n " ,
73
+ " !apt install ffmpeg\n " ,
74
+ " !pip install -r requirements.txt\n " ,
75
+ " !pip install --upgrade pip setuptools numpy numba\n " ,
76
+ " !gdown --id \" 1UPjQ2LVSIt3o-9QMKMJcdzT8aZRZCI-E\" --output /content/so-vits-svc-5.0/speaker_pretrain/best_model.pth.tar\n " ,
77
+ " !wget -P whisper_pretrain/ https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt\n " ,
78
+ " !wget -P chkpt/sovits5.0/ https://github.com/PlayVoice/so-vits-svc-5.0/releases/download/bigvgan_release/sovits5.0_bigvgan_555.pth"
79
+ ],
80
+ "metadata" : {
81
+ "id" : " Kj2j81K6kubj"
82
+ },
83
+ "execution_count" : null ,
84
+ "outputs" : []
85
+ },
86
+ {
87
+ "cell_type" : " code" ,
88
+ "source" : [
89
+ " #@title 加载Google云端硬盘\n " ,
90
+ " from google.colab import drive\n " ,
91
+ " drive.mount('/content/drive')"
92
+ ],
93
+ "metadata" : {
94
+ "id" : " v9zHS9VXly9b"
95
+ },
96
+ "execution_count" : null ,
97
+ "outputs" : []
98
+ },
99
+ {
100
+ "cell_type" : " markdown" ,
101
+ "source" : [
102
+ " # 包含多说话人的推理预览"
103
+ ],
104
+ "metadata" : {
105
+ "id" : " hZ5KH8NgQ7os"
106
+ }
107
+ },
108
+ {
109
+ "cell_type" : " code" ,
110
+ "source" : [
111
+ " #@title 提取内容编码\n " ,
112
+ " \n " ,
113
+ " #@markdown **将处理好的\" .wav \" 输入源文件上传到云盘根目录,并修改以下选项**\n " ,
114
+ " \n " ,
115
+ " #@markdown **\" .wav \" 文件【文件名】**\n " ,
116
+ " input = \"\\ u30AE\\ u30BF\\ u30FC\\ u3068\\ u5B64\\ u72EC\\ u3068\\ u84BC\\ u3044\\ u60D1\\ u661F\" #@param {type:\" string\" }\n " ,
117
+ " input_path = \" /content/drive/MyDrive/\"\n " ,
118
+ " input_name = input_path + input\n " ,
119
+ " !PYTHONPATH=. python whisper/inference.py -w {input_name}.wav -p test.ppg.npy"
120
+ ],
121
+ "metadata" : {
122
+ "id" : " 2o6m3D0IsphU"
123
+ },
124
+ "execution_count" : null ,
125
+ "outputs" : []
126
+ },
127
+ {
128
+ "cell_type" : " code" ,
129
+ "source" : [
130
+ " #@title 推理\n " ,
131
+ " \n " ,
132
+ " #@markdown **将处理好的\" .wav \" 输入源文件上传到云盘根目录,并修改以下选项**\n " ,
133
+ " \n " ,
134
+ " #@markdown **\" .wav \" 文件【文件名】**\n " ,
135
+ " input = \"\\ u30AE\\ u30BF\\ u30FC\\ u3068\\ u5B64\\ u72EC\\ u3068\\ u84BC\\ u3044\\ u60D1\\ u661F\" #@param {type:\" string\" }\n " ,
136
+ " input_path = \" /content/drive/MyDrive/\"\n " ,
137
+ " input_name = input_path + input\n " ,
138
+ " #@markdown **指定说话人(0001~0056)(推荐0022、0030、0047、0051)**\n " ,
139
+ " speaker = \" 0002\" #@param {type:\" string\" }\n " ,
140
+ " !PYTHONPATH=. python svc_inference.py --config configs/base.yaml --model chkpt/sovits5.0/sovits5.0_bigvgan_555.pth --spk ./configs/singers/singer{speaker}.npy --wave {input_name}.wav --ppg test.ppg.npy"
141
+ ],
142
+ "metadata" : {
143
+ "id" : " A7nvX5mRlwJ7"
144
+ },
145
+ "execution_count" : null ,
146
+ "outputs" : []
147
+ },
148
+ {
149
+ "cell_type" : " markdown" ,
150
+ "source" : [
151
+ " 推理结果保存在根目录,文件名为svc_out.wav"
152
+ ],
153
+ "metadata" : {
154
+ "id" : " F8oerogXyd3u"
155
+ }
156
+ },
157
+ {
158
+ "cell_type" : " markdown" ,
159
+ "source" : [
160
+ " # 训练"
161
+ ],
162
+ "metadata" : {
163
+ "id" : " qKX17GElPuso"
164
+ }
165
+ },
166
+ {
167
+ "cell_type" : " markdown" ,
168
+ "source" : [
169
+ " 将音频剪裁为小于30秒的音频段,响度匹配并修改为单声道,预处理时会进行重采样所以对采样率无要求。(但是降低采样率的操作会降低你的数据质量)\n " ,
170
+ " \n " ,
171
+ " **使用Adobe Audition™的响度匹配功能可以一次性完成重采样修改声道和响度匹配。**\n " ,
172
+ " \n " ,
173
+ " 之后将音频文件保存为以下文件结构:\n " ,
174
+ " ```\n " ,
175
+ " data_raw\n " ,
176
+ " ├───speaker0\n " ,
177
+ " │ ├───xxx1-xxx1.wav\n " ,
178
+ " │ ├───...\n " ,
179
+ " │ └───Lxx-0xx8.wav\n " ,
180
+ " └───speaker1\n " ,
181
+ " ├───xx2-0xxx2.wav\n " ,
182
+ " ├───...\n " ,
183
+ " └───xxx7-xxx007.wav\n " ,
184
+ " ```\n " ,
185
+ " \n " ,
186
+ " 打包为zip格式,命名为data.zip,上传到网盘根目录。"
187
+ ],
188
+ "metadata" : {
189
+ "id" : " sVe0lEGWQBLU"
190
+ }
191
+ },
192
+ {
193
+ "cell_type" : " code" ,
194
+ "source" : [
195
+ " #@title 从云盘获取数据集\n " ,
196
+ " !unzip -d /content/so-vits-svc-5.0/ /content/drive/MyDrive/data.zip #自行修改路径与文件名"
197
+ ],
198
+ "metadata" : {
199
+ "id" : " vC8IthV8VYgy"
200
+ },
201
+ "execution_count" : null ,
202
+ "outputs" : []
203
+ },
204
+ {
205
+ "cell_type" : " code" ,
206
+ "source" : [
207
+ " #@title 重采样\n " ,
208
+ " # 生成采样率16000Hz音频, 存储路径为:./data_svc/waves-16k\n " ,
209
+ " !python prepare/preprocess_a.py -w ./data_raw -o ./data_svc/waves-16k -s 16000\n " ,
210
+ " # 生成采样率32000Hz音频, 存储路径为:./data_svc/waves-32k\n " ,
211
+ " !python prepare/preprocess_a.py -w ./data_raw -o ./data_svc/waves-32k -s 32000"
212
+ ],
213
+ "metadata" : {
214
+ "id" : " J101PiFUSL1N"
215
+ },
216
+ "execution_count" : null ,
217
+ "outputs" : []
218
+ },
219
+ {
220
+ "cell_type" : " code" ,
221
+ "source" : [
222
+ " #@title 提取f0\n " ,
223
+ " !python prepare/preprocess_f0.py -w data_svc/waves-16k/ -p data_svc/pitch"
224
+ ],
225
+ "metadata" : {
226
+ "id" : " ZpxeYJCBSbgf"
227
+ },
228
+ "execution_count" : null ,
229
+ "outputs" : []
230
+ },
231
+ {
232
+ "cell_type" : " code" ,
233
+ "source" : [
234
+ " #@title 提取内容特征\n " ,
235
+ " !PYTHONPATH=. python prepare/preprocess_ppg.py -w data_svc/waves-16k/ -p data_svc/whisper"
236
+ ],
237
+ "metadata" : {
238
+ "id" : " 7VasDGhDSlP5"
239
+ },
240
+ "execution_count" : null ,
241
+ "outputs" : []
242
+ },
243
+ {
244
+ "cell_type" : " code" ,
245
+ "source" : [
246
+ " #@title 提取音色特征\n " ,
247
+ " !PYTHONPATH=. python prepare/preprocess_speaker.py data_svc/waves-16k/ data_svc/speaker"
248
+ ],
249
+ "metadata" : {
250
+ "id" : " ovRqQUINSoII"
251
+ },
252
+ "execution_count" : null ,
253
+ "outputs" : []
254
+ },
255
+ {
256
+ "cell_type" : " code" ,
257
+ "source" : [
258
+ " # Delete every .ipynb_checkpoints directory under the current directory (works around .ipynb_checkpoints-related errors).\n " ,
259
+ " !find . -type d -name .ipynb_checkpoints -prune -print0 | xargs -0 rm -rf"
260
+ ],
261
+ "metadata" : {
262
+ "id" : " s8Ba8Fd1bzzX"
263
+ },
264
+ "execution_count" : null ,
265
+ "outputs" : []
266
+ },
267
+ {
268
+ "cell_type" : " code" ,
269
+ "source" : [
270
+ " #(解决“.ipynb_checkpoints”相关的错)\n " ,
271
+ " !rm -rf .ipynb_checkpoints\n " ,
272
+ " !find . -name \" .ipynb_checkpoints\" -exec rm -rf {} \\ ;"
273
+ ],
274
+ "metadata" : {
275
+ "id" : " ic9q599_b0Ae"
276
+ },
277
+ "execution_count" : null ,
278
+ "outputs" : []
279
+ },
280
+ {
281
+ "cell_type" : " code" ,
282
+ "source" : [
283
+ " #@title 提取平均音色\n " ,
284
+ " !PYTHONPATH=. python prepare/preprocess_speaker_ave.py data_svc/speaker/ data_svc/singer"
285
+ ],
286
+ "metadata" : {
287
+ "id" : " QamG3_B6o3vF"
288
+ },
289
+ "execution_count" : null ,
290
+ "outputs" : []
291
+ },
292
+ {
293
+ "cell_type" : " code" ,
294
+ "source" : [
295
+ " #@title 提取spec\n " ,
296
+ " !PYTHONPATH=. python prepare/preprocess_spec.py -w data_svc/waves-32k/ -s data_svc/specs"
297
+ ],
298
+ "metadata" : {
299
+ "id" : " 3wBmyQHvSs6K"
300
+ },
301
+ "execution_count" : null ,
302
+ "outputs" : []
303
+ },
304
+ {
305
+ "cell_type" : " code" ,
306
+ "source" : [
307
+ " #@title 生成索引\n " ,
308
+ " !python prepare/preprocess_train.py"
309
+ ],
310
+ "metadata" : {
311
+ "id" : " tUcljCLbS5O3"
312
+ },
313
+ "execution_count" : null ,
314
+ "outputs" : []
315
+ },
316
+ {
317
+ "cell_type" : " code" ,
318
+ "source" : [
319
+ " #@title 训练文件调试\n " ,
320
+ " !PYTHONPATH=. python prepare/preprocess_zzz.py"
321
+ ],
322
+ "metadata" : {
323
+ "id" : " 30fXnscFS7Wo"
324
+ },
325
+ "execution_count" : null ,
326
+ "outputs" : []
327
+ },
328
+ {
329
+ "cell_type" : " code" ,
330
+ "source" : [
331
+ " #@title 设定模型备份\n " ,
332
+ " #@markdown **是否备份模型到云盘,colab随时爆炸建议备份,默认保存到云盘根目录Sovits5.0文件夹**\n " ,
333
+ " Save_to_drive = True #@param {type:\" boolean\" }\n " ,
334
+ " if Save_to_drive:\n " ,
335
+ " !mkdir -p /content/so-vits-svc-5.0/chkpt/\n " ,
336
+ " !rm -rf /content/so-vits-svc-5.0/chkpt/\n " ,
337
+ " !mkdir -p /content/drive/MyDrive/Sovits5.0\n " ,
338
+ " !ln -s /content/drive/MyDrive/Sovits5.0 /content/so-vits-svc-5.0/chkpt"
339
+ ],
340
+ "metadata" : {
341
+ "id" : " hacR8qDFVOWo"
342
+ },
343
+ "execution_count" : null ,
344
+ "outputs" : []
345
+ },
346
+ {
347
+ "cell_type" : " code" ,
348
+ "source" : [
349
+ " #@title 开始训练\n " ,
350
+ " %load_ext tensorboard\n " ,
351
+ " %tensorboard --logdir /content/so-vits-svc-5.0/logs/\n " ,
352
+ " \n " ,
353
+ " !PYTHONPATH=. python svc_trainer.py -c configs/base.yaml -n sovits5.0"
354
+ ],
355
+ "metadata" : {
356
+ "id" : " 5BIiKIAoU3Kd"
357
+ },
358
+ "execution_count" : null ,
359
+ "outputs" : []
360
+ }
361
+ ]
362
+ }
0 commit comments