PlayVoice · Oct 23, 2022 · Oct 23, 2022 · Oct 24, 2022 · Oct 24, 2022 · Oct 25, 2022
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,10 @@
+*.pth
+*.pyc
+filelists/singing_train.txt
+filelists/singing_valid.txt
+filelists/vits_file.txt
+logs
+singing_out
+*/*_res
+*.zip
+nohup.out
diff --git a/README.md b/README.md
@@ -1,56 +1,77 @@
 # Init
-Use VITS and Opencpop to develop singing voice synthesis; 
-Different from VISinger, It is just VITS without MAS and DurationPredictor.
+Unofficial Implementation of VISinger
 
-# 本项目基于
+# Reference Repos
 https://github.com/jaywalnut310/vits
 
 https://github.com/MoonInTheRiver/DiffSinger
 
 https://wenet.org.cn/opencpop/
 
-# 数据预处理
+https://github.com/PlayVoice/VI-SVS
+
+# Data Preprocessing
+```bash
 export PYTHONPATH=.
+```
 
-python prepare/data_vits.py
+Generate ../VISinger_data/label_vits_phn/XXX._label.npy|XXX._label_dur.npy|XXX_score.npy|XXX_score_dur.npy|XXX_pitch.npy|XXX_slurs.npy
 
-      生成文件../VISinger_data/label_vits/XXX._label.npy|XXX_score.npy|XXX_pitch.npy|XXX_slurs.npy
+```bash
+python prepare/data_vits_phn.py
+```
 
-      生成文件filelists/vits_file.txt; 内容格式：wave path|label path|score path|pitch path|slurs path;
+Generate filelists/vits_file.txt
+Format: wave path|label path|label duration path|score path|score duration path|pitch path|slurs path;
 
+```bash
 python prepare/preprocess.py
+```
 
-# VITS训练
+# VISinger Training
 
+```bash
 python train.py -c configs/singing_base.json -m singing_base
+```
 
-# 测试验证
+or
 
-1,训练集生成验证:F0根据音频提取
+```bash
+./train.sh
+```
 
-python vsinging_debug.py
+# Inference
 
-2,推理验证:F0根据规则生成
+```bash
+./evaluate_score.sh
+```
 
-python vsinging_infer.py
+![LOSS](/resource/vising_loss.png)
+![MEL](/resource/vising_mel.png)
 
-3,完整歌曲合成（**使用release模型**）
+# Samples
 
-pyton vsinging_song.py
+<audio id="audio" controls="" preload="none">
+      <source id="wav" src="/resource/2005000151.wav">
+</audio>
 
-4,F0的问题可以额外训练F0预测器,或者使用UTAU绘制pit曲线
+<audio id="audio" controls="" preload="none">
+      <source id="wav" src="/resource/2005000152.wav">
+</audio>
 
+<audio id="audio" controls="" preload="none">
+      <source id="wav" src="/resource/2005000186.wav">
+</audio>
 
-![LOSS值](/resource/vising_loss.png)
-![MEL谱](/resource/vising_mel.png)
+<audio id="audio" controls="" preload="none">
+      <source id="wav" src="/resource/2005000187.wav">
+</audio>
 
 <audio id="audio" controls="" preload="none">
-      <source id="wav" src="/resource/vising_sample.wav">
+      <source id="wav" src="/resource/2005000268.wav">
 </audio>
 
-# 样例音频
 
-[vits_singing_样例.wav](/resource/vising_sample.wav)
 
-# AI修复
-https://github.com/brentspell/hifi-gan-bwe
+
+
diff --git a/_config.yml b/_config.yml
@@ -0,0 +1,3 @@
+remote_theme: pages-themes/cayman@v0.2.0
+plugins:
+- jekyll-remote-theme # add this line to the plugins list if you already have one
diff --git a/configs/singing_base.json b/configs/singing_base.json
@@ -1,26 +1,30 @@
 {
   "train": {
     "log_interval": 200,
-    "eval_interval": 10000,
+    "eval_interval": 2000,
     "seed": 1234,
     "epochs": 20000,
     "learning_rate": 1e-4,
-    "betas": [0.8, 0.99],
+    "betas": [
+      0.8,
+      0.99
+    ],
     "eps": 1e-9,
-    "batch_size": 16,
+    "batch_size": 6,
     "fp16_run": false,
     "lr_decay": 0.999875,
     "segment_size": 8192,
     "init_lr_ratio": 1,
     "warmup_epochs": 0,
     "c_mel": 45,
-    "c_kl": 1.0
+    "c_kl": 1.0,
+    "keep_n_models": 20
   },
   "data": {
-    "training_files":"filelists/singing_train.txt",
-    "validation_files":"filelists/singing_valid.txt",
+    "training_files": "filelists/singing_train.txt",
+    "validation_files": "filelists/singing_valid.txt",
     "max_wav_value": 32768.0,
-    "sampling_rate": 16000,
+    "sampling_rate": 24000,
     "filter_length": 1024,
     "hop_length": 256,
     "win_length": 1024,
@@ -38,11 +42,41 @@
     "kernel_size": 3,
     "p_dropout": 0.1,
     "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [8,8,2,2],
+    "resblock_kernel_sizes": [
+      3,
+      7,
+      11
+    ],
+    "resblock_dilation_sizes": [
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ]
+    ],
+    "upsample_rates": [
+      8,
+      8,
+      2,
+      2
+    ],
     "upsample_initial_channel": 384,
-    "upsample_kernel_sizes": [16,16,4,4],
+    "upsample_kernel_sizes": [
+      16,
+      16,
+      4,
+      4
+    ],
     "use_spectral_norm": false
   }
-}
+}