From 6ce4134fb1599374f485a45bdd5ce61331d9a08f Mon Sep 17 00:00:00 2001
From: yifeng <jerryuhoo@gmail.com>
Date: Sat, 29 Oct 2022 00:35:50 -0400
Subject: [PATCH] add volume norm and test dataset

---
 evaluate_score.sh         | 14 ++++++++++----
 normalize_wav.py          | 17 +++++++++++++++++
 prepare/align_wav_spec.py | 16 +++++++++++++++-
 prepare/preprocess.py     | 10 ++++++++--
 4 files changed, 50 insertions(+), 7 deletions(-)
 create mode 100644 normalize_wav.py

diff --git a/evaluate_score.sh b/evaluate_score.sh
index 557249c..d87dd4e 100755
--- a/evaluate_score.sh
+++ b/evaluate_score.sh
@@ -5,6 +5,12 @@ _gt_wavscp="/home/yyu479/VISinger_data/wav_dump_16k"
 _dir="evaluate"
 _gen_wavdir="singing_out"
 
+if [ ! -d "singing_gt" ] ; then  # normalize only when singing_gt does not exist yet
+    echo "copy gt"
+    mkdir -p "singing_gt"
+    python normalize_wav.py  # NOTE(review): if this fails, singing_gt stays and is not retried
+fi
+
 # Objective Evaluation - MCD
 echo "Begin Scoring for MCD metrics on ${dset}, results are written under ${_dir}/MCD_res"
 
@@ -12,7 +18,7 @@ mkdir -p "${_dir}/MCD_res"
 python evaluate/evaluate_mcd.py \
     ${_gen_wavdir} \
     ${_gt_wavscp} \
-    --outdir "${_dir}/MCD_res"
+    --outdir "${_gen_wavdir}/MCD_res"  # NOTE(review): mkdir/echo above still use ${_dir}/MCD_res
 
 # Objective Evaluation - log-F0 RMSE
 echo "Begin Scoring for F0 related metrics on ${dset}, results are written under ${_dir}/F0_res"
@@ -21,7 +27,7 @@ mkdir -p "${_dir}/F0_res"
 python evaluate/evaluate_f0.py \
     ${_gen_wavdir} \
     ${_gt_wavscp} \
-    --outdir "${_dir}/F0_res"
+    --outdir "${_gen_wavdir}/F0_res"  # NOTE(review): mkdir/echo above still use ${_dir}/F0_res
 
 # Objective Evaluation - semitone ACC
 echo "Begin Scoring for SEMITONE related metrics on ${dset}, results are written under ${_dir}/SEMITONE_res"
@@ -30,7 +36,7 @@ mkdir -p "${_dir}/SEMITONE_res"
 python evaluate/evaluate_semitone.py \
     ${_gen_wavdir} \
     ${_gt_wavscp} \
-    --outdir "${_dir}/SEMITONE_res"
+    --outdir "${_gen_wavdir}/SEMITONE_res"  # NOTE(review): mkdir/echo above still use ${_dir}/SEMITONE_res
 
     # Objective Evaluation - VUV error
 echo "Begin Scoring for VUV related metrics on ${dset}, results are written under ${_dir}/VUV_res"
@@ -39,4 +45,4 @@ mkdir -p "${_dir}/VUV_res"
 python evaluate/evaluate_vuv.py \
     ${_gen_wavdir} \
     ${_gt_wavscp} \
-    --outdir "${_dir}/VUV_res"
+    --outdir "${_gen_wavdir}/VUV_res"  # NOTE(review): mkdir/echo above still use ${_dir}/VUV_res
diff --git a/normalize_wav.py b/normalize_wav.py
new file mode 100644
index 0000000..dc3db61
--- /dev/null
+++ b/normalize_wav.py
@@ -0,0 +1,17 @@
+from prepare.align_wav_spec import Align
+import os
+from tqdm import tqdm
+
+align = Align(32768, 16000, 1024, 256, 1024)
+output_path = "singing_gt"
+input_path = "/home/yyu479/VISinger_data/wav_dump_16k"
+
+# Write a volume-normalized copy of every .wav in input_path to output_path.
+# Create the target directory so the script also works when run on its own.
+os.makedirs(output_path, exist_ok=True)
+for wav_file in tqdm(os.listdir(path=input_path)):
+    # os.listdir yields bare file names, so splitext alone gives the suffix.
+    if os.path.splitext(wav_file)[1] != ".wav":
+        continue
+    src = os.path.join(input_path, wav_file)
+    align.normalize_wav(src, os.path.join(output_path, wav_file))
diff --git a/prepare/align_wav_spec.py b/prepare/align_wav_spec.py
index ae75bdf..b647dd0 100644
--- a/prepare/align_wav_spec.py
+++ b/prepare/align_wav_spec.py
@@ -7,6 +7,7 @@
 import scipy.io.wavfile as sciwav
 import os
 
+
 class Align:
     def __init__(
         self, max_wav_value, sampling_rate, filter_length, hop_length, win_length
@@ -70,7 +71,10 @@ def align_wav_spec(self, filename, phone_dur):
 
             # rewrite aligned wav
             audio = (
-                (audio_norm * self.max_wav_value).transpose(0, 1).numpy().astype(np.int16)
+                (audio_norm * self.max_wav_value)
+                .transpose(0, 1)
+                .numpy()
+                .astype(np.int16)
             )
 
             sciwav.write(
@@ -82,3 +86,13 @@ def align_wav_spec(self, filename, phone_dur):
             spec = torch.squeeze(spec, 0)
             torch.save(spec, spec_filename)
         return spec.shape[1]
+
+    def normalize_wav(self, input_path, output_path):
+        """Scale a wav so its peak sits at 0.6 * 32767 and save it as int16."""
+        samples, rate = load_wav_to_torch(input_path)
+        scaled = samples.numpy() / self.max_wav_value
+        # The 0.01 floor on the peak caps the gain applied to near-silent audio.
+        peak = max(0.01, np.max(np.abs(scaled)))
+        gain = 32767 / peak * 0.6
+        scaled *= gain
+        sciwav.write(output_path, rate, scaled.astype(np.int16))
diff --git a/prepare/preprocess.py b/prepare/preprocess.py
index fb760a3..c888679 100644
--- a/prepare/preprocess.py
+++ b/prepare/preprocess.py
@@ -17,8 +17,9 @@
         alls.append(message)
     fo.close()
 
-    valids = alls[:200]
-    trains = alls[200:]
+    valids = alls[:150]
+    tests = alls[150:300]
+    trains = alls[300:]
 
     random.shuffle(trains)
 
@@ -27,7 +28,12 @@
         print(strs, file=fw)
     fw.close()
 
+    fw = open("./filelists/singing_test.txt", "w", encoding="utf-8")
+    for strs in tests:
+        print(strs, file=fw)
+    fw.close()  # flush the test list before fw is rebound to the train list
+
     fw = open("./filelists/singing_train.txt", "w", encoding="utf-8")
     for strs in trains:
         print(strs, file=fw)
     fw.close()