Skip to content

Commit

Permalink
add volume norm and test dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
jerryuhoo committed Oct 29, 2022
1 parent e01394d commit 6ce4134
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 7 deletions.
14 changes: 10 additions & 4 deletions evaluate_score.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,20 @@ _gt_wavscp="/home/yyu479/VISinger_data/wav_dump_16k"
_dir="evaluate"
_gen_wavdir="singing_out"

# One-time preparation of the ground-truth directory: when "singing_gt" does
# not exist yet, create it and run normalize_wav.py. The step is skipped on
# later runs because the directory is then already present.
if [ ! -d "singing_gt" ] ; then
echo "copy gt"
mkdir -p "singing_gt"
# normalize_wav.py is run from the current directory, with no arguments.
python normalize_wav.py
fi

# Objective Evaluation - MCD
echo "Begin Scoring for MCD metrics on ${dset}, results are written under ${_dir}/MCD_res"

mkdir -p "${_dir}/MCD_res"
python evaluate/evaluate_mcd.py \
${_gen_wavdir} \
${_gt_wavscp} \
--outdir "${_dir}/MCD_res"
--outdir "${_gen_wavdir}/MCD_res"

# Objective Evaluation - log-F0 RMSE
echo "Begin Scoring for F0 related metrics on ${dset}, results are written under ${_dir}/F0_res"
Expand All @@ -21,7 +27,7 @@ mkdir -p "${_dir}/F0_res"
python evaluate/evaluate_f0.py \
${_gen_wavdir} \
${_gt_wavscp} \
--outdir "${_dir}/F0_res"
--outdir "${_gen_wavdir}/F0_res"

# Objective Evaluation - semitone ACC
echo "Begin Scoring for SEMITONE related metrics on ${dset}, results are written under ${_dir}/SEMITONE_res"
Expand All @@ -30,7 +36,7 @@ mkdir -p "${_dir}/SEMITONE_res"
python evaluate/evaluate_semitone.py \
${_gen_wavdir} \
${_gt_wavscp} \
--outdir "${_dir}/SEMITONE_res"
--outdir "${_gen_wavdir}/SEMITONE_res"

# Objective Evaluation - VUV error
echo "Begin Scoring for VUV related metrics on ${dset}, results are written under ${_dir}/VUV_res"
Expand All @@ -39,4 +45,4 @@ mkdir -p "${_dir}/VUV_res"
python evaluate/evaluate_vuv.py \
${_gen_wavdir} \
${_gt_wavscp} \
--outdir "${_dir}/VUV_res"
--outdir "${_gen_wavdir}/VUV_res"
17 changes: 17 additions & 0 deletions normalize_wav.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from prepare.align_wav_spec import Align
import os
from tqdm import tqdm

# Batch volume normalization: every ".wav" file found directly inside
# input_path is passed through Align.normalize_wav and written, under the same
# file name, into output_path.
align = Align(32768, 16000, 1024, 256, 1024)
output_path = "singing_gt"
input_path = "/home/yyu479/VISinger_data/wav_dump_16k"

# Create the destination up front so the script also works when it is run on
# its own, not only after a caller has already made the directory.
os.makedirs(output_path, exist_ok=True)

files = os.listdir(path=input_path)
for wav_file in tqdm(files):
    # os.listdir yields bare entry names, so the extension can be read
    # directly from the name; anything that is not a ".wav" file is ignored.
    if os.path.splitext(wav_file)[1] != ".wav":
        continue
    align.normalize_wav(
        os.path.join(input_path, wav_file), os.path.join(output_path, wav_file)
    )
16 changes: 15 additions & 1 deletion prepare/align_wav_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import scipy.io.wavfile as sciwav
import os


class Align:
def __init__(
self, max_wav_value, sampling_rate, filter_length, hop_length, win_length
Expand Down Expand Up @@ -70,7 +71,10 @@ def align_wav_spec(self, filename, phone_dur):

# rewrite aligned wav
audio = (
(audio_norm * self.max_wav_value).transpose(0, 1).numpy().astype(np.int16)
(audio_norm * self.max_wav_value)
.transpose(0, 1)
.numpy()
.astype(np.int16)
)

sciwav.write(
Expand All @@ -82,3 +86,13 @@ def align_wav_spec(self, filename, phone_dur):
spec = torch.squeeze(spec, 0)
torch.save(spec, spec_filename)
return spec.shape[1]

def normalize_wav(self, input_path, output_path, target_peak=0.6):
    """Peak-normalize a wav file and write it out as 16-bit PCM.

    The samples are divided by ``self.max_wav_value`` and then rescaled so
    that the largest absolute sample lands at ``target_peak * 32767``. The
    measured peak is floored at 0.01, which caps the gain applied to
    near-silent recordings.

    Args:
        input_path: Path of the wav file to read (via ``load_wav_to_torch``).
        output_path: Path the normalized wav file is written to.
        target_peak: Fraction of the int16 full scale (32767) that the
            loudest sample is scaled to. Defaults to 0.6.
    """
    # NOTE(review): load_wav_to_torch is defined outside this block; it is
    # assumed to return (tensor, sampling_rate) -- confirm against its source.
    audio, sampling_rate = load_wav_to_torch(input_path)
    audio_norm = audio.numpy() / self.max_wav_value
    # np.max raises ValueError on a zero-length array, so an empty recording
    # is treated as having a peak of 0 and is written out empty.
    peak = np.max(np.abs(audio_norm)) if audio_norm.size else 0.0
    audio_norm *= 32767 / max(0.01, peak) * target_peak
    sciwav.write(
        output_path,
        sampling_rate,
        audio_norm.astype(np.int16),
    )
10 changes: 8 additions & 2 deletions prepare/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
alls.append(message)
fo.close()

valids = alls[:200]
trains = alls[200:]
valids = alls[:150]
tests = alls[150:300]
trains = alls[300:]

random.shuffle(trains)

Expand All @@ -27,7 +28,12 @@
print(strs, file=fw)
fw.close()

# Write the test split and the training split, one entry per line. Each file
# is opened in its own with-block so that it is flushed and closed as soon as
# its entries are written, and no handle is left open when fw is rebound.
with open("./filelists/singing_test.txt", "w", encoding="utf-8") as fw:
    for strs in tests:
        print(strs, file=fw)

with open("./filelists/singing_train.txt", "w", encoding="utf-8") as fw:
    for strs in trains:
        print(strs, file=fw)

0 comments on commit 6ce4134

Please sign in to comment.