diff --git a/evaluate_score.sh b/evaluate_score.sh index 557249c..d87dd4e 100755 --- a/evaluate_score.sh +++ b/evaluate_score.sh @@ -5,6 +5,12 @@ _gt_wavscp="/home/yyu479/VISinger_data/wav_dump_16k" _dir="evaluate" _gen_wavdir="singing_out" +if [ ! -d "singing_gt" ] ; then + echo "copy gt" + mkdir -p "singing_gt" + python normalize_wav.py +fi + # Objective Evaluation - MCD echo "Begin Scoring for MCD metrics on ${dset}, results are written under ${_dir}/MCD_res" @@ -12,7 +18,7 @@ mkdir -p "${_dir}/MCD_res" python evaluate/evaluate_mcd.py \ ${_gen_wavdir} \ ${_gt_wavscp} \ - --outdir "${_dir}/MCD_res" + --outdir "${_gen_wavdir}/MCD_res" # Objective Evaluation - log-F0 RMSE echo "Begin Scoring for F0 related metrics on ${dset}, results are written under ${_dir}/F0_res" @@ -21,7 +27,7 @@ mkdir -p "${_dir}/F0_res" python evaluate/evaluate_f0.py \ ${_gen_wavdir} \ ${_gt_wavscp} \ - --outdir "${_dir}/F0_res" + --outdir "${_gen_wavdir}/F0_res" # Objective Evaluation - semitone ACC echo "Begin Scoring for SEMITONE related metrics on ${dset}, results are written under ${_dir}/SEMITONE_res" @@ -30,7 +36,7 @@ mkdir -p "${_dir}/SEMITONE_res" python evaluate/evaluate_semitone.py \ ${_gen_wavdir} \ ${_gt_wavscp} \ - --outdir "${_dir}/SEMITONE_res" + --outdir "${_gen_wavdir}/SEMITONE_res" # Objective Evaluation - VUV error echo "Begin Scoring for VUV related metrics on ${dset}, results are written under ${_dir}/VUV_res" @@ -39,4 +45,4 @@ mkdir -p "${_dir}/VUV_res" python evaluate/evaluate_vuv.py \ ${_gen_wavdir} \ ${_gt_wavscp} \ - --outdir "${_dir}/VUV_res" + --outdir "${_gen_wavdir}/VUV_res" diff --git a/normalize_wav.py b/normalize_wav.py new file mode 100644 index 0000000..dc3db61 --- /dev/null +++ b/normalize_wav.py @@ -0,0 +1,17 @@ +from prepare.align_wav_spec import Align +import os +from tqdm import tqdm + +align = Align(32768, 16000, 1024, 256, 1024) +output_path = "singing_gt" +input_path = "/home/yyu479/VISinger_data/wav_dump_16k" + +files = os.listdir(path=input_path) +for i, wav_file in enumerate(tqdm(files)): + suffix = os.path.splitext(os.path.split(wav_file)[-1])[1] + if not suffix == ".wav": + continue + basename = os.path.splitext(os.path.split(wav_file)[-1])[0][:-7] + align.normalize_wav( + os.path.join(input_path, wav_file), os.path.join(output_path, wav_file) + ) diff --git a/prepare/align_wav_spec.py b/prepare/align_wav_spec.py index ae75bdf..b647dd0 100644 --- a/prepare/align_wav_spec.py +++ b/prepare/align_wav_spec.py @@ -7,6 +7,7 @@ import scipy.io.wavfile as sciwav import os + class Align: def __init__( self, max_wav_value, sampling_rate, filter_length, hop_length, win_length @@ -70,7 +71,10 @@ def align_wav_spec(self, filename, phone_dur): # rewrite aligned wav audio = ( - (audio_norm * self.max_wav_value).transpose(0, 1).numpy().astype(np.int16) + (audio_norm * self.max_wav_value) + .transpose(0, 1) + .numpy() + .astype(np.int16) ) sciwav.write( @@ -82,3 +86,13 @@ def align_wav_spec(self, filename, phone_dur): spec = torch.squeeze(spec, 0) torch.save(spec, spec_filename) return spec.shape[1] + + def normalize_wav(self, input_path, output_path): + audio, sampling_rate = load_wav_to_torch(input_path) + audio_norm = audio.numpy() / self.max_wav_value + audio_norm *= 32767 / max(0.01, np.max(np.abs(audio_norm))) * 0.6 + sciwav.write( + output_path, + sampling_rate, + audio_norm.astype(np.int16), + ) diff --git a/prepare/preprocess.py b/prepare/preprocess.py index fb760a3..c888679 100644 --- a/prepare/preprocess.py +++ b/prepare/preprocess.py @@ -17,8 +17,9 @@ alls.append(message) fo.close() - valids = alls[:200] - trains = alls[200:] + valids = alls[:150] + tests = alls[150:300] + trains = alls[300:] random.shuffle(trains) @@ -27,7 +28,12 @@ print(strs, file=fw) fw.close() + fw = open("./filelists/singing_test.txt", "w", encoding="utf-8") + for strs in tests: + print(strs, file=fw) + fw = open("./filelists/singing_train.txt", "w", encoding="utf-8") for strs in trains: print(strs, file=fw) + fw.close()