Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Export and replay generated wav #402

Merged
12 commits merged into from
Jul 9, 2020
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ numpy>=1.14.0
scipy>=1.0.0
tqdm
sounddevice
SoundFile
Unidecode
inflect
PyQt5
41 changes: 41 additions & 0 deletions toolbox/__init__.py
Original file line number Diff line number Diff line change
@@ -34,6 +34,9 @@
"VCTK-Corpus/wav48",
]

# Maximum number of generated wavs to keep in memory
MAX_WAVES = 15

class Toolbox:
def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir, low_mem):
sys.excepthook = self.excepthook
@@ -43,6 +46,10 @@ def __init__(self, datasets_root, enc_models_dir, syn_models_dir, voc_models_dir
self.current_generated = (None, None, None, None) # speaker_name, spec, breaks, wav

self.synthesizer = None # type: Synthesizer
self.current_wav = None
self.waves_list = []
self.waves_count = 0
self.waves_namelist = []

# Initialize the events and the interface
self.ui = UI()
@@ -82,8 +89,17 @@ def func():
self.ui.play_button.clicked.connect(func)
self.ui.stop_button.clicked.connect(self.ui.stop)
self.ui.record_button.clicked.connect(self.record)

#Audio
self.ui.setup_audio_devices(Synthesizer.sample_rate)

#Wav playback & save
func = lambda: self.replay_last_wav()
self.ui.replay_wav_button.clicked.connect(func)
func = lambda: self.export_current_wave()
self.ui.export_wav_button.clicked.connect(func)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)

# Generation
func = lambda: self.synthesize() or self.vocode()
self.ui.generate_button.clicked.connect(func)
@@ -93,6 +109,15 @@ def func():
# UMAP legend
self.ui.clear_button.clicked.connect(self.clear_utterances)

def set_current_wav(self, index):
    """Make the wav shown at combobox row ``index`` the current wav.

    The combobox lists the newest wav first, whereas ``self.waves_list``
    stores wavs oldest first, so rows are counted back from the end of the
    list. Rows that have no stored wav leave ``self.current_wav`` unchanged.
    """
    # Count back from the actual list length, not from self.waves_count: the
    # counter keeps increasing after old wavs are dropped from the list, so
    # using it as the base would index past the end.
    position = len(self.waves_list) - 1 - index
    if 0 <= position < len(self.waves_list):
        self.current_wav = self.waves_list[position]
This conversation was marked as resolved.
Show resolved Hide resolved

def export_current_wave(self):
    """Ask the UI to save the currently selected wav to a file.

    Does nothing while no wav has been selected: ``self.current_wav`` starts
    out as None, and passing None on would only fail later, after the user
    has already picked a destination.
    """
    if self.current_wav is None:
        return
    self.ui.save_audio_file(self.current_wav, Synthesizer.sample_rate)

def replay_last_wav(self):
    """Play the currently selected wav again through the UI.

    Does nothing while no wav has been selected: ``self.current_wav`` starts
    out as None and there is nothing to play in that case.
    """
    if self.current_wav is None:
        return
    self.ui.play(self.current_wav, Synthesizer.sample_rate)

def reset_ui(self, encoder_models_dir, synthesizer_models_dir, vocoder_models_dir):
self.ui.populate_browser(self.datasets_root, recognized_datasets, 0, True)
self.ui.populate_models(encoder_models_dir, synthesizer_models_dir, vocoder_models_dir)
@@ -212,6 +237,22 @@ def vocoder_progress(i, seq_len, b_size, gen_rate):
wav = wav / np.abs(wav).max() * 0.97
self.ui.play(wav, Synthesizer.sample_rate)

#Enable replay and save buttons:
self.ui.replay_wav_button.setDisabled(False)
self.ui.export_wav_button.setDisabled(False)

#Update waves combobox
self.waves_count += 1
if self.waves_count >= MAX_WAVES:
self.waves_list.pop(0)
self.waves_list.append(wav)
#TODO better naming for the combobox items?
self.waves_namelist = ["%d" % (self.waves_count - i) for i in range(0, min(self.waves_count, MAX_WAVES))]
self.ui.waves_cb.disconnect()
self.ui.waves_cb_model.setStringList(self.waves_namelist)
self.ui.waves_cb.setCurrentIndex(0)
self.ui.waves_cb.currentIndexChanged.connect(self.set_current_wav)

# Compute the embedding
# TODO: this is problematic with different sampling rates, gotta fix it
if not encoder.is_loaded():
37 changes: 35 additions & 2 deletions toolbox/ui.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
from PyQt5.QtCore import Qt
from PyQt5.QtCore import Qt, QStringListModel
from PyQt5.QtWidgets import *
from encoder.inference import plot_embedding_as_heatmap
from toolbox.utterance import Utterance
from pathlib import Path
from typing import List, Set
import sounddevice as sd
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
# from sklearn.manifold import TSNE # You can try with TSNE if you like, I prefer UMAP
@@ -137,7 +138,21 @@ def draw_umap_projections(self, utterances: Set[Utterance]):
self.umap_ax.set_yticks([])
self.umap_ax.figure.canvas.draw()

def setup_audio_devices(self,sample_rate):
def save_audio_file(self, wav, sample_rate):
    """Prompt for a destination path and write ``wav`` to it.

    ``wav`` and ``sample_rate`` are passed to ``sf.write`` unchanged. Nothing
    is written when the dialog returns an empty path. A path typed without
    any extension gets ".wav" appended.
    """
    # getSaveFileName is a static convenience function that creates its own
    # dialog, so configuring a separate QFileDialog instance beforehand (for
    # example with setDefaultSuffix) does not affect it. The missing
    # extension is therefore handled by hand below.
    fpath, _ = QFileDialog.getSaveFileName(
        parent=self,
        caption="Select a path to save the audio file",
        filter="Audio Files (*.flac *.wav)"
    )
    if not fpath:
        return

    # Default format is wav
    if Path(fpath).suffix == "":
        fpath += ".wav"
    sf.write(fpath, wav, sample_rate)

def setup_audio_devices(self, sample_rate):
input_devices = []
output_devices = []
for device in sd.query_devices():
@@ -389,6 +404,8 @@ def reset_interface(self):
self.generate_button.setDisabled(True)
self.synthesize_button.setDisabled(True)
self.vocode_button.setDisabled(True)
self.replay_wav_button.setDisabled(True)
self.export_wav_button.setDisabled(True)
[self.log("") for _ in range(self.max_log_lines)]

def __init__(self):
@@ -537,6 +554,22 @@ def __init__(self):
layout.addWidget(self.vocode_button)
gen_layout.addLayout(layout)


#Replay & Save Audio
layout2 = QHBoxLayout()
self.replay_wav_button = QPushButton("Replay")
self.replay_wav_button.setToolTip("Replay last generated vocoder")
layout2.addWidget(self.replay_wav_button)
self.export_wav_button = QPushButton("Export")
self.export_wav_button.setToolTip("Save last generated vocoder audio in filesystem as a wav file")
layout2.addWidget(self.export_wav_button)
self.waves_cb_model = QStringListModel()
self.waves_cb = QComboBox()
self.waves_cb.setModel(self.waves_cb_model)
self.waves_cb.setToolTip("Select one of the last generated waves in this section for replaying or exporting")
layout2.addWidget(self.waves_cb)
gen_layout.addLayout(layout2)

self.loading_bar = QProgressBar()
gen_layout.addWidget(self.loading_bar)