diff --git a/whisperspeech/a2wav.py b/whisperspeech/a2wav.py
index 63d0c9f..22b8043 100644
--- a/whisperspeech/a2wav.py
+++ b/whisperspeech/a2wav.py
@@ -50,3 +50,17 @@ def decode_to_notebook(self, atoks):
 
         audio = self.decode(atoks)
         display(Audio(audio.cpu().numpy(), rate=24000))
+
+    def decode_to_playback(self, atoks):
+        """Decode `atoks` to audio and play it, blocking until playback ends.
+
+        Needs the optional `sounddevice` package; it is imported here so the
+        module can be loaded without it.
+        """
+        import sounddevice as sd
+
+        audio = self.decode(atoks)
+        # Move to host memory as a NumPy array and drop size-1 dimensions.
+        audio_np = audio.cpu().numpy().squeeze()
+        sd.play(audio_np, 24000)  # 24000: same rate decode_to_notebook uses
+        sd.wait()  # wait for playback to finish before returning
diff --git a/whisperspeech/pipeline.py b/whisperspeech/pipeline.py
index b25ab45..8a2e1b4 100644
--- a/whisperspeech/pipeline.py
+++ b/whisperspeech/pipeline.py
@@ -105,3 +105,18 @@ def generate_to_file(self, fname, text, speaker=None, lang='en', cps=15, step_ca
 
     def generate_to_notebook(self, text, speaker=None, lang='en', cps=15, step_callback=None):
-        self.vocoder.decode_to_notebook(self.generate_atoks(text, speaker, lang=lang, cps=cps, step_callback=None))
+        self.vocoder.decode_to_notebook(self.generate_atoks(text, speaker, lang=lang, cps=cps, step_callback=step_callback))
+
+    def generate_to_playback(self, text, speaker=None, lang='en', cps=15, step_callback=None):
+        """Generate audio tokens for `text` and play them via the vocoder.
+
+        If the optional `sounddevice` package cannot be imported, print an
+        install hint and return without generating anything.
+        """
+        # Check for the optional dependency before running the generation.
+        try:
+            import sounddevice  # noqa: F401 -- availability check only
+        except ImportError:
+            print("\033[93mThe 'sounddevice' library is required for direct text to playback functionality. Please install it using 'pip install sounddevice'.\033[0m")
+            return
+
+        self.vocoder.decode_to_playback(self.generate_atoks(text, speaker, lang=lang, cps=cps, step_callback=step_callback))