-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
226 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
# Add each program's subdirectory to the build; every subdirectory provides
# its own CMakeLists.txt.
add_subdirectory(espeakServer)
add_subdirectory(speechRecognition)
add_subdirectory(speechSynthesis)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# User-facing switch (ON by default) controlling whether the speechSynthesis
# program gets installed.
option(ENABLE_speechSynthesis "Install speechSynthesis program" ON)

if(ENABLE_speechSynthesis)

    # Install the Python script as an executable program into the BIN
    # destination, dropping the .py extension from the installed name.
    install(PROGRAMS speechSynthesis.py
            TYPE BIN
            RENAME speechSynthesis)

endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# Speech synthesis | ||
|
||
## Installation | ||
|
||
Through pip: | ||
|
||
```bash | ||
pip3 install mycroft-mimic3-tts | ||
``` | ||
|
||
Alternatively, install from sources: https://github.com/MycroftAI/mimic3 | ||
|
||
## Download voice models | ||
|
||
All voice data is located in a separate repository: https://github.com/MycroftAI/mimic3-voices | ||
|
||
To download all Spanish voices manually, run: | ||
|
||
```bash | ||
mimic3-download 'es_ES/*' | ||
``` | ||
|
||
If the download gets stuck, download the voice files manually from that repository and unpack them into `${HOME}/.local/share/mycroft/mimic3/voices`. Note that you will probably need to download the *generator.onnx* file separately (via GitHub), since it is handled by Git LFS. | ||
|
||
## Troubleshooting | ||
|
||
To check that the engine and the voice work, synthesize a test sentence from the command line: | ||
|
||
```bash | ||
mimic3 --voice es_ES/m-ailabs#tux "hola, me llamo teo y tengo 10 años" | ||
``` | ||
|
||
To enable GPU acceleration, run `pip3 install onnxruntime-gpu` and issue the `mimic3` command with `--cuda`. The `speechSynthesis` app also accepts this parameter. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# adapted from https://github.com/MycroftAI/mimic3/blob/be72c18/mimic3_tts/__main__.py | ||
|
||
import argparse | ||
import queue | ||
import signal | ||
import shlex | ||
import shutil | ||
import subprocess | ||
import tempfile | ||
import threading | ||
import time | ||
|
||
import mimic3_tts | ||
import yarp | ||
import roboticslab_speech | ||
|
||
# Audio player command lines (program name plus optional flags) used to play
# synthesized speech. The responder walks this list in reverse order and uses
# the first program it finds on PATH.
PLAY_PROGRAMS = [
    'paplay',
    'play -q',
    'aplay -q',
]
|
||
class TextToSpeechResponder(roboticslab_speech.TextToSpeechIDL):
    """Serve the TextToSpeechIDL interface on top of a Mimic 3 engine.

    ``say()`` synthesizes text and pushes the results onto a bounded queue;
    a background worker thread pops them, writes each one to a temporary WAV
    file and plays it through an external audio player (see PLAY_PROGRAMS).
    """

    def __init__(self, engine):
        """Store the engine and start the playback worker thread.

        engine: TTS engine object. This class uses its ``voice``, ``speaker``
        and ``rate`` attributes and its ``get_voices``, ``begin_utterance``,
        ``speak_text`` and ``end_utterance`` methods.
        """
        super().__init__()
        self.engine = engine
        self.is_playing = False
        self.p = None

        # Number of synthesized results that have been queued by say() and
        # not yet fully handled by the worker. Lets checkSayDone() report
        # "busy" for work that is queued but whose playback has not started.
        self._pending = 0
        self._pending_lock = threading.Lock()

        self.result_queue = queue.Queue(maxsize=5)
        self.result_thread = threading.Thread(target=self._process_result, daemon=True)
        self.result_thread.start()

    def setLanguage(self, language):
        """Select a voice, or a speaker of the current voice.

        A value starting with '#' keeps the current voice and selects the
        speaker named after the '#'; any other value selects a new voice.

        Returns True if the resulting voice is locally available. Otherwise
        the previous voice and speaker are restored and False is returned, so
        a rejected request does not leave the engine on an unusable voice.
        """
        previous_voice = self.engine.voice
        previous_speaker = self.engine.speaker

        if language.startswith('#'):
            # same voice, different speaker
            self.engine.speaker = language[1:]
        else:
            # different voice
            self.engine.voice = language

        if self.engine.voice not in list(self.getSupportedLangs()):
            print('Voice not available: %s' % self.engine.voice)
            # Roll back. The voice is restored before the speaker in case
            # assigning the voice also touches the speaker (assumption about
            # the engine; the order is harmless otherwise).
            self.engine.voice = previous_voice
            self.engine.speaker = previous_speaker
            return False

        print('Loaded voice: %s (speaker: %s)' % (self.engine.voice, self.engine.speaker or 'default'))
        return True

    def setSpeed(self, speed):
        """Set the speaking rate from a percentage (100 maps to rate 1.0)."""
        self.engine.rate = float(speed) / 100
        return True

    def setPitch(self, pitch):
        """Delegate to the base class implementation."""
        return super().setPitch(pitch)

    def getSpeed(self):
        """Return the speaking rate as an integer percentage.

        The value is rounded rather than truncated: a rate of 0.29 times 100
        is 28.999999999999996 in floating point, which int() would turn
        into 28 instead of the 29 that was set.
        """
        return int(round(self.engine.rate * 100))

    def getPitch(self):
        """Delegate to the base class implementation."""
        return super().getPitch()

    def getSupportedLangs(self):
        """Return the sorted keys of the voices whose location is not an
        'http...' URL, as a yarp.SVector."""
        local_keys = sorted(
            voice.key
            for voice in self.engine.get_voices()
            if not voice.location.startswith('http')
        )
        return yarp.SVector(local_keys)

    def say(self, text):
        """Synthesize `text` and queue the results for playback.

        Blocks while the result queue is full. Always returns True.
        """
        self.engine.begin_utterance()
        self.engine.speak_text(text)

        for result in self.engine.end_utterance():
            # Count the result before it becomes visible to the worker so the
            # counter can never be decremented ahead of its increment.
            with self._pending_lock:
                self._pending += 1

            self.result_queue.put(result)

        return True

    def play(self):
        """Delegate to the base class implementation."""
        return super().play()

    def pause(self):
        """Delegate to the base class implementation."""
        return super().pause()

    def stop(self):
        """Terminate the player process that was started last, if any.

        Always returns True.
        """
        # Snapshot the attribute: the worker thread may rebind it meanwhile.
        player = self.p

        if player is not None:
            player.terminate()

        return True

    def checkSayDone(self):
        """Return True when nothing is queued and nothing is being played."""
        with self._pending_lock:
            return self._pending == 0 and not self.is_playing

    def _process_result(self):
        """Worker loop: play queued results until a None sentinel arrives."""
        while True:
            result = self.result_queue.get()

            if result is None:
                break

            try:
                wav_bytes = result.to_wav_bytes()

                if wav_bytes:
                    self._play_wav(wav_bytes)
            finally:
                with self._pending_lock:
                    # Clamp at zero in case a result was queued without
                    # going through say().
                    self._pending = max(0, self._pending - 1)

    def _play_wav(self, wav_bytes):
        """Write `wav_bytes` to a temporary file and play it with the first
        usable program of PLAY_PROGRAMS (walked in reverse order)."""
        with tempfile.NamedTemporaryFile(mode='wb+', suffix='.wav') as wav_file:
            wav_file.write(wav_bytes)
            # Make sure the player process sees the complete file on disk.
            wav_file.flush()

            for play_program in reversed(PLAY_PROGRAMS):
                play_cmd = shlex.split(play_program)

                if not shutil.which(play_cmd[0]):
                    continue

                play_cmd.append(wav_file.name)
                self.is_playing = True

                try:
                    with subprocess.Popen(play_cmd) as self.p:
                        try:
                            self.p.wait()
                        except Exception:
                            # Best effort: do not leave the player running if
                            # waiting for it failed.
                            self.p.kill()
                except OSError as error:
                    # The program could not be launched; keep the worker
                    # thread alive and fall back to the next candidate.
                    print('Cannot run %s: %s' % (play_cmd[0], error))
                    continue
                finally:
                    self.is_playing = False

                return

            print('No audio player available (tried: %s)' % ', '.join(PLAY_PROGRAMS))
|
||
# Command-line interface of the service. Only --voice is mandatory; the
# synthesis tuning options are left unset (None) unless given explicitly.
parser = argparse.ArgumentParser(
    prog='speechSynthesis',
    description='TTS service running a Mimic 3 engine',
)

parser.add_argument(
    '--voice', '-v',
    required=True,
    help='Name of voice (expected in <voices-dir>/<language>)',
)
parser.add_argument(
    '--speaker', '-s',
    help='Name or number of speaker (default: first speaker)',
)
parser.add_argument(
    '--noise-scale',
    type=float,
    help='Noise scale [0-1], default is 0.667',
)
parser.add_argument(
    '--length-scale',
    type=float,
    help='Length scale (1.0 is default speed, 0.5 is 2x faster)',
)
parser.add_argument(
    '--noise-w',
    type=float,
    help='Variation in cadence [0-1], default is 0.8',
)
parser.add_argument(
    '--cuda',
    action='store_true',
    help='Use Onnx CUDA execution provider (requires onnxruntime-gpu)',
)
parser.add_argument(
    '--port', '-p',
    default='/speechSynthesis',
    help='YARP port prefix',
)
|
||
args = parser.parse_args()

# Build the Mimic 3 engine from the command-line tuning options.
tts = mimic3_tts.Mimic3TextToSpeechSystem(
    mimic3_tts.Mimic3Settings(
        length_scale=args.length_scale,
        noise_scale=args.noise_scale,
        noise_w=args.noise_w,
        use_cuda=args.cuda,
    )
)

tts.voice = args.voice
tts.speaker = args.speaker

print('Preloading voice: %s' % args.voice)
tts.preload_voice(args.voice)

yarp.Network.init()

if not yarp.Network.checkNetwork():
    print('YARP network not found')
    # Exit with a non-zero status so callers can detect the failure
    # (a bare SystemExit terminates with status 0).
    raise SystemExit(1)

rpc = yarp.RpcServer()
processor = TextToSpeechResponder(tts)

if not rpc.open(args.port + '/rpc:s'):
    print('Cannot open port %s' % rpc.getName())
    raise SystemExit(1)

# Route incoming RPC requests on the port to the responder.
processor.yarp().attachAsServer(rpc)

quitRequested = False


def askToStop():
    """Flag the main loop to terminate."""
    global quitRequested
    quitRequested = True


# The handlers only raise the flag; the actual shutdown runs in the main flow
# below. Parameter names avoid shadowing the `signal` module.
signal.signal(signal.SIGINT, lambda signum, frame: askToStop())
signal.signal(signal.SIGTERM, lambda signum, frame: askToStop())

while not quitRequested:
    time.sleep(0.1)

rpc.interrupt()
rpc.close()

# A None item tells the playback worker to leave its loop; wait for it to
# finish before exiting.
processor.result_queue.put(None)
processor.result_thread.join()