Skip to content

Commit 718adcc

Browse files
authored
Merge pull request #13 from NeonJarbas/feat/polly_TTS
add polly TTS
2 parents 96b0f02 + 2b621b2 commit 718adcc

File tree

4 files changed

+120
-2
lines changed

4 files changed

+120
-2
lines changed

mycroft/configuration/mycroft.conf

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,13 @@
383383
"tts": {
384384
// Engine. Options: "mimic", "google", "marytts", "fatts", "espeak", "spdsay", "responsive_voice", "yandex", "polly"
385385
"pulse_duck": false,
386-
"module": "mimic",
386+
"module": "polly",
387+
"polly": {
388+
"voice": "Matthew",
389+
"region": "us-east-1",
390+
"key_id": "",
391+
"secret_key": ""
392+
},
387393
"mimic": {
388394
"voice": "ap"
389395
},

mycroft/tts/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ class TTSFactory:
495495
from mycroft.tts.responsive_voice_tts import ResponsiveVoice
496496
from mycroft.tts.mimic2_tts import Mimic2
497497
from mycroft.tts.yandex_tts import YandexTTS
498+
from mycroft.tts.polly_tts import PollyTTS
498499

499500
CLASSES = {
500501
"mimic": Mimic,
@@ -507,7 +508,8 @@ class TTSFactory:
507508
"watson": WatsonTTS,
508509
"bing": BingTTS,
509510
"responsive_voice": ResponsiveVoice,
510-
"yandex": YandexTTS
511+
"yandex": YandexTTS,
512+
"polly": PollyTTS
511513
}
512514

513515
@staticmethod

mycroft/tts/polly_tts.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import boto3
2+
from mycroft.tts import TTS, TTSValidator
3+
from mycroft.configuration import Configuration
4+
5+
6+
class PollyTTS(TTS):
    """Text-to-speech engine that synthesizes audio with Amazon Polly.

    Settings come from the ``tts.polly`` configuration section (``voice``,
    ``region``, ``key_id``, ``secret_key``) and can be overridden by a
    ``polly`` or ``amazon`` entry in ``self.keys``.
    """

    def __init__(self, lang="en-us", config=None):
        """Read the Polly settings and create the boto3 Polly client.

        Args:
            lang: language code handed to the ``TTS`` base class.
            config: optional settings dict; when falsy, the ``tts.polly``
                section of the global configuration is used instead.
        """
        config = config or Configuration.get().get("tts", {}).get("polly", {})
        super(PollyTTS, self).__init__(lang, config, PollyTTSValidator(self),
                                       audio_ext="mp3",
                                       ssml_tags=["speak", "say-as", "voice",
                                                  "prosody", "break",
                                                  "emphasis", "sub", "lang",
                                                  "phoneme", "w", "whisper",
                                                  "amazon:auto-breaths",
                                                  "p", "s", "amazon:effect",
                                                  "mark"])

        self.voice = self.config.get("voice", "Matthew")
        self.key_id = self.config.get("key_id", '')
        self.key = self.config.get("secret_key", '')
        self.region = self.config.get("region", 'us-east-1')

        # NOTE(review): self.keys is not assigned in this class; presumably
        # it is provided by the TTS base class - confirm.
        # A dedicated "polly" entry takes precedence over the shared "amazon"
        # one. They are looked up separately in case we want to use different
        # keys for other Amazon services (the translate api for example).
        overrides = self.keys.get("polly") or self.keys.get("amazon") or {}
        self.key_id = overrides.get("key_id") or self.key_id
        self.key = overrides.get("secret_key") or self.key
        self.region = overrides.get("region") or self.region
        self.voice = overrides.get("voice") or self.voice

        self.polly = boto3.Session(aws_access_key_id=self.key_id,
                                   aws_secret_access_key=self.key,
                                   region_name=self.region).client('polly')

    @staticmethod
    def _convert_whisper_tags(sentence):
        """Rewrite ``<whispered>`` tags into Polly's ``amazon:effect`` form.

        Only actual tags are rewritten, so the plain word "whispered" and an
        existing ``name="whispered"`` attribute are left untouched. Both the
        correct closing tag ``</whispered>`` and the legacy backslash
        spelling ``<\\whispered>`` are accepted.
        """
        import re

        sentence = re.sub(r"<\s*[/\\]\s*whispered\s*>",
                          "</amazon:effect>", sentence)
        return re.sub(r"<\s*whispered\s*>",
                      "<amazon:effect name=\"whispered\">", sentence)

    def get_tts(self, sentence, wav_file):
        """Synthesize ``sentence`` with Polly and write the audio to disk.

        Args:
            sentence: plain text or SSML markup to speak.
            wav_file: path of the output file (written in
                ``self.audio_ext`` format).

        Returns:
            tuple: ``(wav_file, None)``; Polly provides no phonemes here.
        """
        from contextlib import closing

        text_type = "text"
        # If stripping SSML changes the sentence, it contains markup.
        if self.remove_ssml(sentence) != sentence:
            text_type = "ssml"
            sentence = self._convert_whisper_tags(sentence)
        response = self.polly.synthesize_speech(
            OutputFormat=self.audio_ext,
            Text=sentence,
            TextType=text_type,
            VoiceId=self.voice)

        # Close the streaming body once read so the connection is released.
        with closing(response['AudioStream']) as stream, \
                open(wav_file, 'wb') as f:
            f.write(stream.read())
        return (wav_file, None)  # No phonemes

    def describe_voices(self, language_code="en-US"):
        """Return Polly's voice listing for ``language_code``.

        An all-lowercase code such as ``"en-us"`` is normalized to the
        ``"en-US"`` form (example: 'it-IT'); a code without a dash is passed
        through unchanged instead of raising.
        """
        if language_code.islower():
            lang_part, dash, region_part = language_code.partition("-")
            language_code = lang_part + dash + region_part.upper()
        return self.polly.describe_voices(LanguageCode=language_code)
67+
68+
69+
class PollyTTSValidator(TTSValidator):
    """Validator that checks the prerequisites of the PollyTTS engine."""

    def __init__(self, tts):
        """Store the ``tts`` engine instance via the base validator."""
        super(PollyTTSValidator, self).__init__(tts)

    def validate_lang(self):
        """Language validation is not implemented yet."""
        # TODO
        pass

    def validate_dependencies(self):
        """Raise an Exception if the boto3 package cannot be imported."""
        try:
            from boto3 import Session  # noqa: F401 - import check only
        except ImportError:
            raise Exception(
                'PollyTTS dependencies not installed, please run pip install '
                'boto3 ')

    def validate_connection(self):
        """Check that a voice is configured and that Polly can be queried.

        Raises:
            Exception: if no voice is configured, or if listing the voices
                fails for any reason (bad credentials, no network, ...).
        """
        # Checked outside the try block so this specific message is not
        # replaced by the generic connection error below.
        if not self.tts.voice:
            raise Exception("Polly TTS Voice not configured")
        try:
            self.tts.describe_voices()
        except Exception as err:
            # Credential and network failures are not TypeErrors, so any
            # failure is wrapped; the original error is kept as the cause.
            raise Exception(
                'PollyTTS server could not be verified. Please check your '
                'internet connection and credentials.') from err

    def get_tts_class(self):
        """Return the TTS class this validator belongs to."""
        return PollyTTS
97+
98+
99+
if __name__ == "__main__":
    # Manual smoke test: synthesize an SSML sample that exercises Polly's
    # vocal-tract-length effect and write the audio to "polly.mp3".
    demo_ssml = """<speak>
This is my original voice, without any modifications. <amazon:effect vocal-tract-length="+15%">
Now, imagine that I am much bigger. </amazon:effect> <amazon:effect vocal-tract-length="-15%">
Or, perhaps you prefer my voice when I'm very small. </amazon:effect> You can also control the
timbre of my voice by making minor adjustments. <amazon:effect vocal-tract-length="+10%">
For example, by making me sound just a little bigger. </amazon:effect><amazon:effect
vocal-tract-length="-10%"> Or, making me sound only somewhat smaller. </amazon:effect>
</speak>"""
    engine = PollyTTS()
    engine.get_tts(demo_ssml, "polly.mp3")

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ padaos==0.1.9
2828
precise-runner==0.2.1
2929
petact==0.1.2
3030

31+
3132
jarbas_utils==0.4.1
3233
pycld2 # optional, needed for lang detection
3334
pycld3 # optional, needed for lang detection

0 commit comments

Comments
 (0)