Merge pull request #13 from NeonJarbas/feat/polly_TTS

NeonDaniel · web-flow · commit 718adcc56fa5 · 2020-02-14T10:42:06.000-08:00
Add Amazon Polly TTS engine
diff --git a/mycroft/configuration/mycroft.conf b/mycroft/configuration/mycroft.conf
@@ -383,7 +383,13 @@
   "tts": {
     // Engine.  Options: "mimic", "google", "marytts", "fatts", "espeak", "spdsay", "responsive_voice", "yandex"
     "pulse_duck": false,
-    "module": "mimic",
+    "module": "polly",
+    "polly": {
+        "voice": "Matthew",
+        "region": "us-east-1",
+        "key_id": "",
+        "secret_key": ""
+    },
     "mimic": {
       "voice": "ap"
     },
diff --git a/mycroft/tts/__init__.py b/mycroft/tts/__init__.py
@@ -495,6 +495,7 @@ class TTSFactory:
     from mycroft.tts.responsive_voice_tts import ResponsiveVoice
     from mycroft.tts.mimic2_tts import Mimic2
     from mycroft.tts.yandex_tts import YandexTTS
+    from mycroft.tts.polly_tts import PollyTTS
 
     CLASSES = {
         "mimic": Mimic,
@@ -507,7 +508,8 @@ class TTSFactory:
         "watson": WatsonTTS,
         "bing": BingTTS,
         "responsive_voice": ResponsiveVoice,
-        "yandex": YandexTTS
+        "yandex": YandexTTS,
+        "polly": PollyTTS
     }
 
     @staticmethod
diff --git a/mycroft/tts/polly_tts.py b/mycroft/tts/polly_tts.py
@@ -0,0 +1,109 @@
+import boto3
+from mycroft.tts import TTS, TTSValidator
+from mycroft.configuration import Configuration
+
+
+class PollyTTS(TTS):
+
+    def __init__(self, lang="en-us", config=None):
+        config = config or Configuration.get().get("tts", {}).get("polly", {})
+        super(PollyTTS, self).__init__(lang, config, PollyTTSValidator(self),
+                                       audio_ext="mp3",
+                                       ssml_tags=["speak", "say-as", "voice",
+                                                  "prosody", "break",
+                                                  "emphasis", "sub", "lang",
+                                                  "phoneme", "w", "whisper",
+                                                  "amazon:auto-breaths",
+                                                  "p", "s", "amazon:effect",
+                                                  "mark"])
+
+        self.voice = self.config.get("voice", "Matthew")
+        self.key_id = self.config.get("key_id", '')
+        self.key = self.config.get("secret_key", '')
+        self.region = self.config.get("region", 'us-east-1')
+
+        if self.keys.get("polly"):
+            self.key_id = self.keys["polly"].get("key_id") or self.key_id
+            self.key = self.keys["polly"].get("secret_key") or self.key
+            self.region = self.keys["polly"].get("region") or self.region
+            self.voice = self.keys["polly"].get("voice") or self.voice
+        # these checks are separate in case we want to use different keys for the translate api for example
+        elif self.keys.get("amazon"):
+            self.key_id = self.keys["amazon"].get("key_id") or self.key_id
+            self.key = self.keys["amazon"].get("secret_key") or self.key
+            self.region = self.keys["amazon"].get("region") or self.region
+            self.voice = self.keys["amazon"].get("voice") or self.voice
+
+        self.polly = boto3.Session(aws_access_key_id=self.key_id,
+                                   aws_secret_access_key=self.key,
+                                   region_name=self.region).client('polly')
+
+    def get_tts(self, sentence, wav_file):
+        text_type = "text"
+        if self.remove_ssml(sentence) != sentence:
+            text_type = "ssml"
+            sentence = sentence.replace("\whispered", "/amazon:effect") \
+                .replace("\\whispered", "/amazon:effect") \
+                .replace("whispered", "amazon:effect name=\"whispered\"")
+        response = self.polly.synthesize_speech(
+            OutputFormat=self.audio_ext,
+            Text=sentence,
+            TextType=text_type,
+            VoiceId=self.voice)
+
+        with open(wav_file, 'wb') as f:
+            f.write(response['AudioStream'].read())
+        return (wav_file, None)  # No phonemes
+
+    def describe_voices(self, language_code="en-US"):
+        if language_code.islower():
+            a, b = language_code.split("-")
+            b = b.upper()
+            language_code = "-".join([a, b])
+        # example 'it-IT' useful to retrieve voices
+        voices = self.polly.describe_voices(LanguageCode=language_code)
+
+        return voices
+
+
+class PollyTTSValidator(TTSValidator):
+    def __init__(self, tts):
+        super(PollyTTSValidator, self).__init__(tts)
+
+    def validate_lang(self):
+        # TODO
+        pass
+
+    def validate_dependencies(self):
+        try:
+            from boto3 import Session
+        except ImportError:
+            raise Exception(
+                'PollyTTS dependencies not installed, please run pip install '
+                'boto3 ')
+
+    def validate_connection(self):
+        try:
+            if not self.tts.voice:
+                raise Exception("Polly TTS Voice not configured")
+            output = self.tts.describe_voices()
+        except TypeError:
+            raise Exception(
+                'PollyTTS server could not be verified. Please check your '
+                'internet connection and credentials.')
+
+    def get_tts_class(self):
+        return PollyTTS
+
+
+if __name__ == "__main__":
+    e = PollyTTS()
+    ssml = """<speak>
+     This is my original voice, without any modifications. <amazon:effect vocal-tract-length="+15%"> 
+     Now, imagine that I am much bigger. </amazon:effect> <amazon:effect vocal-tract-length="-15%"> 
+     Or, perhaps you prefer my voice when I'm very small. </amazon:effect> You can also control the 
+     timbre of my voice by making minor adjustments. <amazon:effect vocal-tract-length="+10%"> 
+     For example, by making me sound just a little bigger. </amazon:effect><amazon:effect 
+     vocal-tract-length="-10%"> Or, making me sound only somewhat smaller. </amazon:effect> 
+</speak>"""
+    e.get_tts(ssml, "polly.mp3")
diff --git a/requirements.txt b/requirements.txt
@@ -28,6 +28,7 @@ padaos==0.1.9
 precise-runner==0.2.1
 petact==0.1.2
 
+
 jarbas_utils==0.4.1
 pycld2 # optional, needed for lang detection
 pycld3 # optional, needed for lang detection