|
| 1 | +import boto3 |
| 2 | +from mycroft.tts import TTS, TTSValidator |
| 3 | +from mycroft.configuration import Configuration |
| 4 | + |
| 5 | + |
class PollyTTS(TTS):
    """Text-to-speech backend that synthesizes speech with Amazon Polly.

    Voice, credentials and region come from the ``polly`` section of the
    TTS configuration and may be overridden by a ``polly`` or ``amazon``
    entry in ``self.keys``.
    """

    # Shorthand whisper tags and the Polly SSML they expand to.  Only whole
    # tags are rewritten: a blanket replace of the word "whispered" would
    # also mangle native ``<amazon:effect name="whispered">`` markup and any
    # spoken text that happens to contain the word.  The backslash closing
    # form is kept for backward compatibility with existing callers.
    _WHISPER_TAG_MAP = (
        ("</whispered>", "</amazon:effect>"),
        ("<\\whispered>", "</amazon:effect>"),
        ("<whispered>", "<amazon:effect name=\"whispered\">"),
    )

    def __init__(self, lang="en-us", config=None):
        """Read the Polly settings and create the boto3 Polly client.

        Args:
            lang: language code forwarded to the ``TTS`` base class.
            config: optional settings dict; when falsy, the ``tts.polly``
                section of the global configuration is used instead.
        """
        config = config or Configuration.get().get("tts", {}).get("polly", {})
        super(PollyTTS, self).__init__(lang, config, PollyTTSValidator(self),
                                       audio_ext="mp3",
                                       ssml_tags=["speak", "say-as", "voice",
                                                  "prosody", "break",
                                                  "emphasis", "sub", "lang",
                                                  "phoneme", "w", "whisper",
                                                  "amazon:auto-breaths",
                                                  "p", "s", "amazon:effect",
                                                  "mark"])

        self.voice = self.config.get("voice", "Matthew")
        self.key_id = self.config.get("key_id", '')
        self.key = self.config.get("secret_key", '')
        self.region = self.config.get("region", 'us-east-1')

        # NOTE(review): ``self.keys`` is not set in this class; presumably
        # the TTS base class provides it - confirm.
        # "polly" takes precedence over the generic "amazon" entry.  They
        # are looked up separately so other Amazon services (the translate
        # API, for example) can use different keys.
        for section in ("polly", "amazon"):
            overrides = self.keys.get(section)
            if overrides:
                self.key_id = overrides.get("key_id") or self.key_id
                self.key = overrides.get("secret_key") or self.key
                self.region = overrides.get("region") or self.region
                self.voice = overrides.get("voice") or self.voice
                break

        self.polly = boto3.Session(aws_access_key_id=self.key_id,
                                   aws_secret_access_key=self.key,
                                   region_name=self.region).client('polly')

    @classmethod
    def _expand_whisper_tags(cls, sentence):
        """Return ``sentence`` with shorthand whisper tags turned into SSML."""
        for shorthand, polly_tag in cls._WHISPER_TAG_MAP:
            sentence = sentence.replace(shorthand, polly_tag)
        return sentence

    def get_tts(self, sentence, wav_file):
        """Synthesize ``sentence`` with Polly and write the audio to a file.

        Args:
            sentence: plain text or SSML markup to speak.
            wav_file: path of the output file; the audio is written in the
                ``self.audio_ext`` format.

        Returns:
            Tuple ``(wav_file, None)``; Polly provides no phonemes here.
        """
        text_type = "text"
        # Stripping SSML only alters the sentence when markup is present.
        if self.remove_ssml(sentence) != sentence:
            text_type = "ssml"
            sentence = self._expand_whisper_tags(sentence)
        response = self.polly.synthesize_speech(
            OutputFormat=self.audio_ext,
            Text=sentence,
            TextType=text_type,
            VoiceId=self.voice)

        # Drain and close the streaming body before touching the output
        # file, so the HTTP connection is released even when reading fails
        # and no empty file is left behind.
        audio_stream = response['AudioStream']
        try:
            audio = audio_stream.read()
        finally:
            audio_stream.close()

        with open(wav_file, 'wb') as f:
            f.write(audio)
        return (wav_file, None)  # No phonemes

    def describe_voices(self, language_code="en-US"):
        """Return Polly's description of the voices for a language.

        Args:
            language_code: code such as ``'it-IT'``.  An all-lowercase code
                (``'en-us'``) has everything after the first dash
                upper-cased before the request is made.

        Returns:
            The response of the Polly ``describe_voices`` call.
        """
        if language_code.islower():
            # partition() copes with codes that have no dash or several,
            # where a two-way unpack of split("-") raises ValueError.
            language, dash, region = language_code.partition("-")
            language_code = language + dash + region.upper()
        # example 'it-IT' useful to retrieve voices
        voices = self.polly.describe_voices(LanguageCode=language_code)

        return voices
| 67 | + |
| 68 | + |
class PollyTTSValidator(TTSValidator):
    """Validator for the PollyTTS backend (dependencies and connectivity)."""

    def __init__(self, tts):
        """Store the TTS instance to validate via the base class."""
        super(PollyTTSValidator, self).__init__(tts)

    def validate_lang(self):
        """Language validation is not implemented yet."""
        # TODO
        pass

    def validate_dependencies(self):
        """Check that boto3 can be imported.

        Raises:
            Exception: if boto3 is not installed.
        """
        try:
            from boto3 import Session  # noqa: F401 - import is the check
        except ImportError:
            raise Exception(
                'PollyTTS dependencies not installed, please run pip install '
                'boto3 ')

    def validate_connection(self):
        """Check that a voice is configured and that Polly can be reached.

        Raises:
            Exception: if no voice is configured, or if the test request to
                Polly fails for any reason.
        """
        # A configuration problem, not a connectivity one, so it is checked
        # outside the try block and keeps its own message.
        if not self.tts.voice:
            raise Exception("Polly TTS Voice not configured")
        try:
            self.tts.describe_voices()
        except Exception:
            # boto3 reports bad credentials and network failures as
            # botocore exceptions, which are not TypeError; catching only
            # TypeError let them escape without this message.
            raise Exception(
                'PollyTTS server could not be verified. Please check your '
                'internet connection and credentials.')

    def get_tts_class(self):
        """Return the TTS class this validator belongs to."""
        return PollyTTS
| 97 | + |
| 98 | + |
if __name__ == "__main__":
    # Manual smoke test: synthesize an SSML sample that exercises Polly's
    # vocal-tract-length effect and save it next to the script.
    # Requires valid AWS credentials in the configuration.
    demo_ssml = """<speak>
    This is my original voice, without any modifications. <amazon:effect vocal-tract-length="+15%">
    Now, imagine that I am much bigger. </amazon:effect> <amazon:effect vocal-tract-length="-15%">
    Or, perhaps you prefer my voice when I'm very small. </amazon:effect> You can also control the
    timbre of my voice by making minor adjustments. <amazon:effect vocal-tract-length="+10%">
    For example, by making me sound just a little bigger. </amazon:effect><amazon:effect
    vocal-tract-length="-10%"> Or, making me sound only somewhat smaller. </amazon:effect>
</speak>"""
    engine = PollyTTS()
    engine.get_tts(demo_ssml, "polly.mp3")
0 commit comments