2121 # would hang here
2222 engine.playback.stop()
2323"""
24- import hashlib
25- import os .path
24+ import inspect
2625import random
2726import re
28- from os .path import isfile , join
27+ import subprocess
28+ from os .path import isfile , join , splitext
29+ from pathlib import Path
2930from queue import Queue , Empty
3031from threading import Thread
3132from time import time , sleep
32- import subprocess
33- import os
34- from inspect import signature
3533
34+ import requests
35+ from phoneme_guesser .exceptions import FailedToGuessPhonemes
36+
37+ from ovos_plugin_manager .utils .tts_cache import TextToSpeechCache , hash_sentence
3638from ovos_utils import resolve_resource_file
39+ from ovos_utils .configuration import read_mycroft_config
3740from ovos_utils .enclosure .api import EnclosureAPI
41+ from ovos_utils .file_utils import get_cache_directory
3842from ovos_utils .lang .phonemes import get_phonemes
39- from phoneme_guesser .exceptions import FailedToGuessPhonemes
4043from ovos_utils .lang .visimes import VISIMES
4144from ovos_utils .log import LOG
4245from ovos_utils .messagebus import Message , FakeBus as BUS
46+ from ovos_utils .metrics import Stopwatch
4347from ovos_utils .signal import check_for_signal , create_signal
4448from ovos_utils .sound import play_mp3 , play_wav
45- from ovos_utils .metrics import Stopwatch
46- from ovos_utils .configuration import read_mycroft_config
4749
4850EMPTY_PLAYBACK_QUEUE_TUPLE = (None , None , None , None , None )
4951
5052
51- def get_cache_directory (folder ):
52- if os .name == 'nt' :
53- import tempfile
54- return tempfile .mkdtemp (folder )
55- else :
56- from memory_tempfile import MemoryTempfile
57- tempfile = MemoryTempfile (fallback = True )
58- path = os .path .join (tempfile .gettempdir (), folder )
59- if not os .path .exists (path ):
60- os .makedirs (path )
61- return path
62-
6353class PlaybackThread (Thread ):
6454 """Thread class for playing back tts audio and sending
6555 viseme data to enclosure.
@@ -206,7 +196,6 @@ class TTS:
206196 def __init__ (self , lang = "en-us" , config = None , validator = None ,
207197 audio_ext = 'wav' , phonetic_spelling = True , ssml_tags = None ):
208198 self .log_timestamps = False
209- super (TTS , self ).__init__ ()
210199 if not config :
211200 try :
212201 config_core = read_mycroft_config () or {}
@@ -217,29 +206,33 @@ def __init__(self, lang="en-us", config=None, validator=None,
217206
218207 self .stopwatch = Stopwatch ()
219208 self .tts_name = self .__class__ .__name__
220- self .bus = BUS ()
209+ self .bus = BUS () # initialized in "init" step
221210 self .lang = lang or config .get ("lang" ) or 'en-us'
222211 self .config = config or {}
223212 self .validator = validator or TTSValidator (self )
224213 self .phonetic_spelling = phonetic_spelling
225214 self .audio_ext = audio_ext
226215 self .ssml_tags = ssml_tags or []
216+ self .log_timestamps = self .config .get ("log_timestamps" , False )
227217
228218 self .voice = self .config .get ("voice" )
229- self .cache_dir = get_cache_directory (self .tts_name )
230- self .filename = join (self .cache_dir , 'tts.' + self .audio_ext )
219+ # TODO can self.filename be deprecated ? is it used anywhere at all?
220+ cache_dir = get_cache_directory (self .tts_name )
221+ self .filename = join (cache_dir , 'tts.' + self .audio_ext )
231222 self .enclosure = None
232223 random .seed ()
233224 self .queue = Queue ()
234225 self .playback = PlaybackThread (self .queue )
235- # NOTE playback start call has been omitted and moved to init method
236- # init is called by mycroft, but non mycroft usage wont call it,
237- # meaning outside mycroft the enclosure is not set, bus is dummy and
238- # playback thread is not used, playback queue is not wanted
239- # if some module is calling get_tts (which is the correct usage)
240- self .clear_cache ()
226+ # NOTE: self.playback.start() was moved to init method
227+ # playback queue is not wanted if we only care about get_tts
228+ # init is called by mycroft, but non mycroft usage wont call it,
229+ # outside mycroft the enclosure is not set, bus is dummy and
230+ # playback thread is not used
241231 self .spellings = self .load_spellings ()
242- self .log_timestamps = self .config .get ("log_timestamps" , False )
232+ self .cache = TextToSpeechCache (
233+ self .config , self .tts_name , self .audio_ext
234+ )
235+ self .clear_cache ()
243236 self .handle_metric ({"metric_type" : "tts.init" })
244237
245238 def handle_metric (self , metadata = None ):
@@ -294,6 +287,7 @@ def end_audio(self, listen=False):
294287 if listen :
295288 self .bus .emit (Message ('mycroft.mic.listen' ))
296289
290+ self .cache .curate ()
297291 # This check will clear the "signal"
298292 check_for_signal ("isSpeaking" )
299293 self .stopwatch .stop ()
@@ -412,58 +406,114 @@ def execute(self, sentence, ident=None, listen=False, **kwargs):
412406 # Re-raise to allow the Exception to be handled externally as well.
413407 raise
414408
415- def _execute (self , sentence , ident , listen , ** kwargs ):
def _replace_phonetic_spellings(self, sentence):
    """Swap words for their configured phonetic spellings.

    Arguments:
        sentence (str): text that is about to be synthesized

    Returns:
        str: the sentence with every word that has an entry in
            self.spellings (looked up lower-cased) replaced by that
            entry; the sentence is returned untouched when
            self.phonetic_spelling is disabled
    """
    if not self.phonetic_spelling:
        return sentence

    def spell(match):
        word = match.group(0)
        key = word.lower()
        return self.spellings[key] if key in self.spellings else word

    # Substitute whole-word matches only. A plain str.replace() would also
    # rewrite the word wherever it shows up inside a longer word.
    return re.sub(r"[\w']+", spell, sentence)
421416
def _execute(self, sentence, ident, listen, **kwargs):
    """Synthesize (or fetch from cache) a sentence and queue it for playback.

    The sentence goes through phonetic spelling replacement and
    self._preprocess_sentence, which yields the chunks that are handled
    one by one.

    Arguments:
        sentence (str): text to be spoken
        ident: identifier forwarded untouched in every queued item
        listen (bool): flag attached to the last chunk only
        **kwargs: forwarded to self._synth for chunks that are not cached
    """
    spelled = self._replace_phonetic_spellings(sentence)
    pieces = self._preprocess_sentence(spelled)
    # Apply the listen flag to the last chunk, set the rest to False
    last_idx = len(pieces) - 1
    chunks = [(piece, listen if idx == last_idx else False)
              for idx, piece in enumerate(pieces)]
    self.handle_metric({"metric_type": "tts.preprocessed",
                        "n_chunks": len(chunks)})

    # synth -> queue for playback
    for chunk, chunk_listen in chunks:
        sentence_hash = hash_sentence(chunk)
        if sentence_hash in self.cache:
            # already synthesized before, reuse the cached audio
            audio_file, phonemes = self._get_from_cache(chunk, sentence_hash)
        else:
            # synthesize now, the result gets cached along the way
            audio_file, phonemes = self._synth(chunk, sentence_hash, **kwargs)

        viseme = None
        if phonemes:
            viseme = self.viseme(phonemes)
        audio_ext = self._determine_ext(audio_file)
        playback_item = (audio_ext, str(audio_file), viseme, ident,
                         chunk_listen)
        self.queue.put(playback_item)
        self.handle_metric({"metric_type": "tts.queued"})
466440
def _determine_ext(self, audio_file):
    """Determine the audio extension of a file on the fly.

    The extension defined in the plugin is not used directly since it
    might not match: some plugins support multiple extensions or have
    caches in different extensions.

    Arguments:
        audio_file: path-like object or string naming the audio file

    Returns:
        str: extension without the leading dot (e.g. "wav"), i.e. in the
            same format as self.audio_ext, which is returned when the
            file name carries no extension
    """
    try:
        _, audio_ext = splitext(str(audio_file))
    except Exception:
        # best effort: anything that cannot be turned into a file name
        # falls back to the extension declared by the plugin
        return self.audio_ext
    # splitext() keeps the leading dot (".wav"); strip it so the value
    # is comparable with self.audio_ext, and fall back when it is empty
    return audio_ext[1:] or self.audio_ext
451+
def _synth(self, sentence, sentence_hash=None, **kwargs):
    """Synthesize a sentence with self.get_tts and cache the result.

    Arguments:
        sentence (str): text to synthesize
        sentence_hash (str): cache key for the sentence, computed with
            hash_sentence when not given
        **kwargs: optional per-request options ("lang", "message", ...);
            only the ones named in the signature of self.get_tts are
            passed on

    Returns:
        tuple: (audio file object from the cache, phonemes as returned
            by self.get_tts)
    """
    self.handle_metric({"metric_type": "tts.synth.start"})
    sentence_hash = sentence_hash or hash_sentence(sentence)
    audio = self.cache.define_audio_file(sentence_hash)

    # parse requested language for this TTS request
    # NOTE: this is ovos only functionality, not in mycroft-core!
    lang = kwargs.get("lang")
    message = kwargs.get("message")
    if not lang and message:
        # get lang from message object if possible
        try:
            lang = message.data.get("lang") or message.context.get("lang")
        except Exception:
            # not a mycroft message object; Exception (rather than a bare
            # except) keeps KeyboardInterrupt/SystemExit propagating
            pass
    kwargs["lang"] = lang or self.lang

    # filter kwargs per plugin, different plugins expose different options
    # mycroft-core -> no kwargs
    # ovos -> lang
    # neon-core -> message
    accepted = inspect.signature(self.get_tts).parameters  # inspect once
    kwargs = {k: v for k, v in kwargs.items()
              if k in accepted and k not in ("sentence", "wav_file")}

    # finally do the TTS synth
    audio.path, phonemes = self.get_tts(sentence, str(audio), **kwargs)
    self.handle_metric({"metric_type": "tts.synth.finished"})
    # cache sentence + phonemes
    self._cache_sentence(sentence, audio, phonemes, sentence_hash)
    return audio, phonemes
483+
def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None):
    """Save the phonemes of a sentence, guessing them when missing.

    Arguments:
        sentence (str): sentence the phonemes belong to
        phonemes (str): phonemes to store; when empty they are guessed
            from the sentence with get_phonemes
        sentence_hash (str): cache key for the sentence, computed with
            hash_sentence when not given

    Returns:
        whatever self.save_phonemes returns for the stored phonemes, or
        None when no phonemes were given and guessing failed
    """
    key = sentence_hash or hash_sentence(sentence)
    if not phonemes:
        # TODO debug why get_phonemes fails in the first place
        try:
            phonemes = get_phonemes(sentence)
            self.handle_metric({"metric_type": "tts.phonemes.guess"})
        except (ImportError, FailedToGuessPhonemes):
            # guessing is best effort, carry on without phonemes
            pass
    return self.save_phonemes(key, phonemes) if phonemes else None
495+
def _cache_sentence(self, sentence, audio_file, phonemes=None, sentence_hash=None):
    """Register a synthesized sentence in the TTS cache.

    Arguments:
        sentence (str): sentence that was synthesized
        audio_file: audio file object whose ``path`` attribute points at
            the synthesized audio
        phonemes (str): phonemes for the sentence, if any
        sentence_hash (str): cache key for the sentence, computed with
            hash_sentence when not given
    """
    key = sentence_hash or hash_sentence(sentence)
    # the path may arrive as a plain string; normalize it to a Path
    # (presumably what the cache expects - TODO confirm)
    if isinstance(audio_file.path, str):
        audio_file.path = Path(audio_file.path)
    entry = (audio_file, self._cache_phonemes(sentence, phonemes, key))
    self.cache.cached_sentences[key] = entry
    self.handle_metric({"metric_type": "tts.synth.cached"})
504+
def _get_from_cache(self, sentence, sentence_hash=None):
    """Fetch the cached audio file and phonemes of a sentence.

    Arguments:
        sentence (str): sentence to look up
        sentence_hash (str): cache key for the sentence, computed with
            hash_sentence when not given

    Returns:
        tuple: (cached audio file object, phonemes or None)

    Raises:
        KeyError: if the sentence is not in self.cache.cached_sentences
    """
    sentence_hash = sentence_hash or hash_sentence(sentence)
    audio_file, pho_file = self.cache.cached_sentences[sentence_hash]
    LOG.info(f"Found {audio_file.name} in TTS cache")
    if not pho_file:
        # guess phonemes from sentence + cache them
        # The hash has to go in by keyword: the second positional
        # parameter of _cache_phonemes is ``phonemes``, so passing the
        # hash positionally would store the hash itself as the phonemes.
        pho_file = self._cache_phonemes(sentence,
                                        sentence_hash=sentence_hash)
    phonemes = pho_file.load() if pho_file else None
    return audio_file, phonemes
516+
467517 def viseme (self , phonemes ):
468518 """Create visemes from phonemes.
469519
@@ -492,7 +542,7 @@ def viseme(self, phonemes):
492542
def clear_cache(self):
    """Remove all cached files by clearing self.cache."""
    tts_cache = self.cache
    tts_cache.clear()
496546
497547 def save_phonemes (self , key , phonemes ):
498548 """Cache phonemes
@@ -501,29 +551,18 @@ def save_phonemes(self, key, phonemes):
501551 key (str): Hash key for the sentence
502552 phonemes (str): phoneme string to save
503553 """
504- pho_file = os .path .join (self .cache_dir , key + ".pho" )
505- try :
506- with open (pho_file , "w" ) as cachefile :
507- cachefile .write (phonemes )
508- except Exception :
509- LOG .exception ("Failed to write {} to cache" .format (pho_file ))
510- pass
554+ phoneme_file = self .cache .define_phoneme_file (key )
555+ phoneme_file .save (phonemes )
556+ return phoneme_file
511557
def load_phonemes(self, key):
    """Load phonemes from the cache.

    Arguments:
        key (str): Key identifying phoneme cache

    Returns:
        whatever the ``load`` method of the phoneme file object that
        self.cache defines for ``key`` returns
    """
    return self.cache.define_phoneme_file(key).load()
527566
528567 def stop (self ):
529568 try :
@@ -628,3 +667,38 @@ def get_tts(self, sentence, wav_file, lang=None):
628667 files , phonemes = self .sentence_to_files (sentence )
629668 wav_file = self .concat (files , wav_file )
630669 return wav_file , phonemes
670+
671+
class RemoteTTSException(Exception):
    """Base class for errors related to remote TTS engines."""
674+
675+
class RemoteTTSTimeoutException(RemoteTTSException):
    """Remote TTS error for requests that timed out."""
678+
679+
class RemoteTTS(TTS):
    """
    Abstract class for a Remote TTS engine implementation.
    This class is only provided for backwards compatibility
    Usage is discouraged
    """

    def __init__(self, lang, config, url, api_path, validator):
        """Set up the remote endpoint.

        Arguments:
            lang (str): language passed on to TTS
            config (dict): engine configuration, its "url" entry takes
                precedence over ``url``
            url (str): default base url of the remote service
            api_path (str): path appended to the base url on requests
            validator: validator passed on to TTS
        """
        super(RemoteTTS, self).__init__(lang, config, validator)
        self.api_path = api_path
        self.auth = None
        # the base class accepts config=None, so do not crash on it here
        self.url = (config or {}).get('url', url).rstrip('/')

    def build_request_params(self, sentence):
        """Build the query parameters sent along with the TTS request.

        Returns None here; presumably meant to be overridden by
        subclasses.

        Arguments:
            sentence (str): sentence to be synthesized
        """
        pass

    def get_tts(self, sentence, wav_file, lang=None):
        """Request the audio from the remote service and save it.

        Arguments:
            sentence (str): sentence to be synthesized
            wav_file (str): path the received audio is written to
            lang (str): accepted for signature compatibility, not used

        Returns:
            tuple: (wav_file, None), no phonemes are provided

        Raises:
            RemoteTTSException: if the service does not answer with
                status code 200
        """
        # NOTE(review): verify=False disables TLS certificate validation;
        # kept as is for backwards compatibility, but worth revisiting
        r = requests.get(
            self.url + self.api_path,
            params=self.build_request_params(sentence),
            timeout=10, verify=False, auth=self.auth)
        if r.status_code != 200:
            # callers unpack a (wav_file, phonemes) tuple, so returning a
            # bare None would only surface later as an opaque TypeError
            raise RemoteTTSException(
                f"{self.url + self.api_path} answered with status code "
                f"{r.status_code}")
        with open(wav_file, 'wb') as f:
            f.write(r.content)
        return wav_file, None
0 commit comments