def find_g2p_plugins():
    """Return whatever ``find_plugins`` reports for the G2P plugin type."""
    return find_plugins(PluginTypes.PHONEME)


def load_g2p_plugin(module_name):
    """Load the G2P plugin registered under ``module_name``.

    Args:
        module_name: plugin identifier, e.g. "ovos-g2p-plugin-cmudict".

    Returns:
        Whatever ``load_plugin`` returns for the G2P plugin type.
    """
    return load_plugin(module_name, PluginTypes.PHONEME)


class OVOSG2PFactory:
    """ replicates the base mycroft class, but uses only OPM enabled plugins"""
    # short aliases accepted in the configuration -> plugin names
    MAPPINGS = {
        "cmudict": "ovos-g2p-plugin-cmudict",
        "phoneme_guesser": "neon-g2p-plugin-phoneme-guesser",
        "gruut": "neon-g2p-plugin-gruut"
    }

    @staticmethod
    def get_class(config=None):
        """Factory method to get a G2P engine class based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: already-resolved G2P config (a dict with a "module"
                key); when empty it is read via ``get_g2p_config()``.

        Returns:
            The result of ``load_g2p_plugin`` for the selected module.
        """
        config = config or get_g2p_config()
        g2p_module = config.get("module") or 'cmudict'
        # The default "cmudict" is resolved through MAPPINGS like every other
        # alias; there is no built-in engine class to return directly.
        g2p_module = OVOSG2PFactory.MAPPINGS.get(g2p_module, g2p_module)
        return load_g2p_plugin(g2p_module)

    @staticmethod
    def create(config=None):
        """Factory method to create a G2P engine based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: full configuration (with a "g2p" section) or the "g2p"
                section itself; read from disk when omitted.

        Returns:
            An instance of the selected plugin class.

        Raises:
            Exception: anything raised while loading or instantiating the
                plugin is logged and re-raised unchanged.
        """
        # local import: this module does not import LOG at file level
        from ovos_utils.log import LOG

        g2p_config = get_g2p_config(config)
        g2p_module = g2p_config.get('module') or 'cmudict'
        try:
            clazz = OVOSG2PFactory.get_class(g2p_config)
            LOG.info(f'Found plugin {g2p_module}')
            # Grapheme2PhonemePlugin.__init__ accepts only the config dict
            g2p = clazz(g2p_config)
            # the plugin template defines no validator; only run one when
            # a plugin chooses to provide it
            validator = getattr(g2p, "validator", None)
            if validator is not None:
                validator.validate()
            LOG.info(f'Loaded plugin {g2p_module}')
        except Exception:
            LOG.exception('The selected G2P plugin could not be loaded.')
            raise
        return g2p


def get_g2p_config(config=None):
    """Resolve the settings of the selected G2P module.

    Args:
        config: full configuration containing a "g2p" section, or that
            section itself. When empty, the configuration is loaded with
            ``read_mycroft_config``.

    Returns:
        A new dict holding the module-specific options plus a "module" key
        naming the selected engine ("cmudict" when none is configured).
        The input mapping is never modified.
    """
    if not config:
        # local import: this module does not import it at file level
        from ovos_utils.configuration import read_mycroft_config
        config = read_mycroft_config()
    if "g2p" in config:
        config = config["g2p"]
    g2p_module = config.get('module') or 'cmudict'
    # copy so that adding "module" does not leak into the caller's config
    g2p_config = dict(config.get(g2p_module) or {})
    g2p_config["module"] = g2p_module
    return g2p_config


class PhonemeAlphabet(str, enum.Enum):
    """Phoneme alphabets named by this module."""
    ARPA = "arpa"
    IPA = "ipa"


class OutOfVocabulary(ValueError):
    """ could not get phonemes for word """


# deletes the digits 0-9 from a phoneme, as the original chained replaces did
_ARPA_DIGITS = str.maketrans("", "", "0123456789")


class Grapheme2PhonemePlugin:
    """Base class for grapheme-to-phoneme plugins.

    Subclasses override ``get_arpa`` and/or ``get_ipa``; whichever one is
    missing is derived from the other through the conversion tables.
    """

    def __init__(self, config=None):
        """Store the plugin configuration (an empty dict when omitted)."""
        self.config = config or {}

    @property
    def arpa_is_implemented(self):
        """True when the subclass overrides ``get_arpa``."""
        return self.__class__.get_arpa is not Grapheme2PhonemePlugin.get_arpa

    @property
    def ipa_is_implemented(self):
        """True when the subclass overrides ``get_ipa``."""
        return self.__class__.get_ipa is not Grapheme2PhonemePlugin.get_ipa

    def get_arpa(self, word, lang):
        """Return the ARPA phonemes of ``word``, or None when unavailable.

        The default implementation converts the output of ``get_ipa`` when
        the subclass provides it; phonemes missing from ``ipa2arpabet`` are
        dropped.
        """
        # if ipa is implemented, use it and convert
        if self.ipa_is_implemented:
            ipa = self.get_ipa(word, lang)
            if ipa is None:  # the plugin has no phonemes for this word
                return None
            stripped = (p.replace('ˈ', "") for p in ipa)
            return [ipa2arpabet[p] for p in stripped if p in ipa2arpabet]
        return None

    def get_ipa(self, word, lang):
        """Return the IPA phonemes of ``word``, or None when unavailable.

        The default implementation converts the output of ``get_arpa`` when
        the subclass provides it; digits are removed before the lookup and
        phonemes missing from ``arpabet2ipa`` are dropped.
        """
        # if arpa is implemented, use it and convert
        if self.arpa_is_implemented:
            arpa = self.get_arpa(word, lang)
            if arpa is None:  # the plugin has no phonemes for this word
                return None
            stripped = (p.translate(_ARPA_DIGITS) for p in arpa)
            return [arpabet2ipa[p] for p in stripped if p in arpabet2ipa]
        return None

    def _utterance2phonemes(self, get_phonemes, utterance, lang, ignore_oov):
        """Join per-word phonemes with "." separators.

        ``get_phonemes`` is ``self.get_arpa`` or ``self.get_ipa``. Words
        without phonemes raise OutOfVocabulary unless ``ignore_oov`` is set,
        in which case they are skipped without leaving a separator.
        """
        phonemes = []
        for word in utterance.split():
            try:
                phones = get_phonemes(word, lang) or []
            except OutOfVocabulary:
                if not ignore_oov:
                    raise
                phones = []
            if not phones:
                if ignore_oov:
                    continue
                raise OutOfVocabulary(f"unknown word: {word}")
            phonemes += list(phones) + ["."]
        if phonemes:
            return phonemes[:-1]  # drop the trailing separator
        if ignore_oov:
            return None
        raise OutOfVocabulary("no phonemes found for utterance")

    def utterance2arpa(self, utterance, lang, ignore_oov=False):
        """Return the ARPA phonemes of ``utterance``, words separated by ".".

        Returns None when ``ignore_oov`` is set and no word is known.

        Raises:
            OutOfVocabulary: a word has no phonemes and ``ignore_oov`` is
                False.
        """
        return self._utterance2phonemes(self.get_arpa, utterance, lang,
                                        ignore_oov)

    def utterance2ipa(self, utterance, lang, ignore_oov=False):
        """Return the IPA phonemes of ``utterance``, words separated by ".".

        Returns None when ``ignore_oov`` is set and no word is known.

        Raises:
            OutOfVocabulary: a word has no phonemes and ``ignore_oov`` is
                False.
        """
        return self._utterance2phonemes(self.get_ipa, utterance, lang,
                                        ignore_oov)

    def utterance2visemes(self, utterance, lang, default_dur=0.4):
        """Return ``(viseme, duration)`` pairs for ``utterance``.

        Every phoneme gets ``default_dur``; phonemes not present in VISIMES
        map to '4'. Words without phonemes use a fixed placeholder sequence
        so this method does not raise OutOfVocabulary.
        """
        placeholder = ['B', 'L', 'AE', '.', 'B', 'L', 'AE']
        arpa = []
        for w in utterance.split():
            try:
                phones = self.get_arpa(w, lang)
            except OutOfVocabulary:
                phones = None
            arpa += list(phones or placeholder) + ["."]
        return [(VISIMES.get(pho.lower(), '4'), default_dur) for pho in arpa]
VISIMES from ovos_utils.log import LOG from ovos_utils.messagebus import Message, FakeBus as BUS @@ -243,6 +241,7 @@ def __init__(self, lang="en-us", config=None, validator=None, self.config, tts_id, self.audio_ext ) self.cache.curate() + self.g2p = OVOSG2PFactory.create(config_core) self.handle_metric({"metric_type": "tts.init"}) def handle_metric(self, metadata=None): @@ -441,7 +440,13 @@ def _execute(self, sentence, ident, listen, **kwargs): else: # synth + cache audio_file, phonemes = self._synth(sentence, sentence_hash, **kwargs) - viseme = self.viseme(phonemes) if phonemes else None + # get visemes/mouth movements + if phonemes: + viseme = self.viseme(phonemes) + else: + lang = self._get_lang(kwargs) + viseme = self.g2p.utterance2visemes(sentence, lang) + audio_ext = self._determine_ext(audio_file) self.queue.put( (audio_ext, str(audio_file), viseme, ident, l) @@ -459,11 +464,7 @@ def _determine_ext(self, audio_file): except: return self.audio_ext - def _synth(self, sentence, sentence_hash=None, **kwargs): - self.handle_metric({"metric_type": "tts.synth.start"}) - sentence_hash = sentence_hash or hash_sentence(sentence) - audio = self.cache.define_audio_file(sentence_hash) - + def _get_lang(self, kwargs): # parse requested language for this TTS request # NOTE: this is ovos only functionality, not in mycroft-core! lang = kwargs.get("lang") @@ -474,7 +475,16 @@ def _synth(self, sentence, sentence_hash=None, **kwargs): kwargs["message"].context.get("lang") except: # not a mycroft message object pass - kwargs["lang"] = lang or self.lang + return lang or self.lang + + def _synth(self, sentence, sentence_hash=None, **kwargs): + self.handle_metric({"metric_type": "tts.synth.start"}) + sentence_hash = sentence_hash or hash_sentence(sentence) + audio = self.cache.define_audio_file(sentence_hash) + + # parse requested language for this TTS request + # NOTE: this is ovos only functionality, not in mycroft-core! 
+ kwargs["lang"] = self._get_lang(kwargs) # filter kwargs per plugin, different plugins expose different options # mycroft-core -> no kwargs @@ -494,11 +504,11 @@ def _synth(self, sentence, sentence_hash=None, **kwargs): def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None): sentence_hash = sentence_hash or hash_sentence(sentence) if not phonemes: - try: # TODO debug why get_phonemes fails in the first place - phonemes = get_phonemes(sentence) - self.handle_metric({"metric_type": "tts.phonemes.guess"}) - except (ImportError, FailedToGuessPhonemes): - pass + try: + phonemes = self.g2p.utterance2arpa(sentence, self.lang) + self.handle_metric({"metric_type": "tts.phonemes.g2p"}) + except Exception as e: + self.handle_metric({"metric_type": "tts.phonemes.g2p.error", "error": str(e)}) if phonemes: return self.save_phonemes(sentence_hash, phonemes) return None diff --git a/ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils/__init__.py index f17ee141..479b55ab 100644 --- a/ovos_plugin_manager/utils/__init__.py +++ b/ovos_plugin_manager/utils/__init__.py @@ -18,6 +18,7 @@ class PluginTypes(str, Enum): SKILL = "ovos.plugin.skill" + PHONEME = "ovos.plugin.g2p" AUDIO = 'mycroft.plugin.audioservice' STT = 'mycroft.plugin.stt' TTS = 'mycroft.plugin.tts' diff --git a/requirements.txt b/requirements.txt index b1149bac..dc324c51 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ ovos_utils~=0.0.14a3 -requests -phoneme_guesser \ No newline at end of file +requests \ No newline at end of file diff --git a/setup.py b/setup.py index 2eb005a5..16b8e689 100644 --- a/setup.py +++ b/setup.py @@ -9,8 +9,7 @@ author='jarbasAi', install_requires=["ovos_utils>=0.0.12a3", "requests", - "memory-tempfile", - "phoneme_guesser"], + "memory-tempfile"], author_email='jarbasai@mailfence.com', description='OpenVoiceOS plugin manager' )