feat/g2p_plugins (#21)

NeonJarbas · web-flow · commit 1b7d8b5a2eb1 · 2022-02-02T12:52:14.000Z
authored-by: jarbasai <jarbasai@mailfence.com>
diff --git a/ovos_plugin_manager/g2p.py b/ovos_plugin_manager/g2p.py
@@ -0,0 +1,105 @@
+from ovos_plugin_manager.utils import load_plugin, find_plugins, PluginTypes
+from ovos_plugin_manager.templates.g2p import Grapheme2PhonemePlugin, PhonemeAlphabet
+from ovos_utils.configuration import read_mycroft_config
+from ovos_utils.log import LOG
+
+
+def find_g2p_plugins():
+    """Find the plugins registered as PluginTypes.PHONEME."""
+    return find_plugins(PluginTypes.PHONEME)
+
+
+def load_g2p_plugin(module_name):
+    """Load the PluginTypes.PHONEME plugin registered as ``module_name``."""
+    return load_plugin(module_name, PluginTypes.PHONEME)
+
+
+class OVOSG2PFactory:
+    """ replicates the base mycroft class, but uses only OPM enabled plugins"""
+    # short names accepted in the config -> plugin names handed to the loader
+    MAPPINGS = {
+        "cmudict": "ovos-g2p-plugin-cmudict",
+        "phoneme_guesser": "neon-g2p-plugin-phoneme-guesser",
+        "gruut": "neon-g2p-plugin-gruut"
+    }
+
+    @staticmethod
+    def get_class(config=None):
+        """Factory method to get a G2P engine class based on configuration.
+
+        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
+        the name of a G2P module to be read by this method.
+
+        "g2p": {
+            "module": <engine_name>
+        }
+
+        Args:
+            config (dict): engine config as returned by get_g2p_config;
+                obtained from get_g2p_config() when omitted
+
+        Returns:
+            whatever load_g2p_plugin returns for the selected module
+        """
+        config = config or get_g2p_config()
+        g2p_module = config.get("module") or 'cmudict'
+        # every short name, the default "cmudict" included, is resolved
+        # through MAPPINGS; unknown names are passed to the loader untouched
+        g2p_module = OVOSG2PFactory.MAPPINGS.get(g2p_module, g2p_module)
+        return load_g2p_plugin(g2p_module)
+
+    @staticmethod
+    def create(config=None):
+        """Factory method to create a G2P engine based on configuration.
+
+        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
+        the name of a G2P module to be read by this method.
+
+        "g2p": {
+            "module": <engine_name>
+        }
+
+        Args:
+            config (dict): configuration holding the ``g2p`` section;
+                obtained from read_mycroft_config() when omitted
+
+        Returns:
+            an instance of the selected plugin, built from its config dict
+
+        Raises:
+            Exception: any error raised while loading or instantiating the
+                plugin is logged and re-raised
+        """
+        g2p_config = get_g2p_config(config)
+        g2p_module = g2p_config.get('module', 'cmudict')
+        try:
+            clazz = OVOSG2PFactory.get_class(g2p_config)
+            LOG.info(f'Found plugin {g2p_module}')
+            # Grapheme2PhonemePlugin.__init__ takes only the config dict
+            g2p = clazz(g2p_config)
+            LOG.info(f'Loaded plugin {g2p_module}')
+        except Exception:
+            LOG.exception('The selected G2P plugin could not be loaded.')
+            raise
+        return g2p
+
+
+def get_g2p_config(config=None):
+    """Extract the config of the selected G2P engine.
+
+    Args:
+        config (dict): full configuration, or just its ``g2p`` section;
+            obtained from read_mycroft_config() when omitted
+
+    Returns:
+        dict: a copy of the options stored under the selected module name,
+            plus a ``module`` key holding that name ('cmudict' by default)
+    """
+    config = config or read_mycroft_config()
+    if "g2p" in config:
+        config = config["g2p"]
+    g2p_module = config.get('module') or 'cmudict'
+    # copy so the "module" key is not written into the caller's config
+    g2p_config = dict(config.get(g2p_module) or {})
+    g2p_config["module"] = g2p_module
+    return g2p_config
diff --git a/ovos_plugin_manager/templates/g2p.py b/ovos_plugin_manager/templates/g2p.py
@@ -0,0 +1,144 @@
+import enum
+from ovos_utils.lang.phonemes import arpabet2ipa, ipa2arpabet
+from ovos_utils.lang.visimes import VISIMES
+
+
+class PhonemeAlphabet(str, enum.Enum):
+    """Phoneme alphabets, usable as plain strings."""
+    ARPA = "arpa"
+    IPA = "ipa"
+
+
+class OutOfVocabulary(ValueError):
+    """ could not get phonemes for word """
+
+
+class Grapheme2PhonemePlugin:
+    """Base class for grapheme to phoneme plugins.
+
+    Subclasses override get_arpa and/or get_ipa; the one left untouched is
+    derived from the other through the ipa2arpabet / arpabet2ipa tables.
+    """
+    # removes every digit from an ARPA phone before it is looked up
+    _DIGITS = str.maketrans("", "", "0123456789")
+    # phones used by utterance2visemes for words without phonemes
+    _FALLBACK_ARPA = ['B', 'L', 'AE', '.', 'B', 'L', 'AE']
+
+    def __init__(self, config=None):
+        self.config = config or {}
+
+    @property
+    def arpa_is_implemented(self):
+        """True when the subclass overrides get_arpa."""
+        return self.__class__.get_arpa is not Grapheme2PhonemePlugin.get_arpa
+
+    @property
+    def ipa_is_implemented(self):
+        """True when the subclass overrides get_ipa."""
+        return self.__class__.get_ipa is not Grapheme2PhonemePlugin.get_ipa
+
+    def get_arpa(self, word, lang):
+        """Return the ARPA phones of ``word``.
+
+        The default implementation converts the output of get_ipa, dropping
+        phones that are missing from ipa2arpabet. Returns None when get_ipa
+        is not overridden either.
+        """
+        if not self.ipa_is_implemented:
+            return None
+        # tolerate a get_ipa that returns None instead of a list
+        ipa = self.get_ipa(word, lang) or []
+        stripped = (p.replace('ˈ', "") for p in ipa)
+        return [ipa2arpabet[p] for p in stripped if p in ipa2arpabet]
+
+    def get_ipa(self, word, lang):
+        """Return the IPA phones of ``word``.
+
+        The default implementation converts the output of get_arpa, dropping
+        phones that are missing from arpabet2ipa. Returns None when get_arpa
+        is not overridden either.
+        """
+        if not self.arpa_is_implemented:
+            return None
+        # tolerate a get_arpa that returns None instead of a list
+        arpa = self.get_arpa(word, lang) or []
+        stripped = (p.translate(self._DIGITS) for p in arpa)
+        return [arpabet2ipa[p] for p in stripped if p in arpabet2ipa]
+
+    def _utterance2phonemes(self, get_phones, utterance, lang, ignore_oov):
+        """Word loop shared by utterance2arpa and utterance2ipa.
+
+        ``get_phones`` is called as ``get_phones(word, lang)`` for every
+        whitespace separated word; words are joined with a "." entry.
+        """
+        phonemes = []
+        for w in utterance.split():
+            try:
+                phones = get_phones(w, lang) or []
+            except OutOfVocabulary:
+                # a plugin may raise instead of returning nothing
+                if not ignore_oov:
+                    raise
+                phones = []
+            if not phones:
+                if ignore_oov:
+                    # skip the word, do not emit a dangling separator
+                    continue
+                raise OutOfVocabulary(f"unknown word: {w}")
+            phonemes.extend(phones)
+            phonemes.append(".")
+        if phonemes:
+            return phonemes[:-1]  # drop the trailing separator
+        if ignore_oov:
+            return None
+        raise OutOfVocabulary
+
+    def utterance2arpa(self, utterance, lang, ignore_oov=False):
+        """Return the ARPA phones of ``utterance``, words separated by ".".
+
+        Args:
+            utterance (str): text to convert, split on whitespace
+            lang (str): language passed on to get_arpa
+            ignore_oov (bool): skip words without phonemes instead of raising
+
+        Returns:
+            list of phones, or None if nothing was found and ignore_oov is set
+
+        Raises:
+            OutOfVocabulary: a word has no phonemes and ignore_oov is False
+        """
+        return self._utterance2phonemes(self.get_arpa, utterance, lang,
+                                        ignore_oov)
+
+    def utterance2ipa(self, utterance, lang, ignore_oov=False):
+        """Return the IPA phones of ``utterance``, words separated by ".".
+
+        Args:
+            utterance (str): text to convert, split on whitespace
+            lang (str): language passed on to get_ipa
+            ignore_oov (bool): skip words without phonemes instead of raising
+
+        Returns:
+            list of phones, or None if nothing was found and ignore_oov is set
+
+        Raises:
+            OutOfVocabulary: a word has no phonemes and ignore_oov is False
+        """
+        return self._utterance2phonemes(self.get_ipa, utterance, lang,
+                                        ignore_oov)
+
+    def utterance2visemes(self, utterance, lang, default_dur=0.4):
+        """Return ``(viseme, duration)`` tuples for ``utterance``.
+
+        Words without ARPA phones use a fixed fallback sequence; phones
+        missing from VISIMES map to '4'. Every tuple gets ``default_dur``.
+        """
+        arpa = []
+        for w in utterance.split():
+            try:
+                phones = self.get_arpa(w, lang)
+            except OutOfVocabulary:
+                phones = None
+            arpa.extend(phones or self._FALLBACK_ARPA)
+            arpa.append(".")
+        return [(VISIMES.get(pho.lower(), '4'), default_dur) for pho in arpa]
diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py
@@ -32,14 +32,12 @@
 from time import time, sleep
 
 import requests
-from phoneme_guesser.exceptions import FailedToGuessPhonemes
-
+from ovos_plugin_manager.g2p import OVOSG2PFactory
 from ovos_plugin_manager.utils.tts_cache import TextToSpeechCache, hash_sentence
 from ovos_utils import resolve_resource_file
 from ovos_utils.configuration import read_mycroft_config
 from ovos_utils.enclosure.api import EnclosureAPI
 from ovos_utils.file_utils import get_cache_directory
-from ovos_utils.lang.phonemes import get_phonemes
 from ovos_utils.lang.visimes import VISIMES
 from ovos_utils.log import LOG
 from ovos_utils.messagebus import Message, FakeBus as BUS
@@ -243,6 +241,7 @@ def __init__(self, lang="en-us", config=None, validator=None,
             self.config, tts_id, self.audio_ext
         )
         self.cache.curate()
+        self.g2p = OVOSG2PFactory.create(config_core)
         self.handle_metric({"metric_type": "tts.init"})
 
     def handle_metric(self, metadata=None):
@@ -441,7 +440,13 @@ def _execute(self, sentence, ident, listen, **kwargs):
             else:  # synth + cache
                 audio_file, phonemes = self._synth(sentence, sentence_hash, **kwargs)
 
-            viseme = self.viseme(phonemes) if phonemes else None
+            # get visemes/mouth movements
+            if phonemes:
+                viseme = self.viseme(phonemes)
+            else:
+                lang = self._get_lang(kwargs)
+                viseme = self.g2p.utterance2visemes(sentence, lang)
+
             audio_ext = self._determine_ext(audio_file)
             self.queue.put(
                 (audio_ext, str(audio_file), viseme, ident, l)
@@ -459,11 +464,7 @@ def _determine_ext(self, audio_file):
         except:
             return self.audio_ext
 
-    def _synth(self, sentence, sentence_hash=None, **kwargs):
-        self.handle_metric({"metric_type": "tts.synth.start"})
-        sentence_hash = sentence_hash or hash_sentence(sentence)
-        audio = self.cache.define_audio_file(sentence_hash)
-
+    def _get_lang(self, kwargs):
         # parse requested language for this TTS request
         # NOTE: this is ovos only functionality, not in mycroft-core!
         lang = kwargs.get("lang")
@@ -474,7 +475,16 @@ def _synth(self, sentence, sentence_hash=None, **kwargs):
                        kwargs["message"].context.get("lang")
             except:  # not a mycroft message object
                 pass
-        kwargs["lang"] = lang or self.lang
+        return lang or self.lang
+
+    def _synth(self, sentence, sentence_hash=None, **kwargs):
+        self.handle_metric({"metric_type": "tts.synth.start"})
+        sentence_hash = sentence_hash or hash_sentence(sentence)
+        audio = self.cache.define_audio_file(sentence_hash)
+
+        # parse requested language for this TTS request
+        # NOTE: this is ovos only functionality, not in mycroft-core!
+        kwargs["lang"] = self._get_lang(kwargs)
 
         # filter kwargs per plugin, different plugins expose different options
         #   mycroft-core -> no kwargs
@@ -494,11 +504,11 @@ def _synth(self, sentence, sentence_hash=None, **kwargs):
     def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None):
         sentence_hash = sentence_hash or hash_sentence(sentence)
         if not phonemes:
-            try:  # TODO debug why get_phonemes fails in the first place
-                phonemes = get_phonemes(sentence)
-                self.handle_metric({"metric_type": "tts.phonemes.guess"})
-            except (ImportError, FailedToGuessPhonemes):
-                pass
+            try:
+                phonemes = self.g2p.utterance2arpa(sentence, self.lang)
+                self.handle_metric({"metric_type": "tts.phonemes.g2p"})
+            except Exception as e:
+                self.handle_metric({"metric_type": "tts.phonemes.g2p.error", "error": str(e)})
         if phonemes:
             return self.save_phonemes(sentence_hash, phonemes)
         return None
diff --git a/ovos_plugin_manager/utils/__init__.py b/ovos_plugin_manager/utils/__init__.py
@@ -18,6 +18,7 @@
 
 class PluginTypes(str, Enum):
     SKILL = "ovos.plugin.skill"
+    PHONEME = "ovos.plugin.g2p"
     AUDIO = 'mycroft.plugin.audioservice'
     STT = 'mycroft.plugin.stt'
     TTS = 'mycroft.plugin.tts'
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,2 @@
 ovos_utils~=0.0.14a3
-requests
-phoneme_guesser
+requests
diff --git a/setup.py b/setup.py
@@ -9,8 +9,7 @@
     author='jarbasAi',
     install_requires=["ovos_utils>=0.0.12a3",
                       "requests",
-                      "memory-tempfile",
-                      "phoneme_guesser"],
+                      "memory-tempfile"],
     author_email='jarbasai@mailfence.com',
     description='OpenVoiceOS plugin manager'
 )