Skip to content

Commit 1b7d8b5

Browse files
authored
feat/g2p_plugins (#21)
authored-by: jarbasai <jarbasai@mailfence.com>
1 parent 6bf4715 commit 1b7d8b5

File tree

6 files changed

+187
-19
lines changed

6 files changed

+187
-19
lines changed

ovos_plugin_manager/g2p.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from ovos_utils.configuration import read_mycroft_config
from ovos_utils.log import LOG

from ovos_plugin_manager.templates.g2p import Grapheme2PhonemePlugin, PhonemeAlphabet
from ovos_plugin_manager.utils import load_plugin, find_plugins, PluginTypes
3+
4+
5+
def find_g2p_plugins():
    """Look up the available G2P plugins.

    Delegates to ``find_plugins`` with the ``PluginTypes.PHONEME`` plugin
    type and returns its result unchanged.
    """
    plugin_type = PluginTypes.PHONEME
    return find_plugins(plugin_type)
7+
8+
9+
def load_g2p_plugin(module_name):
    """Load a single G2P plugin by name.

    Delegates to ``load_plugin`` with the ``PluginTypes.PHONEME`` plugin
    type and returns its result unchanged.

    Args:
        module_name: name of the plugin to load.
    """
    plugin_type = PluginTypes.PHONEME
    return load_plugin(module_name, plugin_type)
11+
12+
13+
class OVOSG2PFactory:
    """Factory for Grapheme-to-Phoneme (G2P) engines.

    Replicates the base mycroft factory classes, but uses only OPM enabled
    plugins.
    """
    # short module names accepted in the configuration -> plugin names
    MAPPINGS = {
        "cmudict": "ovos-g2p-plugin-cmudict",
        "phoneme_guesser": "neon-g2p-plugin-phoneme-guesser",
        "gruut": "neon-g2p-plugin-gruut"
    }

    @staticmethod
    def get_class(config=None):
        """Factory method to get a G2P engine class based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: either the full configuration (with a ``g2p`` section)
                or an already extracted G2P configuration; when empty the
                configuration is read by ``get_g2p_config``.

        Returns:
            Whatever ``load_g2p_plugin`` returns for the resolved plugin name.
        """
        # normalising here makes both the full config and the flattened
        # config produced by get_g2p_config acceptable inputs
        config = get_g2p_config(config)
        g2p_module = config.get("module") or 'cmudict'
        # translate short aliases ("cmudict", "gruut", ...) to plugin names;
        # unknown names are passed through untouched
        g2p_module = OVOSG2PFactory.MAPPINGS.get(g2p_module, g2p_module)
        return load_g2p_plugin(g2p_module)

    @staticmethod
    def create(config=None):
        """Factory method to create a G2P engine based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: configuration to read the ``g2p`` section from; when
                empty the configuration is read by ``get_g2p_config``.

        Returns:
            An instance of the selected plugin class, constructed with the
            G2P configuration as its only argument.

        Raises:
            Exception: any error raised while resolving or instantiating the
                plugin is logged and re-raised.
        """
        g2p_config = get_g2p_config(config)
        g2p_module = g2p_config.get('module') or 'cmudict'
        try:
            clazz = OVOSG2PFactory.get_class(g2p_config)
            # NOTE(review): guards against the loader yielding None for a
            # plugin that is not installed - confirm load_plugin's contract
            if clazz is None:
                raise ImportError(f'G2P plugin {g2p_module} not found')
            LOG.info(f'Found plugin {g2p_module}')
            # Grapheme2PhonemePlugin.__init__ accepts only the config
            g2p = clazz(g2p_config)
            LOG.info(f'Loaded plugin {g2p_module}')
        except Exception:
            LOG.exception('The selected G2P plugin could not be loaded.')
            raise
        return g2p
63+
64+
65+
def get_g2p_config(config=None):
    """Extract the configuration of the selected G2P module.

    Args:
        config: a configuration dict, optionally holding a ``g2p`` section.
            When empty or None, ``read_mycroft_config()`` is used instead.

    Returns:
        A new dict with the settings stored under the selected module's
        name plus a ``"module"`` key naming that module. The module
        defaults to ``'cmudict'`` when missing or empty.
    """
    config = config or read_mycroft_config()
    if "g2p" in config:
        config = config["g2p"]
    # "or" (rather than a .get default) also covers module: None / ""
    g2p_module = config.get('module') or 'cmudict'
    # copy so that adding "module" below never mutates the caller's config
    g2p_config = dict(config.get(g2p_module) or {})
    g2p_config["module"] = g2p_module
    return g2p_config
ovos_plugin_manager/templates/g2p.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import enum
2+
from ovos_utils.lang.phonemes import arpabet2ipa, ipa2arpabet
3+
from ovos_utils.lang.visimes import VISIMES
4+
5+
6+
class PhonemeAlphabet(str, enum.Enum):
    """Names of the phoneme alphabets (ARPA and IPA).

    The ``str`` mixin makes every member compare equal to its plain
    string value.
    """

    ARPA = "arpa"
    IPA = "ipa"
9+
10+
11+
class OutOfVocabulary(ValueError):
    """Raised when no phonemes could be obtained for a word."""
13+
14+
15+
class Grapheme2PhonemePlugin:
    """Base class for grapheme-to-phoneme (G2P) plugins.

    Subclasses override ``get_arpa`` and/or ``get_ipa``. When only one of
    the two is overridden, the other is derived from it through the
    ``ipa2arpabet`` / ``arpabet2ipa`` lookup tables.
    """
    # translation table removing the digits 0-9 from ARPA symbols
    _ARPA_DIGITS = str.maketrans("", "", "0123456789")
    # phonemes used by utterance2visemes for words without phonemes
    _UNKNOWN_WORD_ARPA = ('B', 'L', 'AE', '.', 'B', 'L', 'AE')

    def __init__(self, config=None):
        """Store the plugin configuration (an empty dict when not given)."""
        self.config = config or {}

    @property
    def arpa_is_implemented(self):
        """True when the subclass overrides ``get_arpa``."""
        return self.__class__.get_arpa is not Grapheme2PhonemePlugin.get_arpa

    @property
    def ipa_is_implemented(self):
        """True when the subclass overrides ``get_ipa``."""
        return self.__class__.get_ipa is not Grapheme2PhonemePlugin.get_ipa

    def get_arpa(self, word, lang):
        """Return the ARPA phonemes of ``word``, or None when unavailable.

        The base implementation converts the output of an overridden
        ``get_ipa``; IPA symbols missing from ``ipa2arpabet`` are dropped.
        """
        if not self.ipa_is_implemented:
            return None
        ipa = self.get_ipa(word, lang)
        if ipa is None:  # get_ipa found nothing, there is nothing to convert
            return None
        # the lookup is done on symbols without the 'ˈ' mark
        symbols = (p.replace('ˈ', "") for p in ipa)
        return [ipa2arpabet[s] for s in symbols if s in ipa2arpabet]

    def get_ipa(self, word, lang):
        """Return the IPA phonemes of ``word``, or None when unavailable.

        The base implementation converts the output of an overridden
        ``get_arpa``; ARPA symbols missing from ``arpabet2ipa`` are dropped.
        """
        if not self.arpa_is_implemented:
            return None
        arpa = self.get_arpa(word, lang)
        if arpa is None:  # get_arpa found nothing, there is nothing to convert
            return None
        # the lookup is done on symbols with all digits removed
        symbols = (p.translate(self._ARPA_DIGITS) for p in arpa)
        return [arpabet2ipa[s] for s in symbols if s in arpabet2ipa]

    def _utterance2phonemes(self, getter, utterance, lang, ignore_oov):
        """Join the per-word phonemes returned by ``getter`` with "."."""
        phonemes = []
        for word in utterance.split():
            try:
                # getters take (word, lang) only
                phones = getter(word, lang) or []
            except OutOfVocabulary:
                phones = []
            if not phones and not ignore_oov:
                raise OutOfVocabulary(f"unknown word: {word}")
            phonemes.extend(phones)
            phonemes.append(".")  # word separator
        if phonemes:
            return phonemes[:-1]  # drop the trailing separator
        if ignore_oov:
            return None
        raise OutOfVocabulary("no words in utterance")

    def utterance2arpa(self, utterance, lang, ignore_oov=False):
        """Return the ARPA phonemes of an utterance, words separated by ".".

        Args:
            utterance: whitespace separated words.
            lang: language passed through to ``get_arpa``.
            ignore_oov: when True, words without phonemes contribute only
                their separator instead of raising.

        Returns:
            A list of phonemes, or None for an utterance without words when
            ``ignore_oov`` is True.

        Raises:
            OutOfVocabulary: a word has no phonemes, or the utterance has no
                words, and ``ignore_oov`` is False.
        """
        return self._utterance2phonemes(self.get_arpa, utterance, lang,
                                        ignore_oov)

    def utterance2ipa(self, utterance, lang, ignore_oov=False):
        """Return the IPA phonemes of an utterance, words separated by ".".

        Args:
            utterance: whitespace separated words.
            lang: language passed through to ``get_ipa``.
            ignore_oov: when True, words without phonemes contribute only
                their separator instead of raising.

        Returns:
            A list of phonemes, or None for an utterance without words when
            ``ignore_oov`` is True.

        Raises:
            OutOfVocabulary: a word has no phonemes, or the utterance has no
                words, and ``ignore_oov`` is False.
        """
        return self._utterance2phonemes(self.get_ipa, utterance, lang,
                                        ignore_oov)

    def utterance2visemes(self, utterance, lang, default_dur=0.4):
        """Return ``(viseme, duration)`` pairs for an utterance.

        Every word is followed by a "." phoneme; words without phonemes use
        a fixed placeholder sequence. Phonemes missing from ``VISIMES`` map
        to viseme ``'4'``; every pair uses ``default_dur`` as duration.
        """
        arpa = []
        for word in utterance.split():
            try:
                phones = self.get_arpa(word, lang)
            except OutOfVocabulary:
                phones = None
            arpa.extend(phones or self._UNKNOWN_WORD_ARPA)
            arpa.append(".")
        visemes = []
        for pho in arpa:
            key = pho.lower()
            if key not in VISIMES:
                # ARPA symbols may carry digits (get_ipa strips them before
                # its own lookup), so retry without them
                key = key.translate(self._ARPA_DIGITS)
            visemes.append((VISIMES.get(key, '4'), default_dur))
        return visemes
87+

ovos_plugin_manager/templates/tts.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,12 @@
3232
from time import time, sleep
3333

3434
import requests
35-
from phoneme_guesser.exceptions import FailedToGuessPhonemes
36-
35+
from ovos_plugin_manager.g2p import OVOSG2PFactory
3736
from ovos_plugin_manager.utils.tts_cache import TextToSpeechCache, hash_sentence
3837
from ovos_utils import resolve_resource_file
3938
from ovos_utils.configuration import read_mycroft_config
4039
from ovos_utils.enclosure.api import EnclosureAPI
4140
from ovos_utils.file_utils import get_cache_directory
42-
from ovos_utils.lang.phonemes import get_phonemes
4341
from ovos_utils.lang.visimes import VISIMES
4442
from ovos_utils.log import LOG
4543
from ovos_utils.messagebus import Message, FakeBus as BUS
@@ -243,6 +241,7 @@ def __init__(self, lang="en-us", config=None, validator=None,
243241
self.config, tts_id, self.audio_ext
244242
)
245243
self.cache.curate()
244+
self.g2p = OVOSG2PFactory.create(config_core)
246245
self.handle_metric({"metric_type": "tts.init"})
247246

248247
def handle_metric(self, metadata=None):
@@ -441,7 +440,13 @@ def _execute(self, sentence, ident, listen, **kwargs):
441440
else: # synth + cache
442441
audio_file, phonemes = self._synth(sentence, sentence_hash, **kwargs)
443442

444-
viseme = self.viseme(phonemes) if phonemes else None
443+
# get visemes/mouth movements
444+
if phonemes:
445+
viseme = self.viseme(phonemes)
446+
else:
447+
lang = self._get_lang(kwargs)
448+
viseme = self.g2p.utterance2visemes(sentence, lang)
449+
445450
audio_ext = self._determine_ext(audio_file)
446451
self.queue.put(
447452
(audio_ext, str(audio_file), viseme, ident, l)
@@ -459,11 +464,7 @@ def _determine_ext(self, audio_file):
459464
except:
460465
return self.audio_ext
461466

462-
def _synth(self, sentence, sentence_hash=None, **kwargs):
463-
self.handle_metric({"metric_type": "tts.synth.start"})
464-
sentence_hash = sentence_hash or hash_sentence(sentence)
465-
audio = self.cache.define_audio_file(sentence_hash)
466-
467+
def _get_lang(self, kwargs):
467468
# parse requested language for this TTS request
468469
# NOTE: this is ovos only functionality, not in mycroft-core!
469470
lang = kwargs.get("lang")
@@ -474,7 +475,16 @@ def _synth(self, sentence, sentence_hash=None, **kwargs):
474475
kwargs["message"].context.get("lang")
475476
except: # not a mycroft message object
476477
pass
477-
kwargs["lang"] = lang or self.lang
478+
return lang or self.lang
479+
480+
def _synth(self, sentence, sentence_hash=None, **kwargs):
481+
self.handle_metric({"metric_type": "tts.synth.start"})
482+
sentence_hash = sentence_hash or hash_sentence(sentence)
483+
audio = self.cache.define_audio_file(sentence_hash)
484+
485+
# parse requested language for this TTS request
486+
# NOTE: this is ovos only functionality, not in mycroft-core!
487+
kwargs["lang"] = self._get_lang(kwargs)
478488

479489
# filter kwargs per plugin, different plugins expose different options
480490
# mycroft-core -> no kwargs
@@ -494,11 +504,11 @@ def _synth(self, sentence, sentence_hash=None, **kwargs):
494504
def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None):
495505
sentence_hash = sentence_hash or hash_sentence(sentence)
496506
if not phonemes:
497-
try: # TODO debug why get_phonemes fails in the first place
498-
phonemes = get_phonemes(sentence)
499-
self.handle_metric({"metric_type": "tts.phonemes.guess"})
500-
except (ImportError, FailedToGuessPhonemes):
501-
pass
507+
try:
508+
phonemes = self.g2p.utterance2arpa(sentence, self.lang)
509+
self.handle_metric({"metric_type": "tts.phonemes.g2p"})
510+
except Exception as e:
511+
self.handle_metric({"metric_type": "tts.phonemes.g2p.error", "error": str(e)})
502512
if phonemes:
503513
return self.save_phonemes(sentence_hash, phonemes)
504514
return None

ovos_plugin_manager/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
class PluginTypes(str, Enum):
2020
SKILL = "ovos.plugin.skill"
21+
PHONEME = "ovos.plugin.g2p"
2122
AUDIO = 'mycroft.plugin.audioservice'
2223
STT = 'mycroft.plugin.stt'
2324
TTS = 'mycroft.plugin.tts'

requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
ovos_utils~=0.0.14a3
2-
requests
3-
phoneme_guesser
2+
requests

setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
author='jarbasAi',
1010
install_requires=["ovos_utils>=0.0.12a3",
1111
"requests",
12-
"memory-tempfile",
13-
"phoneme_guesser"],
12+
"memory-tempfile"],
1413
author_email='jarbasai@mailfence.com',
1514
description='OpenVoiceOS plugin manager'
1615
)

0 commit comments

Comments
 (0)