-
-
Notifications
You must be signed in to change notification settings - Fork 12
Feat/g2p plugins #21
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Feat/g2p plugins #21
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| from ovos_plugin_manager.utils import load_plugin, find_plugins, PluginTypes | ||
| from ovos_plugin_manager.templates.g2p import Grapheme2PhonemePlugin, PhonemeAlphabet | ||
|
|
||
|
|
||
def find_g2p_plugins():
    """Look up the installed plugins registered under ``PluginTypes.PHONEME``.

    Returns:
        Whatever ``find_plugins`` reports for the PHONEME plugin type.
    """
    plugin_type = PluginTypes.PHONEME
    return find_plugins(plugin_type)
|
|
||
|
|
||
def load_g2p_plugin(module_name):
    """Load the G2P plugin registered under ``module_name``.

    Args:
        module_name: plugin name, forwarded unchanged to ``load_plugin``.

    Returns:
        Whatever ``load_plugin`` returns for that name within the
        ``PluginTypes.PHONEME`` plugin type.
    """
    plugin_type = PluginTypes.PHONEME
    return load_plugin(module_name, plugin_type)
|
|
||
|
|
||
class OVOSG2PFactory:
    """Factory for G2P (grapheme-to-phoneme) engines.

    Replicates the base mycroft class, but uses only OPM enabled plugins.
    """

    # Short module names accepted in the configuration, mapped to the plugin
    # names that are handed to ``load_g2p_plugin``.
    MAPPINGS = {
        "cmudict": "ovos-g2p-plugin-cmudict",
        "phoneme_guesser": "neon-g2p-plugin-phoneme-guesser",
        "gruut": "neon-g2p-plugin-gruut",
    }

    @staticmethod
    def get_class(config=None):
        """Factory method to get a G2P engine class based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: flat G2P configuration as produced by ``get_g2p_config``;
                when falsy, ``get_g2p_config()`` is called to obtain it.

        Returns:
            Whatever ``load_g2p_plugin`` returns for the resolved plugin name.
        """
        config = config or get_g2p_config()
        g2p_module = config.get("module") or "cmudict"
        # Every alias, including the default "cmudict", is resolved through
        # MAPPINGS; names that are not aliases are used as plugin names as-is.
        plugin_name = OVOSG2PFactory.MAPPINGS.get(g2p_module, g2p_module)
        return load_g2p_plugin(plugin_name)

    @staticmethod
    def create(config=None):
        """Factory method to create a G2P engine based on configuration.

        The configuration file ``mycroft.conf`` contains a ``g2p`` section with
        the name of a G2P module to be read by this method.

        "g2p": {
            "module": <engine_name>
        }

        Args:
            config: configuration dict forwarded to ``get_g2p_config``.

        Returns:
            An instance of the selected plugin class, constructed with the
            flat G2P configuration as its only argument.

        Raises:
            Exception: any error raised while resolving, instantiating or
                validating the plugin is logged and re-raised unchanged.
        """
        # Function-scope import so this block does not depend on a
        # module-level ``LOG`` being imported elsewhere in the file.
        from ovos_utils.log import LOG

        g2p_config = get_g2p_config(config)
        g2p_module = g2p_config.get('module', 'cmudict')
        try:
            clazz = OVOSG2PFactory.get_class(g2p_config)
            LOG.info(f'Found plugin {g2p_module}')
            # G2P plugins take the configuration as their only constructor
            # argument (see Grapheme2PhonemePlugin.__init__).
            g2p = clazz(g2p_config)
            # The plugin template defines no ``validator``; only run one when
            # the concrete plugin actually provides it.
            validator = getattr(g2p, "validator", None)
            if validator is not None:
                validator.validate()
            LOG.info(f'Loaded plugin {g2p_module}')
        except Exception:
            LOG.exception('The selected G2P plugin could not be loaded.')
            raise
        return g2p
|
|
||
|
|
||
def get_g2p_config(config=None):
    """Return the settings of the selected G2P module as a flat dict.

    Args:
        config: either a full configuration containing a ``"g2p"`` section or
            the ``"g2p"`` section itself. When falsy, the configuration is
            read with ``read_mycroft_config()``.

    Returns:
        A new dict holding the module-specific settings (the sub-dict stored
        under the module's own name, if any) plus a ``"module"`` key with the
        selected module name (``"cmudict"`` when none is configured).
    """
    if not config:
        # Function-scope import: only needed when the caller passes no config.
        # NOTE(review): assumes ovos_utils.configuration exposes
        # read_mycroft_config in the pinned ovos_utils version - confirm.
        from ovos_utils.configuration import read_mycroft_config
        config = read_mycroft_config()
    if "g2p" in config:
        config = config["g2p"]
    g2p_module = config.get('module', 'cmudict')
    # Copy the module section so that adding "module" below never writes into
    # the caller's configuration; a missing or None section becomes {}.
    g2p_config = dict(config.get(g2p_module) or {})
    g2p_config["module"] = g2p_module
    return g2p_config
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,87 @@ | ||
| import enum | ||
| from ovos_utils.lang.phonemes import arpabet2ipa, ipa2arpabet | ||
| from ovos_utils.lang.visimes import VISIMES | ||
|
|
||
|
|
||
class PhonemeAlphabet(str, enum.Enum):
    """Names of the phoneme alphabets; members are also plain ``str`` values."""

    ARPA = "arpa"
    IPA = "ipa"
|
|
||
|
|
||
class OutOfVocabulary(ValueError):
    """Raised when no phonemes could be obtained for a word."""
|
|
||
|
|
||
class Grapheme2PhonemePlugin:
    """Base class for grapheme-to-phoneme (G2P) plugins.

    Subclasses override ``get_arpa`` and/or ``get_ipa``. Whichever of the two
    is left un-overridden is derived from the other one through the
    ``ipa2arpabet`` / ``arpabet2ipa`` lookup tables.
    """

    # Deletion table that strips the digits 0-9 from an ARPA symbol before it
    # is looked up in ``arpabet2ipa``.
    _ARPA_DIGITS = str.maketrans("", "", "0123456789")

    def __init__(self, config=None):
        """Store the plugin configuration (an empty dict when none is given)."""
        self.config = config or {}

    @property
    def arpa_is_implemented(self):
        """True when the concrete class overrides ``get_arpa``."""
        return self.__class__.get_arpa is not Grapheme2PhonemePlugin.get_arpa

    @property
    def ipa_is_implemented(self):
        """True when the concrete class overrides ``get_ipa``."""
        return self.__class__.get_ipa is not Grapheme2PhonemePlugin.get_ipa

    def get_arpa(self, word, lang):
        """Return the ARPA phonemes of ``word``, or None when unavailable.

        This default implementation converts the result of ``get_ipa`` when a
        subclass provides it; IPA symbols missing from ``ipa2arpabet`` are
        dropped.
        """
        # if ipa is implemented, use it and convert
        if not self.ipa_is_implemented:
            return None
        ipa = self.get_ipa(word, lang)
        if ipa is None:
            # the plugin had nothing for this word; do not iterate over None
            return None
        symbols = [p.replace('ˈ', "") for p in ipa]
        return [ipa2arpabet[s] for s in symbols if s in ipa2arpabet]

    def get_ipa(self, word, lang):
        """Return the IPA phonemes of ``word``, or None when unavailable.

        This default implementation converts the result of ``get_arpa`` when a
        subclass provides it; digits are stripped from each ARPA symbol and
        symbols missing from ``arpabet2ipa`` are dropped.
        """
        # if arpa is implemented, use it and convert
        if not self.arpa_is_implemented:
            return None
        arpa = self.get_arpa(word, lang)
        if arpa is None:
            # the plugin had nothing for this word; do not iterate over None
            return None
        symbols = [p.translate(self._ARPA_DIGITS) for p in arpa]
        return [arpabet2ipa[s] for s in symbols if s in arpabet2ipa]

    def _utterance2phonemes(self, utterance, lang, ignore_oov, word2phonemes):
        """Join the per-word phonemes of ``utterance`` with "." separators."""
        per_word = []
        for w in utterance.split():
            try:
                # get_arpa / get_ipa take exactly (word, lang)
                phones = word2phonemes(w, lang) or []
            except OutOfVocabulary:
                # a plugin may raise instead of returning nothing
                if not ignore_oov:
                    raise
                phones = []
            if phones:
                per_word.append(list(phones))
            elif not ignore_oov:
                raise OutOfVocabulary(f"unknown word: {w}")
            # with ignore_oov the word is skipped, leaving no stray separator
        if not per_word:
            if ignore_oov:
                return None
            raise OutOfVocabulary
        joined = []
        for phones in per_word:
            if joined:
                joined.append(".")
            joined.extend(phones)
        return joined

    def utterance2arpa(self, utterance, lang, ignore_oov=False):
        """Return the ARPA phonemes of ``utterance``, words separated by ".".

        Args:
            utterance: text, split into words on whitespace.
            lang: language, forwarded to ``get_arpa``.
            ignore_oov: when True, words without phonemes are skipped instead
                of raising.

        Returns:
            A list of phonemes, or None when ``ignore_oov`` is True and no
            word produced any phonemes.

        Raises:
            OutOfVocabulary: when ``ignore_oov`` is False and a word has no
                phonemes, or the utterance contains no words.
        """
        return self._utterance2phonemes(utterance, lang, ignore_oov,
                                        self.get_arpa)

    def utterance2ipa(self, utterance, lang, ignore_oov=False):
        """Return the IPA phonemes of ``utterance``, words separated by ".".

        Args:
            utterance: text, split into words on whitespace.
            lang: language, forwarded to ``get_ipa``.
            ignore_oov: when True, words without phonemes are skipped instead
                of raising.

        Returns:
            A list of phonemes, or None when ``ignore_oov`` is True and no
            word produced any phonemes.

        Raises:
            OutOfVocabulary: when ``ignore_oov`` is False and a word has no
                phonemes, or the utterance contains no words.
        """
        return self._utterance2phonemes(utterance, lang, ignore_oov,
                                        self.get_ipa)

    def utterance2visemes(self, utterance, lang, default_dur=0.4):
        """Return ``(viseme, duration)`` pairs for ``utterance``.

        Each word is converted with ``get_arpa``; a word without phonemes
        falls back to a fixed placeholder sequence. Every word is followed by
        a "." entry, and symbols missing from ``VISIMES`` map to '4'. All
        entries get the same duration, ``default_dur``.
        """
        arpa = []
        for w in utterance.split():
            phones = self.get_arpa(w, lang) or \
                     ['B', 'L', 'AE', '.', 'B', 'L', 'AE']
            arpa += phones + ["."]
        return [(VISIMES.get(pho.lower(), '4'), default_dur) for pho in arpa]
|
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,2 @@ | ||
| ovos_utils~=0.0.14a3 | ||
| requests | ||
| phoneme_guesser | ||
| requests |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.