feat/fallback_stt

JarbasAl · JarbasAl · commit 114091e02a24 · 2022-03-01T18:52:07.000Z
diff --git a/mycroft/client/speech/listener.py b/mycroft/client/speech/listener.py
@@ -12,20 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+import json
 import time
+from queue import Queue, Empty
 from threading import Thread
+
 import pyaudio
 from pyee import EventEmitter
+
 from mycroft.client.speech.hotword_factory import HotWordFactory
 from mycroft.client.speech.mic import MutableMicrophone, ResponsiveRecognizer
 from mycroft.configuration import Configuration
 from mycroft.metrics import Stopwatch, report_timing
 from mycroft.session import SessionManager
 from mycroft.stt import STTFactory
-from mycroft.util.log import LOG
 from mycroft.util import find_input_device
-from queue import Queue, Empty
-import json
+from mycroft.util.log import LOG
 
 MAX_MIC_RESTARTS = 20
 
@@ -195,7 +197,14 @@ def send_unknown_intent():
 
         try:
             # Invoke the STT engine on the audio clip
-            text = self.loop.stt.execute(audio, language=lang)
+            try:
+                text = self.loop.stt.execute(audio, language=lang)
+            except Exception as e:
+                if not self.loop.fallback_stt:
+                    # No fallback engine: propagate the original error
+                    raise
+                LOG.warning(f"Using fallback STT, main plugin failed: {e}")
+                text = self.loop.fallback_stt.execute(audio, language=lang)
             if text is not None:
                 text = text.lower().strip()
                 LOG.debug("STT: " + text)
@@ -240,23 +249,25 @@ class RecognizerLoop(EventEmitter):
                 (optional, can be set later via self.bind )
     """
 
-    def __init__(self, bus, watchdog=None, stt=None):
+    def __init__(self, bus, watchdog=None, stt=None, fallback_stt=None):
         super(RecognizerLoop, self).__init__()
         self._watchdog = watchdog
         self.mute_calls = 0
         self.stt = stt
+        self.fallback_stt = fallback_stt
         self.bus = bus
         self.engines = {}
-        self.stt = None
         self.queue = None
         self.audio_consumer = None
         self.audio_producer = None
         self.responsive_recognizer = None
 
         self._load_config()
 
-    def bind(self, stt):
+    def bind(self, stt, fallback_stt=None):
         self.stt = stt
+        if fallback_stt:
+            self.fallback_stt = fallback_stt
 
     def _load_config(self):
         """Load configuration parameters from configuration."""
@@ -325,6 +336,20 @@ def start_async(self):
         self.state.running = True
         if not self.stt:
             self.stt = STTFactory.create()
+        if not self.fallback_stt:
+            stt_config = Configuration.get().get('stt', {})
+            engine = stt_config.get("fallback_module")
+            if not engine:
+                LOG.warning("No fallback STT configured")
+            else:  # copy the section so the loaded config is not mutated
+                plugin_config = dict(stt_config.get(engine) or {})
+                plugin_config["lang"] = plugin_config.get("lang") or \
+                                        self.config_core.get("lang", "en-us")
+                try:
+                    self.fallback_stt = STTFactory.create({"module": engine,
+                                                           engine: plugin_config})
+                except Exception as e:
+                    LOG.error(f"Failed to create fallback STT: {e}")
         self.queue = Queue()
         self.audio_consumer = AudioConsumer(self)
         self.audio_consumer.start()
diff --git a/mycroft/configuration/mycroft.conf b/mycroft/configuration/mycroft.conf
@@ -432,19 +432,8 @@
   "stt": {
     // Engine.  Options: "mycroft", "google", "wit", "ibm", "kaldi", "bing",
     //                   "houndify", "deepspeech_server", "govivace", "yandex"
-    "module": "mycroft"
-    // "deepspeech_server": {
-    //   "uri": "http://localhost:8080/stt"
-    // },
-    // "kaldi": {
-    //   "uri": "http://localhost:8080/client/dynamic/recognize"
-    // },
-    //"govivace": {
-    //   "uri": "https://services.govivace.com:49149/telephony",
-    //   "credential": {
-    //      "token": "xxxxx"
-    //      }
-    //}
+    "module": "mycroft",
+    "fallback_module": "ovos-stt-plugin-vosk"
   },
 
   // Text to Speech parameters
diff --git a/mycroft/stt/__init__.py b/mycroft/stt/__init__.py
@@ -63,9 +63,10 @@ def execute(self, audio, language=None):
 
 class STTFactory(OVOSSTTFactory):
     @staticmethod
-    def create():
-        config = Configuration.get().get("stt", {})
+    def create(config=None):
+        config = config or Configuration.get().get("stt", {})
         module = config.get("module", "mycroft")
+        LOG.info(f"Creating STT engine: {module}")
         if module == "mycroft":
             return MycroftSTT()
         return OVOSSTTFactory.create(config)
diff --git a/requirements/minimal.txt b/requirements/minimal.txt
@@ -4,4 +4,4 @@ mycroft-messagebus-client~=0.9.1,!=0.9.2,!=0.9.3
 psutil~=5.6.6
 combo-lock~=0.2
 ovos-utils~=0.0.18
-ovos-plugin-manager~=0.0.10
+ovos-plugin-manager~=0.0.11a1
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
@@ -9,7 +9,7 @@ combo-lock~=0.2
 PyYAML~=5.4
 
 ovos-utils~=0.0.18
-ovos-plugin-manager~=0.0.10
+ovos-plugin-manager~=0.0.11a1
 ovos-tts-plugin-mimic>=0.2.6
 ovos-tts-plugin-mimic2>=0.1.4
 ovos-tts-plugin-google-tx>=0.0.3