WebThingsIO · JRMeyer · Feb 23, 2022 · Feb 23, 2022 · Feb 23, 2022 · Feb 23, 2022
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 
 A voice add-on for the
 [WebThings Gateway](https://github.com/WebThingsIO/gateway) which uses
-[DeepSpeech](https://github.com/mozilla/DeepSpeech) as the speech-to-text (STT)
+[:frog:STT](https://github.com/coqui-ai/stt) as the speech-to-text (STT)
 engine.
 
 The add-on uses a microphone attached to the gateway host.
@@ -69,4 +69,4 @@ The add-on uses a microphone attached to the gateway host.
 ## Credits
 
 This add-on was originally created by André Natal (@andrenatal). DeepSpeech
-support was initially added by Alexandre Lissy (@lissyx).
+support was initially added by Alexandre Lissy (@lissyx), and upgraded to Coqui (:frog:STT) by Josh Meyer (@JRMeyer).
diff --git a/lib/adapter.js b/lib/adapter.js
@@ -9,7 +9,7 @@
 'use strict';
 
 const {Adapter, Event} = require('gateway-addon');
-const DeepSpeechInterface = require('./deep-speech-interface');
+const CoquiSttInterface = require('./coqui-stt-interface');
 const levenshtein = require('js-levenshtein');
 const manifest = require('../manifest.json');
 const {normalizeDeviceName} = require('./command-utils');
@@ -29,14 +29,14 @@ class VoiceAdapter extends Adapter {
 
       addonManager.addAdapter(this);
 
-      this._dsInterface = new DeepSpeechInterface(this);
+      this._sttInterface = new CoquiSttInterface(this);
       this.startPairing();
     });
   }
 
   startPairing() {
     if (!this.devices['voice-controller']) {
-      this.handleDeviceAdded(new VoiceDevice(this, this._dsInterface));
+      this.handleDeviceAdded(new VoiceDevice(this, this._sttInterface));
     }
   }
 
@@ -110,12 +110,12 @@ class VoiceAdapter extends Adapter {
     this._nameMap[deviceId] = normalizeDeviceName(device.title);
 
     this._deviceSavedTimeout = setTimeout(() => {
-      this._dsInterface.generateLocalLM(Object.values(this._nameMap));
+      this._sttInterface.generateLocalLM(Object.values(this._nameMap));
     }, 1000);
   }
 
   unload() {
-    this._dsInterface.stopMicrophone();
+    this._sttInterface.stopMicrophone();
     return Promise.resolve();
   }
 }

diff --git a/lib/deep-speech-interface.js → lib/coqui-stt-interface.js b/lib/deep-speech-interface.js → lib/coqui-stt-interface.js
@@ -1,12 +1,6 @@
 'use strict';
 
-let Ds;
-try {
-  Ds = require('deepspeech');
-} catch (_) {
-  Ds = require('deepspeech-tflite');
-}
-
+const Stt = require('stt');
 const {spawnSync} = require('child_process');
 const fs = require('fs');
 const {
@@ -162,8 +156,8 @@ class DeepSpeechInterface {
       console.debug(`Loading model from ${this._modelsDir}`);
     }
 
-    this._model = new Ds.Model(
-      path.join(this._assetsDir, `deepspeech-model.tflite`)
+    this._model = new Stt.Model(
+      path.join(this._assetsDir, `model.tflite`)
     );
 
     if (this._debug) {
@@ -229,7 +223,7 @@ class DeepSpeechInterface {
   }
 
   resumeMicrophone() {
-    const dsStream = this._model.createStream();
+    const sttStream = this._model.createStream();
     const micStream = this._mic.getStream();
     let silenceCount = 0;
 
@@ -239,7 +233,7 @@ class DeepSpeechInterface {
 
     let runningTranscript = '';
     const interimTimer = setInterval(() => {
-      const transcript = dsStream.intermediateDecode();
+      const transcript = sttStream.intermediateDecode();
 
       if (this._debug) {
         console.debug('interim:', transcript);
@@ -254,7 +248,7 @@ class DeepSpeechInterface {
     }, 1000);
 
     const dataHandler = (data) => {
-      dsStream.feedAudioContent(data);
+      sttStream.feedAudioContent(data);
     };
 
     micStream.once('pauseComplete', () => {
@@ -270,7 +264,7 @@ class DeepSpeechInterface {
 
       micStream.removeListener('data', dataHandler);
 
-      const transcript = dsStream.finishStream().trim();
+      const transcript = sttStream.finishStream().trim();
       if (!transcript) {
         if (this._debug) {
           console.debug('Transcript was empty');

diff --git a/lib/device.js b/lib/device.js
@@ -4,10 +4,10 @@ const {Device} = require('gateway-addon');
 const VoiceProperty = require('./property');
 
 class VoiceDevice extends Device {
-  constructor(adapter, dsInterface) {
+  constructor(adapter, sttInterface) {
     super(adapter, 'voice-controller');
 
-    this._dsInterface = dsInterface;
+    this._sttInterface = sttInterface;
     this.name = 'Voice Controller';
     this.description = 'Voice Controller';
     this['@type'] = ['OnOffSwitch'];
@@ -53,9 +53,9 @@ class VoiceDevice extends Device {
 
   toggle(value) {
     if (value) {
-      this._dsInterface.enable();
+      this._sttInterface.enable();
     } else {
-      this._dsInterface.disable();
+      this._sttInterface.disable();
     }
   }
 }

diff --git a/package.json b/package.json
@@ -17,8 +17,7 @@
     "url": "https://github.com/WebThingsIO/voice-addon/issues"
   },
   "dependencies": {
-    "deepspeech": "^0.9.0",
-    "deepspeech-tflite": "^0.9.0",
+    "stt": "^1.2.0",
     "js-levenshtein": "^1.1.6",
     "mic": "^2.1.2",
     "sound-player": "^1.0.13",
@@ -37,7 +36,7 @@
     "SHA256SUMS",
     "assets/LICENSE",
     "assets/alphabet.txt",
-    "assets/deepspeech-model.tflite",
+    "assets/model.tflite",
     "assets/error.wav",
     "assets/no-input.wav",
     "assets/success.wav",
@@ -48,7 +47,7 @@
     "index.js",
     "lib/adapter.js",
     "lib/command-utils.js",
-    "lib/deep-speech-interface.js",
+    "lib/coqui-stt-interface.js",
     "lib/device.js",
     "lib/matrix-microphone.js",
     "lib/property.js",

diff --git a/package.sh b/package.sh
@@ -1,6 +1,7 @@
 #!/bin/bash -e
 
-_DS_VERSION="0.9.0"
+_STT_VERSION="1.2.0"
+_STT_MODEL_VERSION="1.0.0"
 
 # Setup environment for building inside Dockerized toolchain
 export NVM_DIR="${HOME}/.nvm"
@@ -41,58 +42,41 @@ rm -rf "${here}/kenlm"
 pushd "${here}/bin"
 case "$ADDON_ARCH" in
   linux-x64)
-    _SCORER_TARBALL="native_client.amd64.cpu.linux.tar.xz"
+    _SCORER_TARBALL="native_client.tflite.Linux.tar.xz"
     ;;
   linux-arm)
-    _SCORER_TARBALL="native_client.rpi3.cpu.linux.tar.xz"
+    _SCORER_TARBALL="native_client.tflite.linux.armv7.tar.xz"
     ;;
   linux-arm64)
-    _SCORER_TARBALL="native_client.arm64.cpu.linux.tar.xz"
+    _SCORER_TARBALL="native_client.tflite.linux.aarch64.tar.xz"
     ;;
   darwin-x64)
-    _SCORER_TARBALL="native_client.amd64.cpu.osx.tar.xz"
+    _SCORER_TARBALL="native_client.tflite.macOS.tar.xz"
     ;;
 esac
 
 curl \
-  -L "https://github.com/mozilla/DeepSpeech/releases/download/v${_DS_VERSION}/${_SCORER_TARBALL}" | \
+  -L "https://github.com/coqui-ai/STT/releases/download/v${_STT_VERSION}/${_SCORER_TARBALL}" | \
   tar xJ generate_scorer_package
 popd
 
 # download the DeepSpeech model
 pushd "${here}/assets"
 curl \
-  -o "deepspeech-model.tflite" \
-  -L "https://github.com/mozilla/DeepSpeech/releases/download/v${_DS_VERSION}/deepspeech-${_DS_VERSION}-models.tflite"
+  -o "model.tflite" \
+  -L "https://github.com/coqui-ai/STT-models/releases/download/english%2Fcoqui%2Fv${_STT_MODEL_VERSION}-huge-vocab/model.tflite"
 popd
 
-# remove one of the DS dependencies, based on architecture
-KEEP_DEP="deepspeech"
-REMOVE_DEP="deepspeech-tflite"
-if [[ -n "$ADDON_ARCH" && $ADDON_ARCH =~ x64 ]]; then
-  KEEP_DEP="deepspeech-tflite"
-  REMOVE_DEP="deepspeech"
-fi
-python -c "import json, os; \
-    from collections import OrderedDict; \
-    fname = os.path.join(os.getcwd(), 'package.json'); \
-    d = json.loads(open(fname).read(), object_pairs_hook=OrderedDict); \
-    del d['dependencies']['${REMOVE_DEP}']; \
-    f = open(fname, 'wt'); \
-    json.dump(d, f, indent=2); \
-    f.close()
-"
-
 npm install --production
 
-# keep only the compiled DS binary that we need
+# keep only the compiled STT binary that we need
 module_version=$(node -e 'console.log(`node-v${process.config.variables.node_module_version}`)')
-find "node_modules/${KEEP_DEP}/lib/binding/v${_DS_VERSION}" \
+find "node_modules/stt/lib/binding/v${_STT_VERSION}" \
   -mindepth 1 \
   -maxdepth 1 \
   \! -name "${ADDON_ARCH}" \
   -exec rm -rf {} \;
-find "node_modules/${KEEP_DEP}/lib/binding/v${_DS_VERSION}/${ADDON_ARCH}" \
+find "node_modules/stt/lib/binding/v${_STT_VERSION}/${ADDON_ARCH}" \
   -mindepth 1 \
   -maxdepth 1 \
   -type d \