Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

A voice add-on for the
[WebThings Gateway](https://github.com/WebThingsIO/gateway) which uses
[DeepSpeech](https://github.com/mozilla/DeepSpeech) as the speech-to-text (STT)
[:frog:STT](https://github.com/coqui-ai/stt) as the speech-to-text (STT)
engine.

The add-on uses a microphone attached to the gateway host.
Expand Down Expand Up @@ -69,4 +69,4 @@ The add-on uses a microphone attached to the gateway host.
## Credits

This add-on was originally created by André Natal (@andrenatal). DeepSpeech
support was initially added by Alexandre Lissy (@lissyx).
support was initially added by Alexandre Lissy (@lissyx), and upgraded to Coqui (:frog:STT) by Josh Meyer (@JRMeyer).
10 changes: 5 additions & 5 deletions lib/adapter.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
'use strict';

const {Adapter, Event} = require('gateway-addon');
const DeepSpeechInterface = require('./deep-speech-interface');
const CoquiSttInterface = require('./coqui-stt-interface');
const levenshtein = require('js-levenshtein');
const manifest = require('../manifest.json');
const {normalizeDeviceName} = require('./command-utils');
Expand All @@ -29,14 +29,14 @@ class VoiceAdapter extends Adapter {

addonManager.addAdapter(this);

this._dsInterface = new DeepSpeechInterface(this);
this._sttInterface = new CoquiSttInterface(this);
this.startPairing();
});
}

startPairing() {
if (!this.devices['voice-controller']) {
this.handleDeviceAdded(new VoiceDevice(this, this._dsInterface));
this.handleDeviceAdded(new VoiceDevice(this, this._sttInterface));
}
}

Expand Down Expand Up @@ -110,12 +110,12 @@ class VoiceAdapter extends Adapter {
this._nameMap[deviceId] = normalizeDeviceName(device.title);

this._deviceSavedTimeout = setTimeout(() => {
this._dsInterface.generateLocalLM(Object.values(this._nameMap));
this._sttInterface.generateLocalLM(Object.values(this._nameMap));
}, 1000);
}

unload() {
this._dsInterface.stopMicrophone();
this._sttInterface.stopMicrophone();
return Promise.resolve();
}
}
Expand Down
20 changes: 7 additions & 13 deletions lib/deep-speech-interface.js → lib/coqui-stt-interface.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
'use strict';

let Ds;
try {
Ds = require('deepspeech');
} catch (_) {
Ds = require('deepspeech-tflite');
}

const Stt = require('stt');
const {spawnSync} = require('child_process');
const fs = require('fs');
const {
Expand Down Expand Up @@ -162,8 +156,8 @@ class DeepSpeechInterface {
console.debug(`Loading model from ${this._modelsDir}`);
}

this._model = new Ds.Model(
path.join(this._assetsDir, `deepspeech-model.tflite`)
this._model = new Stt.Model(
path.join(this._assetsDir, `model.tflite`)
);

if (this._debug) {
Expand Down Expand Up @@ -229,7 +223,7 @@ class DeepSpeechInterface {
}

resumeMicrophone() {
const dsStream = this._model.createStream();
const sttStream = this._model.createStream();
const micStream = this._mic.getStream();
let silenceCount = 0;

Expand All @@ -239,7 +233,7 @@ class DeepSpeechInterface {

let runningTranscript = '';
const interimTimer = setInterval(() => {
const transcript = dsStream.intermediateDecode();
const transcript = sttStream.intermediateDecode();

if (this._debug) {
console.debug('interim:', transcript);
Expand All @@ -254,7 +248,7 @@ class DeepSpeechInterface {
}, 1000);

const dataHandler = (data) => {
dsStream.feedAudioContent(data);
sttStream.feedAudioContent(data);
};

micStream.once('pauseComplete', () => {
Expand All @@ -270,7 +264,7 @@ class DeepSpeechInterface {

micStream.removeListener('data', dataHandler);

const transcript = dsStream.finishStream().trim();
const transcript = sttStream.finishStream().trim();
if (!transcript) {
if (this._debug) {
console.debug('Transcript was empty');
Expand Down
8 changes: 4 additions & 4 deletions lib/device.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ const {Device} = require('gateway-addon');
const VoiceProperty = require('./property');

class VoiceDevice extends Device {
constructor(adapter, dsInterface) {
constructor(adapter, sttInterface) {
super(adapter, 'voice-controller');

this._dsInterface = dsInterface;
this._sttInterface = sttInterface;
this.name = 'Voice Controller';
this.description = 'Voice Controller';
this['@type'] = ['OnOffSwitch'];
Expand Down Expand Up @@ -53,9 +53,9 @@ class VoiceDevice extends Device {

toggle(value) {
if (value) {
this._dsInterface.enable();
this._sttInterface.enable();
} else {
this._dsInterface.disable();
this._sttInterface.disable();
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@
"url": "https://github.com/WebThingsIO/voice-addon/issues"
},
"dependencies": {
"deepspeech": "^0.9.0",
"deepspeech-tflite": "^0.9.0",
"stt": "^1.2.0",
"js-levenshtein": "^1.1.6",
"mic": "^2.1.2",
"sound-player": "^1.0.13",
Expand All @@ -37,7 +36,7 @@
"SHA256SUMS",
"assets/LICENSE",
"assets/alphabet.txt",
"assets/deepspeech-model.tflite",
"assets/model.tflite",
"assets/error.wav",
"assets/no-input.wav",
"assets/success.wav",
Expand All @@ -48,7 +47,7 @@
"index.js",
"lib/adapter.js",
"lib/command-utils.js",
"lib/deep-speech-interface.js",
"lib/coqui-stt-interface.js",
"lib/device.js",
"lib/matrix-microphone.js",
"lib/property.js",
Expand Down
40 changes: 12 additions & 28 deletions package.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash -e

_DS_VERSION="0.9.0"
_STT_VERSION="1.2.0"
_STT_MODEL_VERSION="1.0.0"

# Setup environment for building inside Dockerized toolchain
export NVM_DIR="${HOME}/.nvm"
Expand Down Expand Up @@ -41,58 +42,41 @@ rm -rf "${here}/kenlm"
pushd "${here}/bin"
case "$ADDON_ARCH" in
linux-x64)
_SCORER_TARBALL="native_client.amd64.cpu.linux.tar.xz"
_SCORER_TARBALL="native_client.tflite.Linux.tar.xz"
;;
linux-arm)
_SCORER_TARBALL="native_client.rpi3.cpu.linux.tar.xz"
_SCORER_TARBALL="native_client.tflite.linux.armv7.tar.xz"
;;
linux-arm64)
_SCORER_TARBALL="native_client.arm64.cpu.linux.tar.xz"
_SCORER_TARBALL="native_client.tflite.linux.aarch64.tar.xz"
;;
darwin-x64)
_SCORER_TARBALL="native_client.amd64.cpu.osx.tar.xz"
_SCORER_TARBALL="native_client.tflite.macOS.tar.xz"
;;
esac

curl \
-L "https://github.com/mozilla/DeepSpeech/releases/download/v${_DS_VERSION}/${_SCORER_TARBALL}" | \
-L "https://github.com/coqui-ai/STT/releases/download/v${_STT_VERSION}/${_SCORER_TARBALL}" | \
tar xJ generate_scorer_package
popd

# download the Coqui STT model
pushd "${here}/assets"
curl \
-o "deepspeech-model.tflite" \
-L "https://github.com/mozilla/DeepSpeech/releases/download/v${_DS_VERSION}/deepspeech-${_DS_VERSION}-models.tflite"
-o "model.tflite" \
-L "https://github.com/coqui-ai/STT-models/releases/download/english%2Fcoqui%2Fv${_STT_MODEL_VERSION}-huge-vocab/model.tflite"
popd

# remove one of the DS dependencies, based on architecture
KEEP_DEP="deepspeech"
REMOVE_DEP="deepspeech-tflite"
if [[ -n "$ADDON_ARCH" && $ADDON_ARCH =~ x64 ]]; then
KEEP_DEP="deepspeech-tflite"
REMOVE_DEP="deepspeech"
fi
python -c "import json, os; \
from collections import OrderedDict; \
fname = os.path.join(os.getcwd(), 'package.json'); \
d = json.loads(open(fname).read(), object_pairs_hook=OrderedDict); \
del d['dependencies']['${REMOVE_DEP}']; \
f = open(fname, 'wt'); \
json.dump(d, f, indent=2); \
f.close()
"

npm install --production

# keep only the compiled DS binary that we need
# keep only the compiled STT binary that we need
module_version=$(node -e 'console.log(`node-v${process.config.variables.node_module_version}`)')
find "node_modules/${KEEP_DEP}/lib/binding/v${_DS_VERSION}" \
find "node_modules/stt/lib/binding/v${_STT_VERSION}" \
-mindepth 1 \
-maxdepth 1 \
\! -name "${ADDON_ARCH}" \
-exec rm -rf {} \;
find "node_modules/${KEEP_DEP}/lib/binding/v${_DS_VERSION}/${ADDON_ARCH}" \
find "node_modules/stt/lib/binding/v${_STT_VERSION}/${ADDON_ARCH}" \
-mindepth 1 \
-maxdepth 1 \
-type d \
Expand Down