Skip to content
This repository has been archived by the owner on Jan 13, 2022. It is now read-only.

Commit

Permalink
Added Kaldi implementation of STTService
Browse files Browse the repository at this point in the history
Signed-off-by: Kelly Davis <kdavis@mozilla.com>
  • Loading branch information
kdavis-mozilla committed Mar 21, 2016
1 parent 5effc03 commit de54da6
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 1 deletion.
1 change: 1 addition & 0 deletions bundles/io/org.eclipse.smarthome.io.voice/.classpath
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<classpathentry exported="true" kind="lib" path="lib/marytts-signalproc-5.1-SNAPSHOT.jar"/>
<classpathentry exported="true" kind="lib" path="lib/marytts-common-5.1-SNAPSHOT.jar"/>
<classpathentry exported="true" kind="lib" path="lib/marytts-server-5.1-SNAPSHOT.jar"/>
<classpathentry exported="true" kind="lib" path="lib/net-speech-api-0.2.0.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry kind="src" path="src/main/java"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ Bundle-ClassPath: .,
lib/marytts-signalproc-5.1-SNAPSHOT.jar,
lib/marytts-lang-de-5.1-SNAPSHOT.jar,
lib/marytts-lang-en-5.1-SNAPSHOT.jar,
lib/net-speech-api-0.2.0.jar,
lib/opennlp-maxent-3.0.1-incubating.jar,
lib/opennlp-tools-1.5.1-incubating.jar
lib/opennlp-tools-1.5.1-incubating.jar,
lib/Java-WebSocket-1.3.0.jar,
lib/org.json.simple-0.4.jar
Import-Package: javax.sound.sampled,
org.apache.commons.collections.map,
org.apache.commons.io,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2014-2015 openHAB UG (haftungsbeschraenkt) and others.
All rights reserved. This program and the accompanying materials
are made available under the terms of the Eclipse Public License v1.0
which accompanies this distribution, and is available at
http://www.eclipse.org/legal/epl-v10.html
-->
<!--
Declarative Services component descriptor for the Kaldi speech-to-text service.
It names STTServiceKaldi as the implementation class, publishes it under the
STTService interface, marks the component as immediate, and attaches the
service property os=any.
-->
<scr:component xmlns:scr="http://www.osgi.org/xmlns/scr/v1.1.0" immediate="true" name="org.eclipse.smarthome.io.voice.kaldi">
<implementation class="org.eclipse.smarthome.io.voice.internal.STTServiceKaldi"/>
<service>
<provide interface="org.eclipse.smarthome.io.voice.STTService"/>
</service>
<property name="os" type="String" value="any"/>
</scr:component>
3 changes: 3 additions & 0 deletions bundles/io/org.eclipse.smarthome.io.voice/build.properties
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ bin.includes = META-INF/,\
lib/marytts-signalproc-5.1-SNAPSHOT.jar,\
lib/marytts-lang-de-5.1-SNAPSHOT.jar,\
lib/marytts-lang-en-5.1-SNAPSHOT.jar,\
lib/net-speech-api-0.2.0.jar,\
lib/opennlp-maxent-3.0.1-incubating.jar,\
lib/opennlp-tools-1.5.1-incubating.jar,\
lib/Java-WebSocket-1.3.0.jar,\
lib/org.json.simple-0.4.jar,\
about.html
source.. = src/main/java/
Binary file not shown.
22 changes: 22 additions & 0 deletions bundles/io/org.eclipse.smarthome.io.voice/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,28 @@
</stagingDirectory>
<artifactItems>

<artifactItem>
<groupId>org.apache.clerezza.ext</groupId>
<artifactId>org.json.simple</artifactId>
<version>0.4</version>
<packaging>jar</packaging>
<downloadUrl>
https://jcenter.bintray.com/org/apache/clerezza/ext/org.json.simple/0.4/org.json.simple-0.4.jar
</downloadUrl>
<force>true</force>
</artifactItem>

<artifactItem>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>1.3.0</version>
<packaging>jar</packaging>
<downloadUrl>
https://jcenter.bintray.com/org/java-websocket/Java-WebSocket/1.3.0/Java-WebSocket-1.3.0.jar
</downloadUrl>
<force>true</force>
</artifactItem>

<artifactItem>
<groupId>com.sun.speech.freetts</groupId>
<artifactId>freetts</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/**
* Copyright (c) 2014-2016 openHAB UG (haftungsbeschraenkt) and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package org.eclipse.smarthome.io.voice.internal;

import java.util.List;

import ee.ioc.phon.netspeechapi.duplex.RecognitionEvent;
import ee.ioc.phon.netspeechapi.duplex.RecognitionEventListener;

import org.eclipse.smarthome.io.voice.RecognitionStopEvent;
import org.eclipse.smarthome.io.voice.SpeechRecognitionErrorEvent;
import org.eclipse.smarthome.io.voice.SpeechRecognitionEvent;
import org.eclipse.smarthome.io.voice.STTEvent;
import org.eclipse.smarthome.io.voice.STTListener;

/**
* A RecognitionEventListener forwarding RecognitionEvent's to STTEvent's
*
* @author Kelly Davis - Initial contribution and API
*
*/
public class RecognitionEventListenerKaldi implements RecognitionEventListener {
/**
* Target for forwarded events
*/
private final STTListener sttListener;

/**
* A RecognitionEventListener that forwards RecognitionEvent's to STTEvent's
*
* The target of the STTEvent's is the passed STTListener
*
* @param sttListener The targeted STTListener
*/
public RecognitionEventListenerKaldi(STTListener sttListener) {
this.sttListener = sttListener;
}

/**
* Target of RecognitionEvent events that are forwarded to the contained STTListener
*
* @param recognitionEvent The fired RecognitionEvent
*/
public void onRecognitionEvent(RecognitionEvent recognitionEvent) {
int status = recognitionEvent.getStatus();
switch(status) {
case RecognitionEvent.STATUS_SUCCESS:
RecognitionEvent.Result result = recognitionEvent.getResult();
if (result.isFinal()) {
sttListener.sttEventReceived(getSTTEvent(recognitionEvent));
}
break;
case RecognitionEvent.STATUS_NO_SPEECH:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("No speech"));
break;
case RecognitionEvent.STATUS_ABORTED:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Aborted"));
break;
case RecognitionEvent.STATUS_AUDIO_CAPTURE:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Error with audio capture"));
break;
case RecognitionEvent.STATUS_NETWORK:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Network error"));
break;
case RecognitionEvent.STATUS_NOT_ALLOWED:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Not allowed"));
break;
case RecognitionEvent.STATUS_SERVICE_NOT_ALLOWED:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Service not allowed"));
break;
case RecognitionEvent.STATUS_BAD_GRAMMAR:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Grammar invalid"));
break;
case RecognitionEvent.STATUS_LANGUAGE_NOT_SUPPORTED:
sttListener.sttEventReceived(new SpeechRecognitionErrorEvent("Language not supported"));
break;
}
}

/**
* Utility method to create a STTEvent from a successful, final RecognitionEvent
*
* @param recognitionEvent The successful, final RecognitionEvent
* @return A STTEvent created from the passed RecognitionEvent
*/
private STTEvent getSTTEvent(RecognitionEvent recognitionEvent) {
RecognitionEvent.Result result = recognitionEvent.getResult();
List<RecognitionEvent.Hypothesis> hypotheses = result.getHypotheses();

float confidence = -1.0f;
String transcript = new String();
for (RecognitionEvent.Hypothesis hypothesis : hypotheses) {
if (confidence < hypothesis.getConfidence()) {
confidence = hypothesis.getConfidence();
transcript = hypothesis.getTranscript();
}
}
return new SpeechRecognitionEvent(transcript, confidence);
}

/**
* Called when the WebSocket is closed
*/
public void onClose() {
sttListener.sttEventReceived(new RecognitionStopEvent());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/**
* Copyright (c) 2014-2016 openHAB UG (haftungsbeschraenkt) and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package org.eclipse.smarthome.io.voice.internal;

import org.eclipse.smarthome.io.voice.STTServiceHandle;

/**
 * {@link STTServiceHandle} for the Kaldi speech-to-text service.
 *
 * The handle wraps one {@link STTServiceKaldiRunnable} and passes abort requests on to it.
 *
 * @author Kelly Davis - Initial contribution and API
 *
 */
public class STTServiceHandleKaldi implements STTServiceHandle {
    /**
     * The runnable that abort requests are delegated to
     */
    private final STTServiceKaldiRunnable runnable;

    /**
     * Wraps the passed STTServiceKaldiRunnable in a handle.
     *
     * @param sttServiceKaldiRunnable The STTServiceKaldiRunnable this handle delegates to
     */
    public STTServiceHandleKaldi(STTServiceKaldiRunnable sttServiceKaldiRunnable) {
        runnable = sttServiceKaldiRunnable;
    }

    /**
     * {@inheritDoc}
     *
     * Delegates to the wrapped STTServiceKaldiRunnable.
     */
    public void abort() {
        runnable.abort();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
/**
* Copyright (c) 2014-2016 openHAB UG (haftungsbeschraenkt) and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*/
package org.eclipse.smarthome.io.voice.internal;

import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;

import ee.ioc.phon.netspeechapi.duplex.WsDuplexRecognitionSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.eclipse.smarthome.io.audio.AudioFormat;
import org.eclipse.smarthome.io.audio.AudioSource;
import org.eclipse.smarthome.io.voice.STTException;
import org.eclipse.smarthome.io.voice.STTListener;
import org.eclipse.smarthome.io.voice.STTService;
import org.eclipse.smarthome.io.voice.STTServiceHandle;

/**
* This is a STT service implementation using Kaldi.
*
* @author Kelly Davis - Initial contribution and API
*
*/
public class STTServiceKaldi implements STTService {

private static final Logger logger = LoggerFactory.getLogger(STTServiceKaldi.class);

/**
* WebSocket URL to the head node of the Kaldi server cluster
*/
private static final String kaldiWebSocketURL = "ws://52.37.26.79:8888/client/ws/speech";

/**
* Set of supported locales
*/
private final HashSet<Locale> locales = initLocales();

/**
* Set of supported audio formats
*/
private final HashSet<AudioFormat> audioFormats = initAudioFormats();

/**
* {@inheritDoc}
*/
public Set<Locale> getSupportedLocales() {
return this.locales;
}

/**
* {@inheritDoc}
*/
public Set<AudioFormat> getSupportedFormats() {
return this.audioFormats;
}

/**
* {@inheritDoc}
*/
public STTServiceHandle recognize(STTListener sttListener, AudioSource audioSource, Locale locale, Set<String> grammars) throws STTException {
// Validate arguments
if (null == sttListener) {
throw new IllegalArgumentException("The passed STTListener is null");
}
if (null == audioSource) {
throw new IllegalArgumentException("The passed AudioSource is null");
}
boolean isAudioFormatValid = false;
AudioFormat audioFormat = audioSource.getFormat();
for (AudioFormat currentAudioFormat : this.audioFormats) {
if (currentAudioFormat.isCompatible(audioFormat)) {
isAudioFormatValid = true;
break;
}
}
if (!isAudioFormatValid) {
throw new IllegalArgumentException("The passed AudioSource's AudioFormat is unsupported");
}
if (null == audioFormat.getBitRate()) {
throw new IllegalArgumentException("The passed AudioSource's AudioFormat's bit rate is not set");
}
if (!this.locales.contains(locale)) {
throw new IllegalArgumentException("The passed Locale is unsupported");
}
// Note: Currently Kaldi doesn't use grammars. Thus grammars isn't validated

// Setup WsDuplexRecognitionSession
WsDuplexRecognitionSession recognitionSession;
try {
recognitionSession = new WsDuplexRecognitionSession(kaldiWebSocketURL);
} catch(IOException e) {
throw new STTException("Error connected to the server", e);
} catch(URISyntaxException e) {
throw new STTException("Invalid WebSocket URL", e);
}
// One need not call recognitionSession.setContentType(...) [See http://bit.ly/1TGvQzA]
recognitionSession.addRecognitionEventListener(new RecognitionEventListenerKaldi(sttListener));

// Start recognition
STTServiceKaldiRunnable sttServiceKaldiRunnable = new STTServiceKaldiRunnable(recognitionSession, sttListener, audioSource);
Thread thread = new Thread(sttServiceKaldiRunnable);
thread.start();

// Return STTServiceHandleKaldi
return new STTServiceHandleKaldi(sttServiceKaldiRunnable);
}

/**
* Initializes this.locales
*
* @return The locales of this instance
*/
private final HashSet<Locale> initLocales() {
HashSet<Locale> locales = new HashSet<Locale>();
locales.add(new Locale("en", "US")); // For now we only support American English
return locales;
}

/**
* Initializes this.audioFormats
*
* @return The audio formats of this instance
*/
private final HashSet<AudioFormat> initAudioFormats() {
HashSet<AudioFormat> audioFormats = new HashSet<AudioFormat>();

String containers[] = {"NONE", "ASF", "AVI", "DVR-MS", "MKV", "MPEG", "OGG", "QuickTime", "RealMedia", "WAVE"};
String codecs[] = {"PCM_SIGNED", "RAW", "A52", "ADPCM", "FLAC", "GSM", "A-LAW", "MU-LAW", "MP3", "QDM", "SPEEX", "VORBIS", "NIST", "VOC"};

for (String container : containers) {
for (String codec : codecs) {
audioFormats.add(new AudioFormat(container, codec, null, null, null, null)); // TODO: Allow only valid combinations
}
}

return audioFormats;
}
}
Loading

0 comments on commit de54da6

Please sign in to comment.