Skip to content

Commit

Permalink
[mimictts] Add LRU cache
Browse files Browse the repository at this point in the history
And simplifies code with new core capabilities (no more need to create temporary files implementing FixedLengthAudioStream)
Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
  • Loading branch information
dalgwen committed Jul 4, 2023
1 parent 07e6403 commit 6c13d97
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 143 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.FixedLengthAudioStream;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.audio.SizeableAudioStream;

/**
* An AudioStream with an {@link InputStream} inside
*
* @author Gwendal Roulleau - Initial contribution
*/
@NonNullByDefault
public class InputStreamAudioStream extends FixedLengthAudioStream {
public class InputStreamAudioStream extends AudioStream implements SizeableAudioStream {

public InputStream innerInputStream;
public AudioFormat audioFormat;
Expand Down Expand Up @@ -115,9 +115,4 @@ public long transferTo(@Nullable OutputStream out) throws IOException {
public long length() {
// Hands back the stored length value unchanged; presumably the size supplied when this stream was
// created (the constructor is not visible here) - TODO confirm.
return length;
}

/**
* Cloning is not supported by this stream.
*
* @return never returns normally
* @throws AudioException always, with the message "Operation not supported"
*/
@Override
public InputStream getClonedStream() throws AudioException {
throw new AudioException("Operation not supported");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ public class MimicConfiguration {
public Double speakingRate = 1.0;
public Double audioVolatility = 0.667;
public Double phonemeVolatility = 0.8;
public Boolean workaroundServletSink = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,19 @@
*/
package org.openhab.voice.mimic.internal;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
Expand All @@ -38,13 +37,13 @@
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.io.net.http.HttpRequestBuilder;
import org.openhab.core.voice.AbstractCachedTTSService;
import org.openhab.core.voice.TTSCache;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
Expand All @@ -67,11 +66,11 @@
* @author Gwendal Roulleau - Initial contribution
*/
@Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
+ MimicTTSService.SERVICE_PID)
+ MimicTTSService.SERVICE_PID, service = TTSService.class)
@ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
+ " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
@NonNullByDefault
public class MimicTTSService implements TTSService {
public class MimicTTSService extends AbstractCachedTTSService {

private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);

Expand All @@ -84,7 +83,6 @@ public class MimicTTSService implements TTSService {
* Configuration parameters
*/
private static final String PARAM_URL = "url";
private static final String PARAM_WORKAROUNDSERVLETSINK = "workaroundServletSink";
private static final String PARAM_SPEAKINGRATE = "speakingRate";
private static final String PARAM_AUDIOVOLATITLITY = "audioVolatility";
private static final String PARAM_PHONEMEVOLATITLITY = "phonemeVolatility";
Expand All @@ -108,7 +106,9 @@ public class MimicTTSService implements TTSService {
private final HttpClient httpClient;

@Activate
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
Map<String, Object> config) {
super(ttsCache);
updateConfig(config);
this.httpClient = httpClientFactory.getCommonHttpClient();
}
Expand All @@ -130,12 +130,6 @@ private void updateConfig(Map<String, Object> newConfig) {
config.url = param.toString();
}

// workaround
param = newConfig.get(PARAM_WORKAROUNDSERVLETSINK);
if (param != null) {
config.workaroundServletSink = Boolean.parseBoolean(param.toString());
}

// audio volatility
try {
param = newConfig.get(PARAM_AUDIOVOLATITLITY);
Expand Down Expand Up @@ -227,7 +221,7 @@ public Set<AudioFormat> getSupportedFormats() {
* @throws TTSException in case the service is unavailable or a parameter is invalid.
*/
@Override
public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {

if (!availableVoices.contains(voice)) {
// give the service a chance to update:
Expand Down Expand Up @@ -294,24 +288,7 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
}

InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream();
try {
if (!config.workaroundServletSink) {
return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
} else {
// Some audio sinks use the openHAB servlet to get audio. This servlet requires the
// getClonedStream() method, so we cache the file on disk and rely on FileAudioStream
// to implement that method.
return createTemporaryFile(inputStreamFromMimic, AUDIO_FORMAT);
}
} catch (TTSException e) {
try {
inputStreamFromMimic.close();
} catch (IOException e1) {
}
throw e;
}

return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
} else {
String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
+ response.getStatus() + " for reason " + response.getReason();
Expand All @@ -325,16 +302,16 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
}
}

private AudioStream createTemporaryFile(InputStream inputStream, AudioFormat audioFormat) throws TTSException {
File mimicDirectory = new File(OpenHAB.getUserDataFolder(), "mimic");
mimicDirectory.mkdir();
@Override
public String getCacheKey(String text, Voice voice, AudioFormat requestedFormat) {
MessageDigest md;
try {
File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".wav", mimicDirectory);
tempFile.deleteOnExit();
Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
return new AutoDeleteFileAudioStream(tempFile, audioFormat);
} catch (AudioException | IOException e) {
throw new TTSException("Cannot create temporary audio file", e);
md = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
return "nomd5algorithm";
}
byte[] binaryKey = ((text + voice.getUID() + requestedFormat.toString() + config.speakingRate
+ config.audioVolatility + config.phonemeVolatility).getBytes());
return String.format("%032x", new BigInteger(1, md.digest(binaryKey)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@
<description>Mimic 3 URL.</description>
<default>http://localhost:59125</default>
</parameter>
<parameter name="workaroundServletSink" type="boolean" required="false">
<label>Workaround For Servlet-Based Audiosink</label>
<description>Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on
the openHAB audio servlet.</description>
<default>false</default>
</parameter>
<parameter name="speakingRate" min="0" max="1" type="decimal" required="false">
<label>Speaking Rate</label>
<description>Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less
Expand Down

0 comments on commit 6c13d97

Please sign in to comment.