Skip to content

Commit

Permalink
[mimictts] Add LRU cache
Browse files Browse the repository at this point in the history
Also simplifies the code by using new core capabilities (there is no longer any need to create temporary files implementing FixedLengthAudioStream).
Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
  • Loading branch information
dalgwen committed Jun 19, 2023
1 parent 07e6403 commit 33b4fb6
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 148 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,16 @@

import org.eclipse.jdt.annotation.NonNullByDefault;
import org.eclipse.jdt.annotation.Nullable;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.FixedLengthAudioStream;
import org.openhab.core.audio.AudioStream;

/**
* An AudioStream with an {@link InputStream} inside
*
* @author Gwendal Roulleau - Initial contribution
*/
@NonNullByDefault
public class InputStreamAudioStream extends FixedLengthAudioStream {
public class InputStreamAudioStream extends AudioStream {

public InputStream innerInputStream;
public AudioFormat audioFormat;
Expand Down Expand Up @@ -110,14 +109,4 @@ public boolean markSupported() {
public long transferTo(@Nullable OutputStream out) throws IOException {
    // Hand the whole copy over to the wrapped stream and report the count it returns.
    final long transferred = innerInputStream.transferTo(out);
    return transferred;
}

@Override
public long length() {
return length;
}

@Override
public InputStream getClonedStream() throws AudioException {
throw new AudioException("Operation not supported");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ public class MimicConfiguration {
public Double speakingRate = 1.0;
public Double audioVolatility = 0.667;
public Double phonemeVolatility = 0.8;
public Boolean workaroundServletSink = false;
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,19 @@
*/
package org.openhab.voice.mimic.internal;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
Expand All @@ -38,13 +37,13 @@
import org.eclipse.jetty.client.util.StringContentProvider;
import org.eclipse.jetty.http.HttpHeader;
import org.eclipse.jetty.http.HttpStatus;
import org.openhab.core.OpenHAB;
import org.openhab.core.audio.AudioException;
import org.openhab.core.audio.AudioFormat;
import org.openhab.core.audio.AudioStream;
import org.openhab.core.config.core.ConfigurableService;
import org.openhab.core.io.net.http.HttpClientFactory;
import org.openhab.core.io.net.http.HttpRequestBuilder;
import org.openhab.core.voice.AbstractCachedTTSService;
import org.openhab.core.voice.TTSCache;
import org.openhab.core.voice.TTSException;
import org.openhab.core.voice.TTSService;
import org.openhab.core.voice.Voice;
Expand All @@ -67,11 +66,11 @@
* @author Gwendal Roulleau - Initial contribution
*/
@Component(configurationPid = MimicTTSService.SERVICE_PID, property = Constants.SERVICE_PID + "="
+ MimicTTSService.SERVICE_PID)
+ MimicTTSService.SERVICE_PID, service = TTSService.class)
@ConfigurableService(category = MimicTTSService.SERVICE_CATEGORY, label = MimicTTSService.SERVICE_NAME
+ " Text-to-Speech", description_uri = MimicTTSService.SERVICE_CATEGORY + ":" + MimicTTSService.SERVICE_ID)
@NonNullByDefault
public class MimicTTSService implements TTSService {
public class MimicTTSService extends AbstractCachedTTSService {

private final Logger logger = LoggerFactory.getLogger(MimicTTSService.class);

Expand Down Expand Up @@ -108,7 +107,9 @@ public class MimicTTSService implements TTSService {
private final HttpClient httpClient;

@Activate
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, Map<String, Object> config) {
public MimicTTSService(final @Reference HttpClientFactory httpClientFactory, @Reference TTSCache ttsCache,
Map<String, Object> config) {
super(ttsCache);
updateConfig(config);
this.httpClient = httpClientFactory.getCommonHttpClient();
}
Expand All @@ -130,12 +131,6 @@ private void updateConfig(Map<String, Object> newConfig) {
config.url = param.toString();
}

// workaround
param = newConfig.get(PARAM_WORKAROUNDSERVLETSINK);
if (param != null) {
config.workaroundServletSink = Boolean.parseBoolean(param.toString());
}

// audio volatility
try {
param = newConfig.get(PARAM_AUDIOVOLATITLITY);
Expand Down Expand Up @@ -214,7 +209,7 @@ public void refreshVoices() {

@Override
public Set<AudioFormat> getSupportedFormats() {
return Set.<AudioFormat> of(AUDIO_FORMAT);
return Set.<AudioFormat>of(AUDIO_FORMAT);
}

/**
Expand All @@ -227,7 +222,7 @@ public Set<AudioFormat> getSupportedFormats() {
* @throws TTSException in case the service is unavailable or a parameter is invalid.
*/
@Override
public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {

if (!availableVoices.contains(voice)) {
// give the service a chance to update its voice list:
Expand Down Expand Up @@ -294,24 +289,7 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
}

InputStream inputStreamFromMimic = inputStreamResponseListener.getInputStream();
try {
if (!config.workaroundServletSink) {
return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
} else {
// Some audio sinks use the openHAB servlet to get audio. This servlet requires the
// getClonedStream() method, so we cache the file on disk and thereby implement
// the method thanks to FileAudioStream.
return createTemporaryFile(inputStreamFromMimic, AUDIO_FORMAT);
}
} catch (TTSException e) {
try {
inputStreamFromMimic.close();
} catch (IOException e1) {
}
throw e;
}

return new InputStreamAudioStream(inputStreamFromMimic, AUDIO_FORMAT, length);
} else {
String errorMessage = "Cannot get wav from mimic url " + urlTTS + " with HTTP response code "
+ response.getStatus() + " for reason " + response.getReason();
Expand All @@ -325,16 +303,16 @@ public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFor
}
}

private AudioStream createTemporaryFile(InputStream inputStream, AudioFormat audioFormat) throws TTSException {
File mimicDirectory = new File(OpenHAB.getUserDataFolder(), "mimic");
mimicDirectory.mkdir();
@Override
public String getCacheKey(String text, Voice voice, AudioFormat requestedFormat) {
MessageDigest md;
try {
File tempFile = File.createTempFile(UUID.randomUUID().toString(), ".wav", mimicDirectory);
tempFile.deleteOnExit();
Files.copy(inputStream, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
return new AutoDeleteFileAudioStream(tempFile, audioFormat);
} catch (AudioException | IOException e) {
throw new TTSException("Cannot create temporary audio file", e);
md = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
return "nomd5algorithm";
}
byte[] binaryKey = ((text + voice.getUID() + requestedFormat.toString() + config.speakingRate
+ config.audioVolatility + config.phonemeVolatility).getBytes());
return String.format("%032x", new BigInteger(1, md.digest(binaryKey)));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,6 @@
<description>Mimic 3 URL.</description>
<default>http://localhost:59125</default>
</parameter>
<parameter name="workaroundServletSink" type="boolean" required="false">
<label>Workaround For Servlet-Based Audiosink</label>
<description>Enable this workaround to store temporarily the file on disk. Needed if you play on audiosink based on
the openHAB audio servlet.</description>
<default>false</default>
</parameter>
<parameter name="speakingRate" min="0" max="1" type="decimal" required="false">
<label>Speaking Rate</label>
<description>Controls how fast the voice speaks the text. A value of 1 is the speed of the training dataset. Less
Expand Down

0 comments on commit 33b4fb6

Please sign in to comment.