[googletts] Replace custom TTS cache with common TTS cache (#15208)

* [googletts] Replace custom TTS cache with common TTS cache -------- Signed-off-by: Gwendal Roulleau <gwendal.roulleau@gmail.com>
openhab · Jul 10, 2023 · 2899421 · 2899421
1 parent 72c0e1f
commit 2899421
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 170 deletions.
diff --git a/bundles/org.openhab.voice.googletts/README.md b/bundles/org.openhab.voice.googletts/README.md
@@ -2,8 +2,7 @@
 
 Google Cloud TTS Service uses the non-free Google Cloud Text-to-Speech API to convert text or Speech Synthesis Markup Language (SSML) input into audio data of natural human speech. 
 It provides multiple voices, available in different languages and variants and applies DeepMind’s groundbreaking research in WaveNet and Google’s powerful neural networks. 
-The implementation caches the converted texts to reduce the load on the API and make the conversion faster.
-You can find them in the `$OPENHAB_USERDATA/cache/org.openhab.voice.googletts` folder.
+The Google Cloud TTS service uses the openHAB TTS cache to store audio files produced from the most recent queries in order to reduce traffic, improve performance and lower the number of requests.
 Be aware that using this service may incur costs on your Google Cloud account.
 You can find pricing information on the [documentation page](https://cloud.google.com/text-to-speech/#pricing-summary).
 
@@ -47,10 +46,6 @@ It is recommended to clear this configuration parameter afterwards.
 * **Pitch** - The pitch of selected voice, up to 20 semitones.
 * **Volume Gain** - The volume of the output between 16dB and -96dB.
 * **Speaking Rate** - The speaking rate can be 4x faster or slower than the normal rate.
-* **Purge Cache** - Purges the cache e.g. after testing different voice configuration parameters.
-
-When enabled the cache is purged once.
-Make sure to disable this setting again so the cache is maintained after restarts.
 
 In case you would like to set up the service via a text file, create a new file in `$OPENHAB_ROOT/conf/services` named `googletts.cfg`.
 
@@ -63,7 +58,6 @@ org.openhab.voice.googletts:authcode=XXXXX
 org.openhab.voice.googletts:pitch=0
 org.openhab.voice.googletts:volumeGain=0
 org.openhab.voice.googletts:speakingRate=1
-org.openhab.voice.googletts:purgeCache=false
 ```
 
 ### Default Text-to-Speech and Voice Configuration

diff --git a/...ab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java b/...ab.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleCloudAPI.java
@@ -12,17 +12,9 @@
  */
 package org.openhab.voice.googletts.internal;
 
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
 import java.io.IOException;
-import java.math.BigInteger;
 import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Base64;
 import java.util.Dictionary;
 import java.util.HashMap;
@@ -69,10 +61,6 @@
  */
 class GoogleCloudAPI {
 
-    private static final char EXTENSION_SEPARATOR = '.';
-    private static final char UNIX_SEPARATOR = '/';
-    private static final char WINDOWS_SEPARATOR = '\\';
-
     private static final String BEARER = "Bearer ";
 
     private static final String GCP_AUTH_URI = "https://accounts.google.com/o/oauth2/auth";
@@ -103,11 +91,6 @@ class GoogleCloudAPI {
      */
     private final Map<Locale, Set<GoogleTTSVoice>> voices = new HashMap<>();
 
-    /**
-     * Cache folder
-     */
-    private File cacheFolder;
-
     /**
      * Configuration
      */
@@ -122,12 +105,10 @@ class GoogleCloudAPI {
     /**
      * Constructor.
      *
-     * @param cacheFolder Service cache folder
      */
-    GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory, File cacheFolder) {
+    GoogleCloudAPI(ConfigurationAdmin configAdmin, OAuthFactory oAuthFactory) {
         this.configAdmin = configAdmin;
         this.oAuthFactory = oAuthFactory;
-        this.cacheFolder = cacheFolder;
     }
 
     /**
@@ -161,15 +142,6 @@ void setConfig(GoogleTTSConfig config) {
         } else {
             voices.clear();
         }
-
-        // maintain cache
-        if (config.purgeCache) {
-            File[] files = cacheFolder.listFiles();
-            if (files != null && files.length > 0) {
-                Arrays.stream(files).forEach(File::delete);
-            }
-            logger.debug("Cache purged.");
-        }
     }
 
     public void dispose() {
@@ -341,97 +313,32 @@ private List<GoogleTTSVoice> listVoices() throws AuthenticationException, Commun
      * @param codec Requested codec
     * @return Name of the Google audio encoding to use for the requested codec.
      */
-    private String[] getFormatForCodec(String codec) {
+    private String getFormatForCodec(String codec) {
         switch (codec) {
             case AudioFormat.CODEC_MP3:
-                return new String[] { AudioEncoding.MP3.toString(), "mp3" };
+                return AudioEncoding.MP3.toString();
             case AudioFormat.CODEC_PCM_SIGNED:
-                return new String[] { AudioEncoding.LINEAR16.toString(), "wav" };
+                return AudioEncoding.LINEAR16.toString();
             default:
                 throw new IllegalArgumentException("Audio format " + codec + " is not yet supported");
         }
     }
 
     public byte[] synthesizeSpeech(String text, GoogleTTSVoice voice, String codec) {
-        String[] format = getFormatForCodec(codec);
-        String fileNameInCache = getUniqueFilenameForText(text, voice.getTechnicalName());
-        File audioFileInCache = new File(cacheFolder, fileNameInCache + "." + format[1]);
+        String format = getFormatForCodec(codec);
         try {
-            // check if in cache
-            if (audioFileInCache.exists()) {
-                logger.debug("Audio file {} was found in cache.", audioFileInCache.getName());
-                return Files.readAllBytes(audioFileInCache.toPath());
-            }
-
-            // if not in cache, get audio data and put to cache
-            byte[] audio = synthesizeSpeechByGoogle(text, voice, format[0]);
-            if (audio != null) {
-                saveAudioAndTextToFile(text, audioFileInCache, audio, voice.getTechnicalName());
-            }
-            return audio;
+            return synthesizeSpeechByGoogle(text, voice, format);
         } catch (AuthenticationException | CommunicationException e) {
             logger.warn("Error initializing Google Cloud TTS service: {}", e.getMessage());
             if (oAuthService != null) {
                 oAuthFactory.ungetOAuthService(GoogleTTSService.SERVICE_PID);
                 oAuthService = null;
             }
             voices.clear();
-        } catch (FileNotFoundException e) {
-            logger.warn("Could not write file {} to cache: {}", audioFileInCache, e.getMessage());
-        } catch (IOException e) {
-            logger.debug("An unexpected IOException occurred: {}", e.getMessage());
         }
         return null;
     }
 
-    /**
-     * Create cache entry.
-     *
-     * @param text Converted text.
-     * @param cacheFile Cache entry file.
-     * @param audio Byte array of the audio.
-     * @param voiceName Used voice
-     * @throws FileNotFoundException
-     * @throws IOException in case of file handling exceptions
-     */
-    private void saveAudioAndTextToFile(String text, File cacheFile, byte[] audio, String voiceName)
-            throws IOException, FileNotFoundException {
-        logger.debug("Caching audio file {}", cacheFile.getName());
-        try (FileOutputStream audioFileOutputStream = new FileOutputStream(cacheFile)) {
-            audioFileOutputStream.write(audio);
-        }
-
-        // write text to file for transparency too
-        // this allows to know which contents is in which audio file
-        String textFileName = removeExtension(cacheFile.getName()) + ".txt";
-        logger.debug("Caching text file {}", textFileName);
-        try (FileOutputStream textFileOutputStream = new FileOutputStream(new File(cacheFolder, textFileName))) {
-            // @formatter:off
-            StringBuilder sb = new StringBuilder("Config: ")
-                    .append(config.toConfigString())
-                    .append(",voice=")
-                    .append(voiceName)
-                    .append(System.lineSeparator())
-                    .append("Text: ")
-                    .append(text)
-                    .append(System.lineSeparator());
-            // @formatter:on
-            textFileOutputStream.write(sb.toString().getBytes(StandardCharsets.UTF_8));
-        }
-    }
-
-    /**
-     * Removes the extension of a file name.
-     *
-     * @param fileName the file name to remove the extension of
-     * @return the filename without the extension
-     */
-    private String removeExtension(String fileName) {
-        int extensionPos = fileName.lastIndexOf(EXTENSION_SEPARATOR);
-        int lastSeparator = Math.max(fileName.lastIndexOf(UNIX_SEPARATOR), fileName.lastIndexOf(WINDOWS_SEPARATOR));
-        return lastSeparator > extensionPos ? fileName : fileName.substring(0, extensionPos);
-    }
-
     /**
      * Call Google service to synthesize the required text
      *
@@ -476,25 +383,6 @@ private byte[] synthesizeSpeechByGoogle(String text, GoogleTTSVoice voice, Strin
         return null;
     }
 
-    /**
-     * Gets a unique filename for a give text, by creating a MD5 hash of it. It
-     * will be preceded by the locale.
-     * <p>
-     * Sample: "en-US_00a2653ac5f77063bc4ea2fee87318d3"
-     */
-    private String getUniqueFilenameForText(String text, String voiceName) {
-        try {
-            MessageDigest md = MessageDigest.getInstance("MD5");
-            byte[] bytesOfMessage = (config.toConfigString() + text).getBytes(StandardCharsets.UTF_8);
-            String fileNameHash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
-            return voiceName + "_" + fileNameHash;
-        } catch (NoSuchAlgorithmException e) {
-            // should not happen
-            logger.error("Could not create MD5 hash for '{}'", text, e);
-            return null;
-        }
-    }
-
     boolean isInitialized() {
         return oAuthService != null;
     }

diff --git a/...b.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java b/...b.voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSConfig.java
@@ -44,15 +44,10 @@ class GoogleTTSConfig {
      */
     public Double speakingRate = 1d;
 
-    /**
-     * Purge cache after configuration changes.
-     */
-    public Boolean purgeCache = Boolean.FALSE;
-
     @Override
     public String toString() {
         return "GoogleTTSConfig{pitch=" + pitch + ", speakingRate=" + speakingRate + ", volumeGainDb=" + volumeGainDb
-                + ", purgeCache=" + purgeCache + '}';
+                + '}';
     }
 
     String toConfigString() {

diff --git a/....voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java b/....voice.googletts/src/main/java/org/openhab/voice/googletts/internal/GoogleTTSService.java
@@ -15,24 +15,29 @@
 import static org.openhab.voice.googletts.internal.GoogleTTSService.*;
 
 import java.io.ByteArrayInputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.math.BigInteger;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 
+import org.eclipse.jdt.annotation.NonNull;
 import org.eclipse.jdt.annotation.NonNullByDefault;
 import org.eclipse.jdt.annotation.Nullable;
-import org.openhab.core.OpenHAB;
 import org.openhab.core.audio.AudioFormat;
 import org.openhab.core.audio.AudioStream;
 import org.openhab.core.audio.ByteArrayAudioStream;
 import org.openhab.core.audio.utils.AudioWaveUtils;
 import org.openhab.core.auth.client.oauth2.OAuthFactory;
 import org.openhab.core.config.core.ConfigurableService;
+import org.openhab.core.voice.AbstractCachedTTSService;
+import org.openhab.core.voice.TTSCache;
 import org.openhab.core.voice.TTSException;
 import org.openhab.core.voice.TTSService;
 import org.openhab.core.voice.Voice;
@@ -52,10 +57,11 @@
  *
  * @author Gabor Bicskei - Initial contribution
  */
-@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "=" + SERVICE_PID)
+@Component(configurationPid = SERVICE_PID, property = Constants.SERVICE_PID + "="
+        + SERVICE_PID, service = TTSService.class)
 @ConfigurableService(category = SERVICE_CATEGORY, label = SERVICE_NAME
         + " Text-to-Speech", description_uri = SERVICE_CATEGORY + ":" + SERVICE_ID)
-public class GoogleTTSService implements TTSService {
+public class GoogleTTSService extends AbstractCachedTTSService {
     /**
      * Service name
      */
@@ -76,11 +82,6 @@ public class GoogleTTSService implements TTSService {
      */
     static final String SERVICE_PID = "org.openhab." + SERVICE_CATEGORY + "." + SERVICE_ID;
 
-    /**
-     * Cache folder under $userdata
-     */
-    private static final String CACHE_FOLDER_NAME = "cache";
-
     /**
      * Configuration parameters
      */
@@ -90,7 +91,6 @@ public class GoogleTTSService implements TTSService {
     private static final String PARAM_PITCH = "pitch";
     private static final String PARAM_SPEAKING_RATE = "speakingRate";
     private static final String PARAM_VOLUME_GAIN_DB = "volumeGainDb";
-    private static final String PARAM_PURGE_CACHE = "purgeCache";
 
     /**
      * Logger.
@@ -117,8 +117,9 @@ public class GoogleTTSService implements TTSService {
     private final GoogleTTSConfig config = new GoogleTTSConfig();
 
     @Activate
-    public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin,
-            final @Reference OAuthFactory oAuthFactory) {
+    public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin, final @Reference OAuthFactory oAuthFactory,
+            @Reference TTSCache ttsCache, Map<String, Object> config) {
+        super(ttsCache);
         this.configAdmin = configAdmin;
         this.oAuthFactory = oAuthFactory;
     }
@@ -128,15 +129,7 @@ public GoogleTTSService(final @Reference ConfigurationAdmin configAdmin,
      */
     @Activate
     protected void activate(Map<String, Object> config) {
-        // create cache folder
-        File userData = new File(OpenHAB.getUserDataFolder());
-        File cacheFolder = new File(new File(userData, CACHE_FOLDER_NAME), SERVICE_PID);
-        if (!cacheFolder.exists()) {
-            cacheFolder.mkdirs();
-        }
-        logger.debug("Using cache folder {}", cacheFolder.getAbsolutePath());
-
-        apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory, cacheFolder);
+        apiImpl = new GoogleCloudAPI(configAdmin, oAuthFactory);
         updateConfig(config);
     }
 
@@ -236,13 +229,6 @@ private void updateConfig(Map<String, Object> newConfig) {
                 config.volumeGainDb = Double.parseDouble(param);
             }
 
-            // purgeCache
-            param = newConfig.containsKey(PARAM_PURGE_CACHE) ? newConfig.get(PARAM_PURGE_CACHE).toString() : null;
-            if (param != null) {
-                config.purgeCache = Boolean.parseBoolean(param);
-            }
-            logger.trace("New configuration: {}", config.toString());
-
             if (config.clientId != null && !config.clientId.isEmpty() && config.clientSecret != null
                     && !config.clientSecret.isEmpty()) {
                 apiImpl.setConfig(config);
@@ -313,7 +299,7 @@ public Set<AudioFormat> getSupportedFormats() {
      * @throws TTSException in case the service is unavailable or a parameter is invalid.
      */
     @Override
-    public AudioStream synthesize(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
+    public AudioStream synthesizeForCache(String text, Voice voice, AudioFormat requestedFormat) throws TTSException {
         logger.debug("Synthesize '{}' for voice '{}' in format {}", text, voice.getUID(), requestedFormat);
         // Validate known api key
         if (!apiImpl.isInitialized()) {
@@ -361,4 +347,19 @@ private AudioFormat parseAudioFormat(byte[] audio) throws TTSException {
             throw new TTSException("Cannot parse WAV format", e);
         }
     }
+
+    @Override
+    public @NonNull String getCacheKey(@NonNull String text, @NonNull Voice voice,
+            @NonNull AudioFormat requestedFormat) {
+        try {
+            MessageDigest md = MessageDigest.getInstance("MD5");
+            byte[] bytesOfMessage = (config.toConfigString() + text + requestedFormat).getBytes(StandardCharsets.UTF_8);
+            String hash = String.format("%032x", new BigInteger(1, md.digest(bytesOfMessage)));
+            return ((GoogleTTSVoice) voice).getTechnicalName() + "_" + hash;
+        } catch (NoSuchAlgorithmException e) {
+            // should not happen
+            logger.warn("Could not create MD5 hash for '{}'", text, e);
+            return "nomd5algorithm";
+        }
+    }
 }
diff --git a/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml b/bundles/org.openhab.voice.googletts/src/main/resources/OH-INF/config/config.xml
@@ -43,13 +43,5 @@
 			<description>Speaking rate can be 4x faster or slower than the normal rate.</description>
 			<default>1</default>
 		</parameter>
-		<parameter name="purgeCache" type="boolean">
-			<advanced>true</advanced>
-			<label>Purge Cache</label>
-			<description>Purges the cache e.g. after testing different voice configuration parameters. When enabled the cache is
-				purged once. Make sure to disable this setting again so the cache is maintained after restarts.</description>
-			<default>false</default>
-		</parameter>
 	</config-description>
-
 </config-description:config-descriptions>