diff --git a/ovos_plugin_manager/templates/tts.py b/ovos_plugin_manager/templates/tts.py
index 841d1270..2bcc288a 100644
--- a/ovos_plugin_manager/templates/tts.py
+++ b/ovos_plugin_manager/templates/tts.py
@@ -21,45 +21,35 @@
         # would hang here
         engine.playback.stop()
 """
-import hashlib
-import os.path
+import inspect
 import random
 import re
-from os.path import isfile, join
+import subprocess
+from os.path import isfile, join, splitext
+from pathlib import Path
 from queue import Queue, Empty
 from threading import Thread
 from time import time, sleep
-import subprocess
-import os
-from inspect import signature
 
+import requests
+from phoneme_guesser.exceptions import FailedToGuessPhonemes
+
+from ovos_plugin_manager.utils.tts_cache import TextToSpeechCache, hash_sentence
 from ovos_utils import resolve_resource_file
+from ovos_utils.configuration import read_mycroft_config
 from ovos_utils.enclosure.api import EnclosureAPI
+from ovos_utils.file_utils import get_cache_directory
 from ovos_utils.lang.phonemes import get_phonemes
-from phoneme_guesser.exceptions import FailedToGuessPhonemes
 from ovos_utils.lang.visimes import VISIMES
 from ovos_utils.log import LOG
 from ovos_utils.messagebus import Message, FakeBus as BUS
-from ovos_utils.signal import check_for_signal, create_signal
-from ovos_utils.sound import play_mp3, play_wav
 from ovos_utils.metrics import Stopwatch
-from ovos_utils.configuration import read_mycroft_config
+from ovos_utils.signal import check_for_signal, create_signal
+from ovos_utils.sound import play_audio
 
 EMPTY_PLAYBACK_QUEUE_TUPLE = (None, None, None, None, None)
 
 
-def get_cache_directory(folder):
-    if os.name == 'nt':
-        import tempfile
-        return tempfile.mkdtemp(folder)
-    else:
-        from memory_tempfile import MemoryTempfile
-        tempfile = MemoryTempfile(fallback=True)
-        path = os.path.join(tempfile.gettempdir(), folder)
-        if not os.path.exists(path):
-            os.makedirs(path)
-        return path
-
 class PlaybackThread(Thread):
     """Thread class for playing back tts audio and sending
     viseme data to enclosure.
@@ -74,6 +64,7 @@ def __init__(self, queue):
         self.enclosure = None
         self.p = None
         self.tts = None
+        self._now_playing = None
 
     def init(self, tts):
         self.tts = tts
@@ -93,6 +84,44 @@ def clear_queue(self):
         except Exception:
             pass
 
+    def on_start(self):  # invoked by _play() before the audio is played
+        self.blink(0.5)
+        if not self._processing_queue:  # not yet flagged as processing
+            self._processing_queue = True
+            self.tts.begin_audio()
+
+    def on_end(self, listen=False):  # listen is forwarded to tts.end_audio
+        if self._processing_queue:  # nothing to end unless flagged as processing
+            self.tts.end_audio(listen)
+            self._processing_queue = False
+        self.blink(0.2)
+
+    def _play(self):  # plays the item in self._now_playing, then clears it
+        listen = False
+        try:
+            if len(self._now_playing) == 5:
+                # 5-tuple ("new mycroft style"): also carries the listen flag
+                snd_type, data, visemes, ident, listen = self._now_playing
+            else:
+                # 4-tuple ("old mycroft style"): listen keeps its False default
+                snd_type, data, visemes, ident = self._now_playing
+            self.on_start()
+            self.p = play_audio(data)
+            if visemes:
+                self.show_visemes(visemes)
+            if self.p:
+                self.p.communicate()  # presumably a Popen: waits for the player
+                self.p.wait()
+            if self.queue.empty():  # nothing else queued, close this run
+                self.on_end(listen)
+        except Empty:
+            pass
+        except Exception as e:
+            LOG.exception(e)
+            if self._processing_queue:  # still report the end after a failure
+                self.on_end(listen)
+        self._now_playing = None
+
     def run(self, cb=None):
         """Thread main loop. Get audio and extra data from queue and play.
 
@@ -111,45 +140,13 @@ def run(self, cb=None):
         """
         self._paused = False
         while not self._terminated:
-            while self._paused:  # barge-in support etc
+            while self._paused:
                 sleep(0.2)
-            listen = False
             try:
-                snd_data = self.queue.get(timeout=2)
-                if len(snd_data) == 5:
-                    # new mycroft style
-                    snd_type, data, visemes, ident, listen = snd_data
-                else:
-                    # old mycroft style
-                    snd_type, data, visemes, ident = snd_data
-
-                self.blink(0.5)
-                if not self._processing_queue:
-                    self._processing_queue = True
-                    self.tts.begin_audio()
-
-                if snd_type == 'wav':
-                    self.p = play_wav(data)
-                elif snd_type == 'mp3':
-                    self.p = play_mp3(data)
-
-                if visemes:
-                    self.show_visemes(visemes)
-                if self.p:
-                    self.p.communicate()
-                    self.p.wait()
-
-                if self.queue.empty():
-                    self.tts.end_audio(listen)
-                    self._processing_queue = False
-                self.blink(0.2)
-            except Empty:
-                pass
+                self._now_playing = self.queue.get(timeout=2)
+                self._play()
             except Exception as e:
-                LOG.exception(e)
-                if self._processing_queue:
-                    self.tts.end_audio(listen)
-                    self._processing_queue = False
+                pass
 
     def show_visemes(self, pairs):
         """Send viseme data to enclosure
@@ -166,12 +163,13 @@ def show_visemes(self, pairs):
     def pause(self):
         """pause thread"""
         self._paused = True
-        # TODO is this desired?
-        # if self.playback_process:
-        #    self.playback_process.terminate()
+        if self.p:
+            self.p.terminate()
 
     def resume(self):
         """resume thread"""
+        if self._now_playing:
+            self._play()
         self._paused = False
 
     def clear(self):
@@ -185,6 +183,7 @@ def blink(self, rate=1.0):
 
     def stop(self):
         """Stop thread"""
+        self._now_playing = None
         self._terminated = True
         self.clear_queue()
 
@@ -206,7 +205,6 @@ class TTS:
     def __init__(self, lang="en-us", config=None, validator=None,
                  audio_ext='wav', phonetic_spelling=True, ssml_tags=None):
         self.log_timestamps = False
-        super(TTS, self).__init__()
         if not config:
             try:
                 config_core = read_mycroft_config() or {}
@@ -217,36 +215,41 @@ def __init__(self, lang="en-us", config=None, validator=None,
 
         self.stopwatch = Stopwatch()
         self.tts_name = self.__class__.__name__
-        self.bus = BUS()
+        self.bus = BUS()  # initialized in "init" step
         self.lang = lang or config.get("lang") or 'en-us'
         self.config = config or {}
         self.validator = validator or TTSValidator(self)
         self.phonetic_spelling = phonetic_spelling
         self.audio_ext = audio_ext
         self.ssml_tags = ssml_tags or []
+        self.log_timestamps = self.config.get("log_timestamps", False)
 
-        self.voice = self.config.get("voice")
-        self.cache_dir = get_cache_directory(self.tts_name)
-        self.filename = join(self.cache_dir, 'tts.' + self.audio_ext)
+        self.voice = self.config.get("voice") or "default"
+        # TODO can self.filename be deprecated ? is it used anywhere at all?
+        cache_dir = get_cache_directory(self.tts_name)
+        self.filename = join(cache_dir, 'tts.' + self.audio_ext)
         self.enclosure = None
         random.seed()
         self.queue = Queue()
         self.playback = PlaybackThread(self.queue)
-        # NOTE playback start call has been omitted and moved to init method
-        # init is called by mycroft, but non mycroft usage wont call it,
-        # meaning outside mycroft the enclosure is not set, bus is dummy and
-        # playback thread is not used, playback queue is not wanted
-        # if some module is calling get_tts (which is the correct usage)
-        self.clear_cache()
+        # NOTE: self.playback.start() was moved to the init() method.
+        #   The playback queue is not wanted when only get_tts is needed.
+        #   init() is called by mycroft, but non-mycroft usage won't call it;
+        #   outside mycroft the enclosure is not set, the bus is a dummy and
+        #   the playback thread is not used.
         self.spellings = self.load_spellings()
-        self.log_timestamps = self.config.get("log_timestamps", False)
+        tts_id = join(self.tts_name, self.voice, self.lang)
+        self.cache = TextToSpeechCache(
+            self.config, tts_id, self.audio_ext
+        )
+        self.cache.curate()
         self.handle_metric({"metric_type": "tts.init"})
 
     def handle_metric(self, metadata=None):
         """ receive timing metrics for diagnostics
         does nothing by default but plugins might use it, eg, NeonCore"""
         if self.log_timestamps:
-            LOG.debug(f"stopwatch: {self.stopwatch.time} metric: {metadata}")
+            LOG.debug(f"time delta: {self.stopwatch.delta} metric: {metadata}")
 
     def load_spellings(self, config=None):
         """Load phonetic spellings of words as dictionary."""
@@ -275,7 +278,6 @@ def begin_audio(self):
         create_signal("isSpeaking")
         # Create signals informing start of speech
         self.bus.emit(Message("recognizer_loop:audio_output_start"))
-        self.stopwatch.start()
         self.handle_metric({"metric_type": "tts.start"})
 
     def end_audio(self, listen=False):
@@ -296,8 +298,9 @@ def end_audio(self, listen=False):
 
         # This check will clear the "signal"
         check_for_signal("isSpeaking")
-        self.stopwatch.stop()
         self.handle_metric({"metric_type": "tts.end"})
+        self.stopwatch.stop()
+        self.cache.curate()
 
     def init(self, bus=None):
         """ Performs intial setup of TTS object.
@@ -412,58 +415,115 @@ def execute(self, sentence, ident=None, listen=False, **kwargs):
             # Re-raise to allow the Exception to be handled externally as well.
             raise
 
-    def _execute(self, sentence, ident, listen, **kwargs):
+    def _replace_phonetic_spellings(self, sentence):
         if self.phonetic_spelling:
             for word in re.findall(r"[\w']+", sentence):
                 if word.lower() in self.spellings:
-                    sentence = sentence.replace(word,
-                                                self.spellings[word.lower()])
+                    spelled = self.spellings[word.lower()]
+                    sentence = sentence.replace(word, spelled)
+        return sentence
 
+    def _execute(self, sentence, ident, listen, **kwargs):
+        self.stopwatch.start()
+        sentence = self._replace_phonetic_spellings(sentence)
         chunks = self._preprocess_sentence(sentence)
         # Apply the listen flag to the last chunk, set the rest to False
         chunks = [(chunks[i], listen if i == len(chunks) - 1 else False)
                   for i in range(len(chunks))]
         self.handle_metric({"metric_type": "tts.preprocessed",
                             "n_chunks": len(chunks)})
-        for sentence, l in chunks:
-            key = str(hashlib.md5(
-                sentence.encode('utf-8', 'ignore')).hexdigest())
-            wav_file = os.path.join(self.cache_dir, key + '.' + self.audio_ext)
 
-            if os.path.exists(wav_file):
-                LOG.debug("TTS cache hit")
-                phonemes = self.load_phonemes(key)
-            else:
-                self.handle_metric({"metric_type": "tts.synth.start"})
-                lang = kwargs.get("lang")
-                if not lang and kwargs.get("message"):
-                    # some HolmesV derivatives accept a message object
-                    try:
-                        lang = kwargs["message"].data.get("lang") or \
-                               kwargs["message"].context.get("lang")
-                    except:  # not a mycroft message object
-                        pass
-                lang = lang or self.lang
-                # check the signature to either pass lang or not
-                if len(signature(self.get_tts).parameters) == 3:
-                    wav_file, phonemes = self.get_tts(sentence, wav_file,
-                                                      lang=lang)
-                else:
-                    wav_file, phonemes = self.get_tts(sentence, wav_file)
-                self.handle_metric({"metric_type": "tts.synth.finished"})
-                if phonemes:
-                    self.save_phonemes(key, phonemes)
-                else:
-                    try:
-                        # TODO, debug why phonemes fail ?
-                        phonemes = get_phonemes(sentence)
-                        self.handle_metric({"metric_type": "tts.phonemes.guess"})
-                    except (ImportError, FailedToGuessPhonemes):
-                        pass
-            vis = self.viseme(phonemes) if phonemes else None
-            self.queue.put((self.audio_ext, wav_file, vis, ident, l))
+        # synth -> queue for playback
+        for sentence, l in chunks:
+            sentence_hash = hash_sentence(sentence)
+            if sentence_hash in self.cache:  # load from cache
+                audio_file, phonemes = self._get_from_cache(sentence, sentence_hash)
+            else:  # synth + cache
+                audio_file, phonemes = self._synth(sentence, sentence_hash, **kwargs)
+
+            viseme = self.viseme(phonemes) if phonemes else None
+            audio_ext = self._determine_ext(audio_file)
+            self.queue.put(
+                (audio_ext, str(audio_file), viseme, ident, l)
+            )
             self.handle_metric({"metric_type": "tts.queued"})
 
+    def _determine_ext(self, audio_file):
+        """Return the extension of ``audio_file`` without the leading dot.
+
+        The extension is read from the file name because a plugin may support
+        several formats or keep its cache in another one; ``self.audio_ext``
+        is only the fallback for a name without extension.
+        """
+        # splitext() never raises on a str, so check for an empty suffix
+        _, audio_ext = splitext(str(audio_file))
+        return audio_ext[1:] or self.audio_ext
+
+    def _synth(self, sentence, sentence_hash=None, **kwargs):
+        """Synthesize ``sentence`` with get_tts and register it in the cache.
+
+        ``lang`` is taken from kwargs, else from a ``message`` kwarg, else
+        ``self.lang``. Returns the ``(audio, phonemes)`` pair.
+        """
+        self.handle_metric({"metric_type": "tts.synth.start"})
+        sentence_hash = sentence_hash or hash_sentence(sentence)
+        audio = self.cache.define_audio_file(sentence_hash)
+
+        # NOTE: per-request language is ovos only, not in mycroft-core!
+        lang = kwargs.get("lang")
+        if not lang and kwargs.get("message"):
+            try:  # get lang from message object if possible
+                lang = kwargs["message"].data.get("lang") or \
+                       kwargs["message"].context.get("lang")
+            except Exception:  # not a mycroft message object
+                pass
+        kwargs["lang"] = lang or self.lang
+
+        # only pass the kwargs this plugin's get_tts accepts:
+        #   mycroft-core -> no kwargs, ovos -> lang, neon-core -> message
+        accepted = inspect.signature(self.get_tts).parameters  # computed once
+        kwargs = {k: v for k, v in kwargs.items()
+                  if k in accepted and k not in ("sentence", "wav_file")}
+
+        # finally do the TTS synth, then cache sentence + phonemes
+        audio.path, phonemes = self.get_tts(sentence, str(audio), **kwargs)
+        self.handle_metric({"metric_type": "tts.synth.finished"})
+        self._cache_sentence(sentence, audio, phonemes, sentence_hash)
+        return audio, phonemes
+
+    def _cache_phonemes(self, sentence, phonemes=None, sentence_hash=None):
+        """Save phonemes (guessed when not given); return the file or None."""
+        key = sentence_hash or hash_sentence(sentence)
+        if not phonemes:
+            try:  # TODO debug why get_phonemes fails in the first place
+                phonemes = get_phonemes(sentence)
+                self.handle_metric({"metric_type": "tts.phonemes.guess"})
+            except (ImportError, FailedToGuessPhonemes):
+                pass
+        # nothing is written when phonemes were neither given nor guessed
+        return self.save_phonemes(key, phonemes) if phonemes else None
+
+    def _cache_sentence(self, sentence, audio_file, phonemes=None, sentence_hash=None):
+        """Register the audio (and phoneme file, if any) in the TTS cache."""
+        key = sentence_hash or hash_sentence(sentence)
+        if isinstance(audio_file.path, str):  # the cache objects expect a Path
+            audio_file.path = Path(audio_file.path)
+        phoneme_file = self._cache_phonemes(sentence, phonemes, key)
+        self.cache.cached_sentences[key] = (audio_file, phoneme_file)
+        self.handle_metric({"metric_type": "tts.synth.cached"})
+
+    def _get_from_cache(self, sentence, sentence_hash=None):
+        """Return ``(audio_file, phonemes)`` of an already cached sentence."""
+        sentence_hash = sentence_hash or hash_sentence(sentence)
+        audio_file, pho_file = self.cache.cached_sentences[sentence_hash]
+        LOG.info(f"Found {audio_file.name} in TTS cache")
+        if not pho_file:
+            # guess + cache the phonemes; the hash goes by keyword because the
+            # second positional parameter of _cache_phonemes is ``phonemes``
+            pho_file = self._cache_phonemes(sentence, sentence_hash=sentence_hash)
+        phonemes = pho_file.load() if pho_file else None
+        return audio_file, phonemes
+
     def viseme(self, phonemes):
         """Create visemes from phonemes.
 
@@ -492,7 +552,7 @@ def viseme(self, phonemes):
 
     def clear_cache(self):
         """ Remove all cached files. """
-        pass
+        self.cache.clear()
 
     def save_phonemes(self, key, phonemes):
         """Cache phonemes
@@ -501,13 +561,9 @@ def save_phonemes(self, key, phonemes):
             key (str):        Hash key for the sentence
             phonemes (str):   phoneme string to save
         """
-        pho_file = os.path.join(self.cache_dir, key + ".pho")
-        try:
-            with open(pho_file, "w") as cachefile:
-                cachefile.write(phonemes)
-        except Exception:
-            LOG.exception("Failed to write {} to cache".format(pho_file))
-            pass
+        phoneme_file = self.cache.define_phoneme_file(key)
+        phoneme_file.save(phonemes)
+        return phoneme_file
 
     def load_phonemes(self, key):
         """Load phonemes from cache file.
@@ -515,15 +571,8 @@ def load_phonemes(self, key):
         Arguments:
             key (str): Key identifying phoneme cache
         """
-        pho_file = os.path.join(self.cache_dir, key + ".pho")
-        if os.path.exists(pho_file):
-            try:
-                with open(pho_file, "r") as cachefile:
-                    phonemes = cachefile.read().strip()
-                return phonemes
-            except Exception:
-                LOG.debug("Failed to read .PHO from cache")
-        return None
+        phoneme_file = self.cache.define_phoneme_file(key)
+        return phoneme_file.load()
 
     def stop(self):
         try:
@@ -628,3 +677,38 @@ def get_tts(self, sentence, wav_file, lang=None):
         files, phonemes = self.sentence_to_files(sentence)
         wav_file = self.concat(files, wav_file)
         return wav_file, phonemes
+
+
+class RemoteTTSException(Exception):
+    """Base error of the remote TTS engines."""
+
+
+class RemoteTTSTimeoutException(RemoteTTSException):
+    """RemoteTTSException subclass; by its name, meant for timeouts."""
+
+
+class RemoteTTS(TTS):
+    """Abstract class for a Remote TTS engine implementation.
+
+    Only provided for backwards compatibility, usage is discouraged.
+    """
+
+    def __init__(self, lang, config, url, api_path, validator):
+        super(RemoteTTS, self).__init__(lang, config, validator)
+        self.api_path = api_path
+        self.auth = None  # handed to requests.get as ``auth`` in get_tts
+        self.url = config.get('url', url).rstrip('/')  # config beats ``url``
+
+    def build_request_params(self, sentence):
+        """Return the query params for ``sentence``; None unless overridden."""
+
+    def get_tts(self, sentence, wav_file, lang=None):
+        """Write the remote audio to wav_file; raises on a non-200 reply."""
+        r = requests.get(self.url + self.api_path, auth=self.auth,
+                         params=self.build_request_params(sentence),
+                         timeout=10, verify=False)  # NOTE(review): no TLS check
+        if r.status_code != 200:  # callers unpack a tuple, so never return None
+            raise RemoteTTSException(f"TTS request failed: HTTP {r.status_code}")
+        with open(wav_file, 'wb') as f:
+            f.write(r.content)
+        return wav_file, None
diff --git a/ovos_plugin_manager/utils.py b/ovos_plugin_manager/utils/__init__.py
similarity index 100%
rename from ovos_plugin_manager/utils.py
rename to ovos_plugin_manager/utils/__init__.py
diff --git a/ovos_plugin_manager/utils/tts_cache.py b/ovos_plugin_manager/utils/tts_cache.py
new file mode 100644
index 00000000..6d431cc2
--- /dev/null
+++ b/ovos_plugin_manager/utils/tts_cache.py
@@ -0,0 +1,329 @@
+import hashlib
+import json
+import os
+from os.path import join
+import shutil
+from pathlib import Path
+from stat import S_ISREG, ST_MTIME, ST_MODE, ST_SIZE
+
+import xdg.BaseDirectory
+
+from ovos_utils.file_utils import get_cache_directory
+from ovos_utils.log import LOG
+
+
+def hash_sentence(sentence: str):
+    """Return the MD5 hex digest that names the sentence's cache files.
+
+    Args:
+        sentence: the text to hash; it is encoded as UTF-8 and characters
+            that cannot be encoded are ignored
+    """
+    raw = sentence.encode("utf-8", "ignore")
+    return hashlib.md5(raw).hexdigest()
+
+
+def hash_from_path(path: Path) -> str:
+    """Return the sentence hash encoded in a cached file's name.
+
+    The folder structure and the last extension are dropped; what remains
+    is returned unchanged.
+
+    NOTE: nothing is hashed here, so the name is misleading. The function
+          is kept for backwards compatible imports and is meant only for
+          cached TTS files, which are already named after a hash.
+
+    Args:
+        path: path of the cached file
+
+    Returns:
+        File name without folder and extension.
+    """
+    # e.g. <cache dir>/<hash>.wav -> <hash>
+    without_ext = path.with_suffix('')
+    return without_ext.name
+
+
+def mb_to_bytes(size):
+    """Convert a size given in megabytes to bytes (1 MB = 1024 * 1024 B).
+
+    Args:
+        size(int/float): size in Mega Bytes
+    Returns:
+        (int/float) size in bytes
+    """
+    kilobytes = size * 1024
+    return kilobytes * 1024
+
+
+def _get_cache_entries(directory):
+    """Collect modification time, size and path of each regular file.
+
+    Args:
+        directory (str): path to directory to check
+
+    Returns:
+        generator of (modification time, size, filepath) tuples; the files
+        are only stat'ed while the generator is consumed
+    """
+    paths = [os.path.join(directory, name) for name in os.listdir(directory)]
+    stats = ((os.stat(path), path) for path in paths)
+    # anything that is not a regular file (e.g. a directory) is left out
+    return ((info[ST_MTIME], info[ST_SIZE], path)
+            for info, path in stats if S_ISREG(info[ST_MODE]))
+
+
+def _delete_oldest(entries, bytes_needed):
+    """Remove files, oldest modification time first, until enough is freed.
+
+    Args:
+        entries (iterable): (modification time, size, path) tuples
+        bytes_needed (int): disk space that needs to be freed
+
+    Returns:
+        (list) all removed paths
+    """
+    removed = []
+    freed = 0
+    # sorted() orders the tuples by their modification time first
+    for _, size, path in sorted(entries):
+        try:
+            os.remove(path)
+            freed += size
+            removed.append(path)
+        except Exception:
+            pass  # best effort: a file that cannot be removed is skipped
+        if freed > bytes_needed:
+            break  # deleted enough!
+
+    return removed
+
+
+def curate_cache(directory, min_free_percent=5.0, min_free_disk=50):
+    """Free disk space by deleting the oldest files of a cache directory.
+
+    Files are deleted only when both the percentage of free space and the
+    absolute free space are below their limits. Every regular file in the
+    directory is assumed to be safe to delete.
+
+    Args:
+        directory (str): directory path that holds cached files
+        min_free_percent (float): percentage (0.0-100.0) of drive to keep free,
+                                  default is 5% if not specified.
+        min_free_disk (float): minimum allowed disk space in MB, default
+                               value is 50 MB if not specified.
+
+    Returns:
+        (list) paths of the deleted files, empty when nothing was deleted
+    """
+    # Simpleminded implementation -- keep a certain percentage of the
+    # disk available.
+    # TODO: Would be easy to add more options, like whitelisted files, etc.
+    usage = shutil.disk_usage(directory)
+    free_percent = usage.free * 100 / usage.total
+    free_limit = mb_to_bytes(min_free_disk)
+
+    if not (free_percent < min_free_percent and usage.free < free_limit):
+        return []  # at least one of the two limits is still respected
+
+    LOG.info('Low diskspace detected, cleaning cache')
+
+    # bytes that must be deleted to get back to min_free_percent
+    shortfall = (min_free_percent - free_percent) / 100.0 * usage.total
+    bytes_needed = int(shortfall + 1.0)
+
+    # get all entries in the directory w/ stats
+    entries = _get_cache_entries(directory)
+    # delete as many as needed starting with the oldest
+    deleted = _delete_oldest(entries, bytes_needed)
+    return deleted
+
+
+class AudioFile:
+    """Audio file of a cached sentence, named ``<hash>.<file_type>``."""
+
+    def __init__(self, cache_dir: Path, sentence_hash: str, file_type: str):
+        self.name = f"{sentence_hash}.{file_type}"
+        folder = Path(cache_dir) if isinstance(cache_dir, str) else cache_dir
+        self.path = folder.joinpath(self.name)
+        self.audio_data = None  # filled by save() and load()
+
+    def save(self, audio: bytes):
+        """Keep ``audio`` on the instance and write it to the cache file."""
+        try:
+            self.audio_data = audio
+            with open(self.path, "wb") as out:
+                out.write(audio)
+        except Exception:  # a failed write is logged, not raised
+            LOG.exception(f"Failed to write {self.name} to cache")
+
+    def load(self):
+        """Read the file into ``audio_data`` if it exists; return ``audio_data``."""
+        if self.path.exists():
+            try:
+                with open(self.path, "rb") as src:
+                    self.audio_data = src.read()
+            except Exception:
+                LOG.exception(f"Failed to read {self.name} audio data from cache")
+        return self.audio_data
+
+    def exists(self):
+        """Tell whether the file is present on disk."""
+        return self.path.exists()
+
+    def __str__(self):
+        """Return the file path as a string."""
+        return str(self.path)
+
+
+class PhonemeFile:
+    """Phoneme file of a cached sentence, stored as JSON in ``<hash>.pho``."""
+
+    def __init__(self, cache_dir: Path, sentence_hash: str):
+        self.name = f"{sentence_hash}.pho"
+        folder = Path(cache_dir) if isinstance(cache_dir, str) else cache_dir
+        self.path = folder.joinpath(self.name)
+        self.phonemes = None  # set by load() and save()
+
+    def load(self):
+        """Parse the file into ``phonemes`` if it exists; return ``phonemes``."""
+        if self.path.exists():
+            try:
+                with open(self.path) as phoneme_file:
+                    text = phoneme_file.read().strip()
+                self.phonemes = json.loads(text)
+            except Exception:
+                LOG.exception(f"Failed to read {self.name} phonemes from cache")
+        return self.phonemes
+
+    def save(self, phonemes):
+        """Keep ``phonemes`` on the instance and write them as JSON."""
+        self.phonemes = phonemes
+        try:
+            rec = json.dumps(phonemes)  # serialize before the file is truncated
+            with open(self.path, "w") as phoneme_file:
+                phoneme_file.write(rec)
+        except Exception:  # logged with traceback, like load(); not raised
+            LOG.exception(f"Failed to write {self.name} to cache")
+
+    def exists(self):
+        """Tell whether the file is present on disk."""
+        return self.path.exists()
+
+    def __str__(self):
+        """Return the file path as a string."""
+        return str(self.path)
+
+
+class TextToSpeechCache:
+    """Class for all persistent and temporary caching operations."""
+
+    def __init__(self, tts_config, tts_name, audio_file_type):
+        """Create both cache folders and read the cache settings from config."""
+        self.config = tts_config
+        self.tts_name = tts_name
+        self.audio_file_type = audio_file_type
+
+        persistent_cache = self.config.get("preloaded_cache") or \
+                           join(xdg.BaseDirectory.xdg_data_home, tts_name)
+        tmp_cache = get_cache_directory(f"tts/{tts_name}")
+        os.makedirs(tmp_cache, exist_ok=True)
+        os.makedirs(persistent_cache, exist_ok=True)
+
+        self.persistent_cache_dir = Path(persistent_cache)
+        self.temporary_cache_dir = Path(tmp_cache)
+        self.cached_sentences = {}  # hash -> (AudioFile, PhonemeFile or None)
+        # curate cache if disk usage is above min %
+        self.min_free_percent = self.config.get("min_free_percent", 75)
+        # save to permanent cache settings
+        self.persist = self.config.get("persist_cache", False)
+        # only persist if utterance is spoken >= N times
+        self.persist_thresh = self.config.get("persist_thresh", 1)
+        self._sentence_count = {}  # hash -> number of audio files defined
+
+    def __contains__(self, sha):
+        """The cache contains a SHA if it knows of it and it exists on disk."""
+        if sha not in self.cached_sentences:
+            return False  # Doesn't know of it
+        # Audio file must exist, phonemes are optional.
+        audio, phonemes = self.cached_sentences[sha]
+        return audio.exists() and (phonemes is None or phonemes.exists())
+
+    def load_persistent_cache(self):
+        """There may be files pre-loaded in the persistent cache directory
+        prior to run time, such as pre-recorded audio files.
+        """
+        if self.persistent_cache_dir is not None:
+            self._load_existing_audio_files()
+            self._load_existing_phoneme_files()
+            LOG.info("Persistent TTS cache files loaded successfully.")
+
+    def _load_existing_audio_files(self):
+        """Find the TTS audio files already in the persistent cache."""
+        glob_pattern = "*." + self.audio_file_type
+        for file_path in self.persistent_cache_dir.glob(glob_pattern):
+            sentence_hash = file_path.name.split(".")[0]
+            audio_file = self.define_audio_file(sentence_hash, persistent=True)
+            self.cached_sentences[sentence_hash] = audio_file, None
+
+    def _load_existing_phoneme_files(self):
+        """Find the TTS phoneme files already in the persistent cache.
+        A phoneme file is no good without an audio file to pair it with.  If
+        no audio file matches, do not load the phoneme.
+        """
+        for file_path in self.persistent_cache_dir.glob("*.pho"):
+            sentence_hash = file_path.name.split(".")[0]
+            cached_sentence = self.cached_sentences.get(sentence_hash)
+            if cached_sentence is not None:
+                audio_file = cached_sentence[0]
+                phoneme_file = self.define_phoneme_file(sentence_hash, persistent=True)
+                self.cached_sentences[sentence_hash] = audio_file, phoneme_file
+
+    def clear(self):
+        """Remove all files from the temporary cache."""
+        for cache_file_path in self.temporary_cache_dir.iterdir():
+            if cache_file_path.is_dir():
+                for sub_path in cache_file_path.iterdir():
+                    if sub_path.is_file():
+                        sub_path.unlink()
+            elif cache_file_path.is_file():
+                cache_file_path.unlink()
+
+    def curate(self):
+        """Remove cache data if disk space is running low."""
+        files_removed = curate_cache(str(self.temporary_cache_dir),
+                                     min_free_percent=self.min_free_percent)
+        # forget every sentence whose files were just deleted
+        for sentence_hash in {hash_from_path(Path(p)) for p in files_removed}:
+            self.cached_sentences.pop(sentence_hash, None)
+
+    def define_audio_file(self, sentence_hash: str, persistent=False) -> AudioFile:
+        """Build an instance of an object representing an audio file."""
+        if persistent or self._should_persist(sentence_hash):
+            cache_dir = self.persistent_cache_dir
+        else:
+            cache_dir = self.temporary_cache_dir
+        return AudioFile(cache_dir, sentence_hash, self.audio_file_type)
+
+    def define_phoneme_file(self, sentence_hash: str, persistent=False) -> PhonemeFile:
+        """Build an instance of an object representing a phoneme file."""
+        # count=False: only audio definitions count as an utterance, so the
+        # phonemes follow the decision already made for their audio file
+        if persistent or self._should_persist(sentence_hash, count=False):
+            return PhonemeFile(self.persistent_cache_dir, sentence_hash)
+        return PhonemeFile(self.temporary_cache_dir, sentence_hash)
+
+    def _should_persist(self, sentence_hash: str, count=True):
+        """Tell whether files of this sentence go to the persistent cache.
+
+        Args:
+            sentence_hash: hash identifying the sentence
+            count: if False, check the counter without registering an utterance
+        """
+        if not self.persist:
+            return False
+        seen = self._sentence_count.get(sentence_hash, 0)
+        if count:
+            seen += 1
+            self._sentence_count[sentence_hash] = seen
+        return seen >= self.persist_thresh
diff --git a/requirements.txt b/requirements.txt
index 0e0037ae..b1149bac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-ovos_utils>=0.0.12a9
+ovos_utils~=0.0.14a3
 requests
-phoneme_guesser
-memory-tempfile
\ No newline at end of file
+phoneme_guesser
\ No newline at end of file