Skip to content

Commit 5d5f4ee

Browse files
authored
refactor/deprecate mycroft.tts (#16)
Bumps ovos-plugin-manager to 0.0.3a1 and removes duplicated code; the latest mycroft-core TTS cache implementation was ported in OpenVoiceOS/ovos-plugin-manager#15; psutil is no longer a mandatory requirement. authored-by: jarbasai <jarbasai@mailfence.com>
1 parent fb69fa7 commit 5d5f4ee

File tree

12 files changed

+32
-810
lines changed

12 files changed

+32
-810
lines changed

mycroft/tts/__init__.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,8 @@
1-
# Copyright 2017 Mycroft AI Inc.
2-
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
4-
# you may not use this file except in compliance with the License.
5-
# You may obtain a copy of the License at
6-
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
9-
# Unless required by applicable law or agreed to in writing, software
10-
# distributed under the License is distributed on an "AS IS" BASIS,
11-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
# See the License for the specific language governing permissions and
13-
# limitations under the License.
14-
#
15-
"""The TTS module contains TTS classes for interfacing with various TTS
16-
services. This includes both local and remote services. The module also
17-
declares a "factory" for spawning a TTS service based on configuration.
181
"""
19-
20-
from mycroft.tts.tts import TTSFactory, TTS, TTSValidator, PlaybackThread
2+
NOTE: this is dead code! do not use!
3+
This file is only present to ensure backwards compatibility
4+
in case someone is importing from here
5+
This is only meant for 3rd party code expecting ovos-core
6+
to be a drop in replacement for mycroft-core
7+
"""
8+
from mycroft.tts.tts import TTSFactory, TTS, TTSValidator, PlaybackThread

mycroft/tts/cache.py

Lines changed: 8 additions & 338 deletions
Original file line numberDiff line numberDiff line change
@@ -1,339 +1,9 @@
1-
# Copyright 2021 Mycroft AI Inc.
2-
#
3-
# Licensed under the Apache License, Version 2.0 (the "License");
4-
# you may not use this file except in compliance with the License.
5-
# You may obtain a copy of the License at
6-
#
7-
# http://www.apache.org/licenses/LICENSE-2.0
8-
#
9-
# Unless required by applicable law or agreed to in writing, software
10-
# distributed under the License is distributed on an "AS IS" BASIS,
11-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12-
# See the License for the specific language governing permissions and
13-
# limitations under the License.
14-
"""TTS cache maintenance.
15-
16-
There are two types of cache available to a TTS engine. Both are comprised of
17-
audio and phoneme files. TTS engines can use the cache to improve performance
18-
by not performing inference on sentences in the cache.
19-
20-
The first type of cache is a persistent cache. The cache is considered
21-
persistent because the files are stored in a location that is not cleared on
22-
reboot. TTS inference on these sentences should only need to occur once. The
23-
persistent cache contains commonly spoken sentences.
24-
25-
The second cache type is a temporary cache stored in the /tmp directory,
26-
which is cleared when a device is rebooted. Sentences are added to this cache
27-
on the fly every time a TTS engine returns audio for a sentence that is not
28-
already cached.
291
"""
30-
import base64
31-
import hashlib
32-
import json
33-
import re
34-
from pathlib import Path
35-
from typing import List, Set, Tuple
36-
from urllib import parse
37-
38-
import requests
39-
40-
from mycroft.util.file_utils import (
41-
ensure_directory_exists, get_cache_directory, curate_cache
42-
)
43-
from mycroft.util.log import LOG
44-
45-
46-
def _get_mimic2_audio(sentence: str, url: str) -> Tuple[bytes, str]:
47-
"""Use the Mimic2 API to retrieve the audio for a sentence.
48-
49-
Args:
50-
sentence: The sentence to be cached
51-
"""
52-
LOG.debug("Retrieving Mimic2 audio for sentence \"{}\'".format(sentence))
53-
mimic2_url = url + parse.quote(sentence) + '&visimes=True'
54-
response = requests.get(mimic2_url)
55-
response_data = response.json()
56-
audio = base64.b64decode(response_data["audio_base64"])
57-
phonemes = response_data["visimes"]
58-
59-
return audio, phonemes
60-
61-
62-
def hash_sentence(sentence: str):
63-
"""Convert the sentence into a hash value used for the file name
64-
65-
Args:
66-
sentence: The sentence to be cached
67-
"""
68-
encoded_sentence = sentence.encode("utf-8", "ignore")
69-
sentence_hash = hashlib.md5(encoded_sentence).hexdigest()
70-
71-
return sentence_hash
72-
73-
74-
def hash_from_path(path: Path) -> str:
75-
"""Returns hash from a given path.
76-
77-
Simply removes extension and folder structure leaving the hash.
78-
79-
Args:
80-
path: path to get hash from
81-
82-
Returns:
83-
Hash reference for file.
84-
"""
85-
return path.with_suffix('').name
86-
87-
88-
class AudioFile:
89-
def __init__(self, cache_dir: Path, sentence_hash: str, file_type: str):
90-
self.name = f"{sentence_hash}.{file_type}"
91-
self.path = cache_dir.joinpath(self.name)
92-
93-
def save(self, audio: bytes):
94-
"""Write a TTS cache file containing the audio to be spoken.
95-
96-
Args:
97-
audio: TTS inference of a sentence
98-
"""
99-
try:
100-
with open(self.path, "wb") as audio_file:
101-
audio_file.write(audio)
102-
except Exception:
103-
LOG.exception("Failed to write {} to cache".format(self.name))
104-
105-
def exists(self):
106-
return self.path.exists()
107-
108-
109-
class PhonemeFile:
110-
def __init__(self, cache_dir: Path, sentence_hash: str):
111-
self.name = f"{sentence_hash}.pho"
112-
self.path = cache_dir.joinpath(self.name)
113-
114-
def load(self) -> List:
115-
"""Load phonemes from cache file."""
116-
phonemes = None
117-
if self.path.exists():
118-
try:
119-
with open(self.path) as phoneme_file:
120-
phonemes = phoneme_file.read().strip()
121-
except Exception:
122-
LOG.exception("Failed to read phoneme from cache")
123-
124-
return json.loads(phonemes)
125-
126-
def save(self, phonemes):
127-
"""Write a TTS cache file containing the phoneme to be displayed.
128-
129-
Args:
130-
phonemes: instructions for how to make the mouth on a device move
131-
"""
132-
try:
133-
rec = json.dumps(phonemes)
134-
with open(self.path, "w") as phoneme_file:
135-
phoneme_file.write(rec)
136-
except Exception:
137-
LOG.error(f"Failed to write {self.name} to cache")
138-
139-
def exists(self):
140-
return self.path.exists()
141-
142-
143-
class TextToSpeechCache:
144-
"""Class for all persistent and temporary caching operations."""
145-
def __init__(self, tts_config, tts_name, audio_file_type):
146-
self.config = tts_config
147-
self.tts_name = tts_name
148-
if "preloaded_cache" in self.config:
149-
self.persistent_cache_dir = Path(self.config["preloaded_cache"])
150-
ensure_directory_exists(
151-
str(self.persistent_cache_dir), permissions=0o755
152-
)
153-
else:
154-
self.persistent_cache_dir = None
155-
self.temporary_cache_dir = Path(
156-
get_cache_directory("tts/" + tts_name)
157-
)
158-
ensure_directory_exists(
159-
str(self.temporary_cache_dir), permissions=0o755
160-
)
161-
self.audio_file_type = audio_file_type
162-
self.resource_dir = Path(__file__).parent.parent.joinpath("res")
163-
self.cached_sentences = {}
164-
# curate cache if disk usage is above min %
165-
self.min_free_percent = self.config.get("min_free_percent", 75)
166-
167-
def __contains__(self, sha):
168-
"""The cache contains a SHA if it knows of it and it exists on disk."""
169-
if sha not in self.cached_sentences:
170-
return False # Doesn't know of it
171-
else:
172-
# Audio file must exist, phonemes are optional.
173-
audio, phonemes = self.cached_sentences[sha]
174-
return (audio.exists() and
175-
(phonemes is None or phonemes.exists()))
176-
177-
def load_persistent_cache(self):
178-
"""Load the contents of dialog files to the persistent cache directory.
179-
180-
Parse the dialog files in the resource directory into sentences. Then
181-
add the audio for each sentence to the cache directory.
182-
183-
NOTE: There may be files pre-loaded in the persistent cache directory
184-
prior to run time, such as pre-recorded audio files. This will add
185-
files that do not already exist.
186-
187-
ANOTHER NOTE: Mimic2 is the only TTS engine that supports
188-
downloading missing files. This logic will need to change if another
189-
TTS engine implements it.
190-
"""
191-
if self.persistent_cache_dir is not None:
192-
LOG.info("Adding dialog resources to persistent TTS cache...")
193-
self._load_existing_audio_files()
194-
self._load_existing_phoneme_files()
195-
dialogs = self._collect_dialogs()
196-
sentences = self._parse_dialogs(dialogs)
197-
for sentence in sentences:
198-
self._load_sentence(sentence)
199-
LOG.info("Persistent TTS cache files added successfully.")
200-
201-
def _load_existing_audio_files(self):
202-
"""Find the TTS audio files already in the persistent cache."""
203-
glob_pattern = "*." + self.audio_file_type
204-
for file_path in self.persistent_cache_dir.glob(glob_pattern):
205-
sentence_hash = file_path.name.split(".")[0]
206-
audio_file = AudioFile(
207-
self.persistent_cache_dir, sentence_hash, self.audio_file_type
208-
)
209-
self.cached_sentences[sentence_hash] = audio_file, None
210-
211-
def _load_existing_phoneme_files(self):
212-
"""Find the TTS phoneme files already in the persistent cache.
213-
214-
A phoneme file is no good without an audio file to pair it with. If
215-
no audio file matches, do not load the phoneme.
216-
"""
217-
for file_path in self.persistent_cache_dir.glob("*.pho"):
218-
sentence_hash = file_path.name.split(".")[0]
219-
cached_sentence = self.cached_sentences.get(sentence_hash)
220-
if cached_sentence is not None:
221-
audio_file = cached_sentence[0]
222-
phoneme_file = PhonemeFile(
223-
self.persistent_cache_dir, sentence_hash
224-
)
225-
self.cached_sentences[sentence_hash] = audio_file, phoneme_file
226-
227-
def _collect_dialogs(self) -> List:
228-
"""Build a set of unique sentences from the dialog files.
229-
230-
The sentences will be parsed from *.dialog files present in
231-
mycroft/res/text/en-us.
232-
"""
233-
dialogs = []
234-
dialog_directory = Path(self.resource_dir, "text", "en-us")
235-
for dialog_file_path in dialog_directory.glob("*.dialog"):
236-
with open(dialog_file_path) as dialog_file:
237-
for dialog in dialog_file.readlines():
238-
dialogs.append(dialog.strip())
239-
240-
return dialogs
241-
242-
@staticmethod
243-
def _parse_dialogs(dialogs: List[str]) -> Set[str]:
244-
"""Split each dialog in the resources directory into sentences.
245-
246-
Do not consider sentences with special characters other than
247-
punctuation
248-
example : <<< LOADING <<<
249-
250-
Args:
251-
dialogs: a list of the records in the dialog resource files
252-
"""
253-
sentences = set()
254-
dialog_split_regex = r"(?<=\.|\;|\?)\s"
255-
special_characters_regex = re.compile(r"[@#$%^*()<>/|}{~:]")
256-
for dialog in dialogs:
257-
dialog_sentences = re.split(dialog_split_regex, dialog)
258-
for sentence in dialog_sentences:
259-
match = special_characters_regex.search(sentence)
260-
if match is None:
261-
sentences.add(sentence)
262-
263-
return sentences
264-
265-
def _load_sentence(self, sentence: str):
266-
"""Build audio and phoneme files for each sentence to be cached.
267-
268-
Perform TTS inference on sentences parsed from dialog files. Store
269-
the results in the persistent cache directory.
270-
271-
ASSUMPTION: The only TTS that supports persistent cache right now is
272-
Mimic2. This method assumes a call to the Mimic2 API. If other TTS
273-
engines want to take advantage of the persistent cache, this logic
274-
will need to be more dynamic.
275-
"""
276-
# TODO support multiple engines
277-
if self.tts_name != "Mimic2":
278-
return
279-
280-
sentence_hash = hash_sentence(sentence)
281-
if sentence_hash not in self.cached_sentences:
282-
LOG.info("Adding \"{}\" to cache".format(sentence))
283-
try:
284-
mimic2_url = self.config["url"]
285-
audio, phonemes = _get_mimic2_audio(sentence, mimic2_url)
286-
except Exception:
287-
log_msg = "Failed to get audio for sentence \"{}\""
288-
LOG.exception(log_msg.format(sentence))
289-
else:
290-
self._add_to_persistent_cache(sentence_hash, audio, phonemes)
291-
292-
def _add_to_persistent_cache(
293-
self, sentence_hash: str, audio: bytes, phonemes: str
294-
):
295-
"""Add an audio/phoneme file pair to the persistent cache."""
296-
audio_file = AudioFile(
297-
self.persistent_cache_dir, sentence_hash, self.audio_file_type
298-
)
299-
audio_file.save(audio)
300-
if phonemes is None:
301-
phoneme_file = None
302-
else:
303-
phoneme_file = PhonemeFile(
304-
self.persistent_cache_dir, sentence_hash
305-
)
306-
phoneme_file.save(phonemes)
307-
self.cached_sentences[sentence_hash] = audio_file, phoneme_file
308-
309-
def clear(self):
310-
"""Remove all files from the temporary cache."""
311-
for cache_file_path in self.temporary_cache_dir.iterdir():
312-
if cache_file_path.is_dir():
313-
for sub_path in cache_file_path.iterdir():
314-
if sub_path.is_file():
315-
sub_path.unlink()
316-
elif cache_file_path.is_file():
317-
cache_file_path.unlink()
318-
319-
def curate(self):
320-
"""Remove cache data if disk space is running low."""
321-
files_removed = curate_cache(self.temporary_cache_dir,
322-
min_free_percent=self.min_free_percent)
323-
324-
hashes = set([hash_from_path(Path(path)) for path in files_removed])
325-
for sentence_hash in hashes:
326-
if sentence_hash in self.cached_sentences:
327-
self.cached_sentences.pop(sentence_hash)
328-
329-
def define_audio_file(self, sentence_hash: str) -> AudioFile:
330-
"""Build an instance of an object representing an audio file."""
331-
audio_file = AudioFile(
332-
self.temporary_cache_dir, sentence_hash, self.audio_file_type
333-
)
334-
return audio_file
335-
336-
def define_phoneme_file(self, sentence_hash: str) -> PhonemeFile:
337-
"""Build an instance of an object representing a phoneme file."""
338-
phoneme_file = PhonemeFile(self.temporary_cache_dir, sentence_hash)
339-
return phoneme_file
2+
NOTE: this is dead code! do not use!
3+
This file is only present to ensure backwards compatibility
4+
in case someone is importing from here
5+
This is only meant for 3rd party code expecting ovos-core
6+
to be a drop in replacement for mycroft-core
7+
"""
8+
from ovos_plugin_manager.utils.tts_cache import hash_sentence, hash_from_path,\
9+
AudioFile, PhonemeFile, TextToSpeechCache

0 commit comments

Comments
 (0)