Skip to content

Commit

Permalink
Fix tts-server for multi-lingual models (#2257)
Browse files Browse the repository at this point in the history
  • Loading branch information
marius851000 authored Feb 6, 2023
1 parent 994be16 commit 1f4d8bf
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 7 deletions.
13 changes: 11 additions & 2 deletions TTS/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,13 @@ def convert_boolean(x):
# Server-level feature flags and managers derived from the loaded synthesizer.

# True only when the model exposes `num_speakers` and either reports more than
# one speaker or the synthesizer was given a speakers file.
use_multi_speaker = hasattr(synthesizer.tts_model, "num_speakers") and (
synthesizer.tts_model.num_speakers > 1 or synthesizer.tts_speakers_file is not None
)

# The model's speaker manager, or None when the model has no such attribute.
speaker_manager = getattr(synthesizer.tts_model, "speaker_manager", None)

# Same check as for speakers, applied to languages: the model must expose
# `num_languages` and either report more than one or have a languages file.
use_multi_language = hasattr(synthesizer.tts_model, "num_languages") and (
synthesizer.tts_model.num_languages > 1 or synthesizer.tts_languages_file is not None
)
# The model's language manager, or None when the model has no such attribute.
language_manager = getattr(synthesizer.tts_model, "language_manager", None)

# TODO: set this from SpeakerManager
# Read from the model config; defaults to False when the key is missing.
use_gst = synthesizer.tts_config.get("use_gst", False)
# Flask application object for the server.
app = Flask(__name__)
Expand Down Expand Up @@ -147,7 +152,9 @@ def index():
"index.html",
show_details=args.show_details,
use_multi_speaker=use_multi_speaker,
use_multi_language=use_multi_language,
speaker_ids=speaker_manager.name_to_id if speaker_manager is not None else None,
language_ids=language_manager.name_to_id if language_manager is not None else None,
use_gst=use_gst,
)

Expand Down Expand Up @@ -177,11 +184,13 @@ def tts():
with lock:
text = request.args.get("text")
speaker_idx = request.args.get("speaker_id", "")
language_idx = request.args.get("language_id", "")
style_wav = request.args.get("style_wav", "")
style_wav = style_wav_uri_to_dict(style_wav)
print(" > Model input: {}".format(text))
print(" > Speaker Idx: {}".format(speaker_idx))
wavs = synthesizer.tts(text, speaker_name=speaker_idx, style_wav=style_wav)
print(" > Language Idx: {}".format(language_idx))
wavs = synthesizer.tts(text, speaker_name=speaker_idx, language_name=language_idx, style_wav=style_wav)
out = io.BytesIO()
synthesizer.save_wav(wavs, out)
return send_file(out, mimetype="audio/wav")
Expand Down
19 changes: 15 additions & 4 deletions TTS/server/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
</ul>

{%if use_gst%}
<input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path ot wav).." size=45
<input value='{"0": 0.1}' id="style_wav" placeholder="style wav (dict or path to wav).." size=45
type="text" name="style_wav">
{%endif%}

Expand All @@ -81,6 +81,16 @@
</select><br /><br />
{%endif%}

{%if use_multi_language%}
Choose a language:
<select id="language_id" name=language_id method="GET" action="/">
{% for language_id in language_ids %}
<option value="{{language_id}}" SELECTED>{{language_id}}</option>
{% endfor %}
</select><br /><br />
{%endif%}


{%if show_details%}
<button id="details-button" onclick="location.href = 'details'" name="model-details">Model
Details</button><br /><br />
Expand All @@ -106,11 +116,12 @@
const text = q('#text').value
const speaker_id = getTextValue('#speaker_id')
const style_wav = getTextValue('#style_wav')
const language_id = getTextValue('#language_id')
if (text) {
q('#message').textContent = 'Synthesizing...'
q('#speak-button').disabled = true
q('#audio').hidden = true
synthesize(text, speaker_id, style_wav)
synthesize(text, speaker_id, style_wav, language_id)
}
e.preventDefault()
return false
Expand All @@ -121,8 +132,8 @@
do_tts(e)
}
})
function synthesize(text, speaker_id = "", style_wav = "") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}`, { cache: 'no-cache' })
function synthesize(text, speaker_id = "", style_wav = "", language_id = "") {
fetch(`/api/tts?text=${encodeURIComponent(text)}&speaker_id=${encodeURIComponent(speaker_id)}&style_wav=${encodeURIComponent(style_wav)}&language_id=${encodeURIComponent(language_id)}`, { cache: 'no-cache' })
.then(function (res) {
if (!res.ok) throw Error(res.statusText)
return res.blob()
Expand Down
2 changes: 1 addition & 1 deletion TTS/utils/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def tts(
"Define path for speaker.json if it is a multi-speaker model or remove defined speaker idx. "
)

# handle multi-lingaul
# handle multi-lingual
language_id = None
if self.tts_languages_file or (
hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None
Expand Down

0 comments on commit 1f4d8bf

Please sign in to comment.