Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python API implementation #2195

Merged
merged 11 commits into from
Dec 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/aux_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/data_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/inference_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends git make gcc
sudo apt-get install espeak-ng
make system-deps
- name: Install/upgrade Python setup deps
run: python3 -m pip install --upgrade pip setuptools wheel
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/text_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/tts_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/vocoder_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/zoo_tests0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/zoo_tests1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/zoo_tests2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:
cache-dependency-path: 'requirements*'
- name: check OS
run: cat /etc/os-release
- name: set ENV
run: export TRAINER_TELEMETRY=0
- name: Install dependencies
run: |
sudo apt-get update
Expand Down
146 changes: 146 additions & 0 deletions TTS/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
from pathlib import Path

from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


class TTS:
"""TODO: Add voice conversion and Capacitron support."""

def __init__(self, model_name: str = None, progress_bar: bool = True, gpu=False):
"""🐸TTS python interface that allows to load and use the released models.

Example with a multi-speaker model:
>>> from TTS.api import TTS
>>> tts = TTS(TTS.list_models()[0])
>>> wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
>>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")

Example with a single-speaker model:
>>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
>>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

Args:
model_name (str, optional): Model name to load. You can list models by ```tts.models```. Defaults to None.
progress_bar (bool, optional): Whether to pring a progress bar while downloading a model. Defaults to True.
gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
"""
self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
self.synthesizer = None
if model_name:
self.load_model_by_name(model_name, gpu)

@property
def models(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know a good solution, but it would be nice to be able to have that list of model before instantiating that class with the model name.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe we could create a static method called list_tts_models and inside of it instance ModelManager and called the method list_models.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can keep the models in a python file instead of JSON and we can list it.

However can you give a code example for your intended use?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or just read the JSON

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is already what tts.models does. Are you suggesting something different?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it works only when the class is already initialized because it uses a model manager instance. But this could be avoided if models was a @staticmethod instead.

return self.manager.list_tts_models()

@property
def is_multi_speaker(self):
if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager:
return self.synthesizer.tts_model.speaker_manager.num_speakers > 1
return False

@property
def is_multi_lingual(self):
if hasattr(self.synthesizer.tts_model, "language_manager") and self.synthesizer.tts_model.language_manager:
return self.synthesizer.tts_model.language_manager.num_languages > 1
return False

@property
def speakers(self):
if not self.is_multi_speaker:
return None
return self.synthesizer.tts_model.speaker_manager.speaker_names

@property
def languages(self):
if not self.is_multi_lingual:
return None
return self.synthesizer.tts_model.language_manager.language_names

@staticmethod
def get_models_file_path():
return Path(__file__).parent / ".models.json"

@staticmethod
def list_models():
manager = ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False)
return manager.list_tts_models()

def download_model_by_name(self, model_name: str):
model_path, config_path, model_item = self.manager.download_model(model_name)
if model_item["default_vocoder"] is None:
return model_path, config_path, None, None
vocoder_path, vocoder_config_path, _ = self.manager.download_model(model_item["default_vocoder"])
return model_path, config_path, vocoder_path, vocoder_config_path

def load_model_by_name(self, model_name: str, gpu: bool = False):
model_path, config_path, vocoder_path, vocoder_config_path = self.download_model_by_name(model_name)
# init synthesizer
# None values are fetch from the model
self.synthesizer = Synthesizer(
tts_checkpoint=model_path,
tts_config_path=config_path,
tts_speakers_file=None,
tts_languages_file=None,
vocoder_checkpoint=vocoder_path,
vocoder_config=vocoder_config_path,
encoder_checkpoint=None,
encoder_config=None,
use_cuda=gpu,
)

def _check_arguments(self, speaker: str = None, language: str = None):
if self.is_multi_speaker and speaker is None:
raise ValueError("Model is multi-speaker but no speaker is provided.")
if self.is_multi_lingual and language is None:
raise ValueError("Model is multi-lingual but no language is provided.")
if not self.is_multi_speaker and speaker is not None:
raise ValueError("Model is not multi-speaker but speaker is provided.")
if not self.is_multi_lingual and language is not None:
raise ValueError("Model is not multi-lingual but language is provided.")

def tts(self, text: str, speaker: str = None, language: str = None):
"""Convert text to speech.

Args:
text (str):
Input text to synthesize.
speaker (str, optional):
Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
`tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
language (str, optional):
Language code for multi-lingual models. You can check whether loaded model is multi-lingual
`tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
"""
self._check_arguments(speaker=speaker, language=language)

wav = self.synthesizer.tts(
text=text,
speaker_name=speaker,
language_name=language,
speaker_wav=None,
reference_wav=None,
style_wav=None,
style_text=None,
reference_speaker_name=None,
)
return wav

def tts_to_file(self, text: str, speaker: str = None, language: str = None, file_path: str = "output.wav"):
"""Convert text to speech.

Args:
text (str):
Input text to synthesize.
speaker (str, optional):
Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
`tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
language (str, optional):
Language code for multi-lingual models. You can check whether loaded model is multi-lingual
`tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
file_path (str, optional):
Output file path. Defaults to "output.wav".
"""
wav = self.tts(text=text, speaker=speaker, language=language)
self.synthesizer.save_wav(wav=wav, path=file_path)
19 changes: 11 additions & 8 deletions TTS/utils/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@ class ModelManager(object):
models_file (str): path to .model.json file. Defaults to None.
output_prefix (str): prefix to `tts` to download models. Defaults to None
progress_bar (bool): print a progress bar when donwloading a file. Defaults to False.
verbose (bool): print info. Defaults to True.
"""

def __init__(self, models_file=None, output_prefix=None, progress_bar=False):
def __init__(self, models_file=None, output_prefix=None, progress_bar=False, verbose=True):
erogol marked this conversation as resolved.
Show resolved Hide resolved
super().__init__()
self.progress_bar = progress_bar
self.verbose = verbose
if output_prefix is None:
self.output_prefix = get_user_data_dir("tts")
else:
Expand All @@ -62,30 +64,31 @@ def read_models_file(self, file_path):
self.models_dict = json.load(json_file)

def _list_models(self, model_type, model_count=0):
if self.verbose:
print(" Name format: type/language/dataset/model")
model_list = []
for lang in self.models_dict[model_type]:
for dataset in self.models_dict[model_type][lang]:
for model in self.models_dict[model_type][lang][dataset]:
model_full_name = f"{model_type}--{lang}--{dataset}--{model}"
output_path = os.path.join(self.output_prefix, model_full_name)
if os.path.exists(output_path):
print(f" {model_count}: {model_type}/{lang}/{dataset}/{model} [already downloaded]")
else:
print(f" {model_count}: {model_type}/{lang}/{dataset}/{model}")
if self.verbose:
if os.path.exists(output_path):
print(f" {model_count}: {model_type}/{lang}/{dataset}/{model} [already downloaded]")
else:
print(f" {model_count}: {model_type}/{lang}/{dataset}/{model}")
model_list.append(f"{model_type}/{lang}/{dataset}/{model}")
model_count += 1
return model_list

def _list_for_model_type(self, model_type):
print(" Name format: language/dataset/model")
models_name_list = []
model_count = 1
model_type = "tts_models"
models_name_list.extend(self._list_models(model_type, model_count))
return [name.replace(model_type + "/", "") for name in models_name_list]
return models_name_list

def list_models(self):
print(" Name format: type/language/dataset/model")
models_name_list = []
model_count = 1
for model_type in self.models_dict:
Expand Down
1 change: 0 additions & 1 deletion TTS/utils/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def __init__(
self.tts_model = None
self.vocoder_model = None
self.speaker_manager = None
self.num_speakers = 0
self.tts_speakers = {}
self.language_manager = None
self.num_languages = 0
Expand Down
30 changes: 28 additions & 2 deletions docs/source/inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ After the installation, 2 terminal commands are available.

1. TTS Command Line Interface (CLI). - `tts`
2. Local Demo Server. - `tts-server`
3. In 🐍Python. - `from TTS.api import TTS`

## On the Commandline - `tts`
![cli.gif](https://github.com/coqui-ai/TTS/raw/main/images/tts_cli.gif)
Expand Down Expand Up @@ -99,5 +100,30 @@ tts-server --model_name "<type>/<language>/<dataset>/<model_name>" \
--vocoder_name "<type>/<language>/<dataset>/<model_name>"
```

## TorchHub
You can also use [this simple colab notebook](https://colab.research.google.com/drive/1iAe7ZdxjUIuN6V4ooaCt0fACEGKEn7HW?usp=sharing) using TorchHub to synthesize speech.
## Python API

You can run a multi-speaker and multi-lingual model in Python as

```python
from TTS.api import TTS

# List available 🐸TTS models and choose the first one
model_name = TTS.list_models()[0]
# Init TTS
tts = TTS(model_name)
# Run TTS
# ❗ Since this model is multi-speaker and multi-lingual, we must set the target speaker and the language
# Text to speech with a numpy output
wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
# Text to speech to a file
tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")
```

Here is an example for a single speaker model.

```python
# Init TTS with the target model name
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
# Run TTS
tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
```
36 changes: 36 additions & 0 deletions tests/inference_tests/test_python_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os
import unittest

from tests import get_tests_output_path
from TTS.api import TTS

OUTPUT_PATH = os.path.join(get_tests_output_path(), "test_python_api.wav")


class TTSTest(unittest.TestCase):
def test_single_speaker_model(self):
tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)

error_raised = False
try:
tts.tts_to_file(text="Ich bin eine Testnachricht.", speaker="Thorsten", language="de")
except ValueError:
error_raised = True

tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)

self.assertTrue(error_raised)
self.assertFalse(tts.is_multi_speaker)
self.assertFalse(tts.is_multi_lingual)
self.assertIsNone(tts.speakers)
self.assertIsNone(tts.languages)

def test_multi_speaker_multi_lingual_model(self):
tts = TTS()
tts.load_model_by_name(tts.models[0]) # YourTTS
tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path=OUTPUT_PATH)

self.assertTrue(tts.is_multi_speaker)
self.assertTrue(tts.is_multi_lingual)
self.assertGreater(len(tts.speakers), 1)
self.assertGreater(len(tts.languages), 1)