From 21172ececb09a4cd06292a8cc387c7ac7fc3511f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 16:24:52 +0100 Subject: [PATCH 1/4] ci: update uv and move into composite action --- .github/actions/setup-uv/action.yml | 11 +++++++++++ .github/workflows/pypi-release.yml | 8 ++------ .github/workflows/style_check.yml | 8 ++------ .github/workflows/tests.yml | 14 ++++---------- 4 files changed, 19 insertions(+), 22 deletions(-) create mode 100644 .github/actions/setup-uv/action.yml diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml new file mode 100644 index 0000000000..619b138fb2 --- /dev/null +++ b/.github/actions/setup-uv/action.yml @@ -0,0 +1,11 @@ +name: Setup uv + +runs: + using: 'composite' + steps: + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.5.1" + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 4122f69f73..1b7f44654c 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -11,6 +11,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Verify tag matches version run: | set -ex @@ -19,12 +21,6 @@ jobs: if [[ "v$version" != "$tag" ]]; then exit 1 fi - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" - name: Set up Python run: uv python install 3.12 - name: Build sdist and wheel diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index a146213f7c..44f562d07e 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -15,12 +15,8 @@ jobs: python-version: [3.9] steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Lint check diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index be3f1b740b..02c6e25abb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,12 +16,8 @@ jobs: subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install Espeak @@ -58,10 +54,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" + - name: Setup uv + uses: ./.github/actions/setup-uv - uses: actions/download-artifact@v4 with: pattern: coverage-data-* From 993da778b4bd4eb0408c6b1fc1a40d0c62b1eeae Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 17:22:47 +0100 Subject: [PATCH 2/4] chore: use original instead of scarf urls These allowed Coqui to get download stats, which we don't need anymore --- .github/workflows/tests.yml | 4 - TTS/.models.json | 214 ++++++++++----------- TTS/demos/xtts_ft_demo/utils/gpt_train.py | 10 +- TTS/tts/layers/tortoise/arch_utils.py | 2 +- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 2 +- TTS/utils/manage.py | 16 +- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 8 +- recipes/ljspeech/xtts_v2/train_gpt_xtts.py | 8 +- 8 files changed, 130 insertions(+), 134 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 02c6e25abb..b485f32fd1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,10 +30,6 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends git make gcc make system-deps - - name: Replace scarf urls - if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Unit tests run: | resolution=highest diff --git a/TTS/.models.json b/TTS/.models.json index a5add6e34f..7c3a498bff 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -5,11 +5,11 @@ "xtts_v2": { "description": "XTTS-v2.0.3 by Coqui with 17 languages.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/hash.md5", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/speakers_xtts.pth" + "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/hash.md5", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/speakers_xtts.pth" ], "model_hash": "10f92b55c512af7a8d39d650547a15a7", "default_vocoder": null, @@ -21,10 +21,10 @@ "xtts_v1.1": { "description": "XTTS-v1.1 by Coqui with 14 languages, cross-language voice cloning and reference leak fixed.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/config.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/hash.md5" + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/model.pth", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/config.json", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/vocab.json", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/hash.md5" ], "model_hash": "7c62beaf58d39b729de287330dc254e7b515677416839b649a50e7cf74c3df59", "default_vocoder": null, @@ -35,7 +35,7 @@ }, "your_tts": { "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip", "default_vocoder": null, "commit": "e9a1953e", "license": "CC BY-NC-ND 4.0", @@ -44,11 +44,11 @@ "bark": { "description": "đŸ¶ Bark TTS model released by suno-ai. You can find the original implementation in https://github.com/suno-ai/bark.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf/bark/coarse_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/text_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/config.json", - "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth" + "https://huggingface.co/erogol/bark/resolve/main/coarse_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/fine_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/text_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/config.json", + "https://huggingface.co/erogol/bark/resolve/main/tokenizer.pth" ], "default_vocoder": null, "commit": "e9a1953e", @@ -60,7 +60,7 @@ "bg": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--bg--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -71,7 +71,7 @@ "cs": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--cs--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -82,7 +82,7 @@ "da": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--da--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -93,7 +93,7 @@ "et": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--et--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -104,7 +104,7 @@ "ga": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--ga--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -116,7 +116,7 @@ "ek1": { "tacotron2": { "description": "EK1 en-rp tacotron2 by NMStoker", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ek1--tacotron2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ek1--tacotron2.zip", "default_vocoder": "vocoder_models/en/ek1/wavegrad", "commit": "c802255", "license": "apache 2.0" @@ -125,7 +125,7 @@ "ljspeech": { "tacotron2-DDC": { "description": "Tacotron2 with Double Decoder Consistency.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "bae2ad0f", "author": "Eren Gölge @erogol", @@ -134,7 +134,7 @@ }, "tacotron2-DDC_ph": { "description": "Tacotron2 with Double Decoder Consistency with phonemes.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip", "default_vocoder": "vocoder_models/en/ljspeech/univnet", "commit": "3900448", "author": "Eren Gölge @erogol", @@ -143,7 +143,7 @@ }, "glow-tts": { "description": "", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip", "stats_file": null, "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "commit": "", @@ -153,7 +153,7 @@ }, "speedy-speech": { "description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip", "stats_file": null, "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "4581e3d", @@ -163,7 +163,7 @@ }, "tacotron2-DCA": { "description": "", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip", "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "commit": "", "author": "Eren Gölge @erogol", @@ -172,7 +172,7 @@ }, "vits": { "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--vits.zip", "default_vocoder": null, "commit": "3900448", "author": "Eren Gölge @erogol", @@ -180,7 +180,7 @@ "contact": "egolge@coqui.com" }, "vits--neon": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--en--ljspeech--vits.zip", "default_vocoder": null, "author": "@NeonGeckoCom", "license": "bsd-3-clause", @@ -189,7 +189,7 @@ }, "fast_pitch": { "description": "FastPitch model trained on LJSpeech using the Aligner Network", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "b27b3ba", "author": "Eren Gölge @erogol", @@ -198,7 +198,7 @@ }, "overflow": { "description": "Overflow model trained on LJSpeech", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.0_models/tts_models--en--ljspeech--overflow.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.0_models/tts_models--en--ljspeech--overflow.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "3b1a28f", "author": "Eren Gölge @erogol", @@ -207,7 +207,7 @@ }, "neural_hmm": { "description": "Neural HMM model trained on LJSpeech", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "3b1a28f", "author": "Shivam Metha @shivammehta25", @@ -218,7 +218,7 @@ "vctk": { "vits": { "description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--vctk--vits.zip", "default_vocoder": null, "commit": "3900448", "author": "Eren @erogol", @@ -227,7 +227,7 @@ }, "fast_pitch": { "description": "FastPitch model trained on VCTK dataseset.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip", "default_vocoder": null, "commit": "bdab788d", "author": "Eren @erogol", @@ -238,7 +238,7 @@ "sam": { "tacotron-DDC": { "description": "Tacotron2 with Double Decoder Consistency trained with Aceenture's Sam dataset.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip", "default_vocoder": "vocoder_models/en/sam/hifigan_v2", "commit": "bae2ad0f", "author": "Eren Gölge @erogol", @@ -249,7 +249,7 @@ "blizzard2013": { "capacitron-t2-c50": { "description": "Capacitron additions to Tacotron 2 with Capacity at 50 as in https://arxiv.org/pdf/1906.03402.pdf", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip", "commit": "d6284e7", "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2", "author": "Adam Froghyar @a-froghyar", @@ -258,7 +258,7 @@ }, "capacitron-t2-c150_v2": { "description": "Capacitron additions to Tacotron 2 with Capacity at 150 as in https://arxiv.org/pdf/1906.03402.pdf", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip", "commit": "a67039d", "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2", "author": "Adam Froghyar @a-froghyar", @@ -270,15 +270,15 @@ "tortoise-v2": { "description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts", "github_rls_url": [ - "https://coqui.gateway.scarf.sh/v0.14.1_models/autoregressive.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_auto.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_diffuser.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/vocoder.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/mel_norms.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/config.json" + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/autoregressive.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/clvp2.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/cvvp.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/diffusion_decoder.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/rlg_auto.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/rlg_diffuser.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/vocoder.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/mel_norms.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/config.json" ], "commit": "c1875f6", "default_vocoder": null, @@ -289,7 +289,7 @@ "jenny": { "jenny": { "description": "VITS model trained with Jenny(Dioco) dataset. Named as Jenny as demanded by the license. Original URL for the model https://www.kaggle.com/datasets/noml4u/tts-models--en--jenny-dioco--vits", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.14.0_models/tts_models--en--jenny--jenny.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.14.0_models/tts_models--en--jenny--jenny.zip", "default_vocoder": null, "commit": "ba40a1c", "license": "custom - see https://github.com/dioco-group/jenny-tts-dataset#important", @@ -300,7 +300,7 @@ "es": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "commit": "", "author": "Eren Gölge @erogol", @@ -310,7 +310,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--es--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -321,7 +321,7 @@ "fr": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "commit": null, "author": "Eren Gölge @erogol", @@ -331,7 +331,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--fr--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -342,7 +342,7 @@ "uk": { "mai": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--uk--mai--glow-tts.zip", "author": "@robinhad", "commit": "bdab788d", "license": "MIT", @@ -350,7 +350,7 @@ "default_vocoder": "vocoder_models/uk/mai/multiband-melgan" }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--uk--mai--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -361,7 +361,7 @@ "zh-CN": { "baker": { "tacotron2-DDC-GST": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip", "commit": "unknown", "author": "@kirianguiller", "license": "apache 2.0", @@ -372,7 +372,7 @@ "nl": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip", "author": "@r-dh", "license": "apache 2.0", "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan", @@ -382,7 +382,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--nl--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -393,21 +393,21 @@ "de": { "thorsten": { "tacotron2-DCA": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip", "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--de--thorsten--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/tts_models--de--thorsten--vits.zip", "default_vocoder": null, "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/de/thorsten/hifigan_v1", "description": "Thorsten-Dec2021-22k-DDC", "author": "@thorstenMueller", @@ -417,7 +417,7 @@ }, "css10": { "vits-neon": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--de--css10--vits.zip", "default_vocoder": null, "author": "@NeonGeckoCom", "license": "bsd-3-clause", @@ -428,7 +428,7 @@ "ja": { "kokoro": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1", "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.", "author": "@kaiidams", @@ -440,7 +440,7 @@ "tr": { "common-voice": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip", "default_vocoder": "vocoder_models/tr/common-voice/hifigan", "license": "MIT", "description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.", @@ -452,7 +452,7 @@ "it": { "mai_female": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -460,7 +460,7 @@ "commit": null }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_female--vits.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -470,7 +470,7 @@ }, "mai_male": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -478,7 +478,7 @@ "commit": null }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_male--vits.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -490,7 +490,7 @@ "ewe": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--ewe--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -502,7 +502,7 @@ "hau": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--hau--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -514,7 +514,7 @@ "lin": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--lin--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -526,7 +526,7 @@ "tw_akuapem": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -538,7 +538,7 @@ "tw_asante": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -550,7 +550,7 @@ "yor": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--yor--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -562,7 +562,7 @@ "hu": { "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--hu--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -573,7 +573,7 @@ "el": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--el--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -584,7 +584,7 @@ "fi": { "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--fi--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -595,7 +595,7 @@ "hr": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--hr--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -606,7 +606,7 @@ "lt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--lt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -617,7 +617,7 @@ "lv": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--lv--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -628,7 +628,7 @@ "mt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--mt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -639,7 +639,7 @@ "pl": { "mai_female": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--pl--mai_female--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -650,7 +650,7 @@ "pt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--pt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -661,7 +661,7 @@ "ro": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--ro--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -672,7 +672,7 @@ "sk": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sk--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -683,7 +683,7 @@ "sl": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sl--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -694,7 +694,7 @@ "sv": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sv--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -705,7 +705,7 @@ "ca": { "custom": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--ca--custom--vits.zip", "default_vocoder": null, "commit": null, "description": " It is trained from zero with 101460 utterances consisting of 257 speakers, approx 138 hours of speech. We used three datasets;\nFestcat and Google Catalan TTS (both TTS datasets) and also a part of Common Voice 8. It is trained with TTS v0.8.0.\nhttps://github.com/coqui-ai/TTS/discussions/930#discussioncomment-4466345", @@ -717,7 +717,7 @@ "fa": { "custom": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--fa--custom--glow-tts.zip", "default_vocoder": null, "commit": null, "description": "persian-tts-female-glow_tts model for text to speech purposes. Single-speaker female voice Trained on persian-tts-dataset-famale. \nThis model has no compatible vocoder thus the output quality is not very good. \nDataset: https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale.", @@ -729,7 +729,7 @@ "bn": { "custom": { "vits-male": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.3_models/tts_models--bn--custom--vits_male.zip", "default_vocoder": null, "commit": null, "description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", @@ -737,7 +737,7 @@ "license": "Apache 2.0" }, "vits-female": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.3_models/tts_models--bn--custom--vits_female.zip", "default_vocoder": null, "commit": null, "description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", @@ -750,7 +750,7 @@ "common-voice": { "glow-tts":{ "description": "Belarusian GlowTTS model created by @alex73 (Github).", - "github_rls_url":"https://coqui.gateway.scarf.sh/v0.16.6/tts_models--be--common-voice--glow-tts.zip", + "github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.16.6/tts_models--be--common-voice--glow-tts.zip", "default_vocoder": "vocoder_models/be/common-voice/hifigan", "commit": "c0aabb85", "license": "CC-BY-SA 4.0", @@ -763,14 +763,14 @@ "universal": { "libri-tts": { "wavegrad": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip", "commit": "ea976b0", "author": "Eren Gölge @erogol", "license": "MPL", "contact": "egolge@coqui.com" }, "fullband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip", "commit": "4132240", "author": "Eren Gölge @erogol", "license": "MPL", @@ -782,14 +782,14 @@ "ek1": { "wavegrad": { "description": "EK1 en-rp wavegrad by NMStoker", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip", "commit": "c802255", "license": "apache 2.0" } }, "ljspeech": { "multiband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip", "commit": "ea976b0", "author": "Eren Gölge @erogol", "license": "MPL", @@ -797,7 +797,7 @@ }, "hifigan_v2": { "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip", "commit": "bae2ad0f", "author": "@erogol", "license": "apache 2.0", @@ -805,7 +805,7 @@ }, "univnet": { "description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip", "commit": "4581e3d", "author": "Eren @erogol", "license": "apache 2.0", @@ -815,7 +815,7 @@ "blizzard2013": { "hifigan_v2": { "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip", "commit": "d6284e7", "author": "Adam Froghyar @a-froghyar", "license": "apache 2.0", @@ -825,7 +825,7 @@ "vctk": { "hifigan_v2": { "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip", "commit": "2f07160", "author": "Edresson Casanova", "license": "apache 2.0", @@ -835,7 +835,7 @@ "sam": { "hifigan_v2": { "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip", "commit": "2f07160", "author": "Eren Gölge @erogol", "license": "apache 2.0", @@ -846,7 +846,7 @@ "nl": { "mai": { "parallel-wavegan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip", "author": "@r-dh", "license": "apache 2.0", "commit": "unknown" @@ -856,19 +856,19 @@ "de": { "thorsten": { "wavegrad": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "fullband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "hifigan_v1": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip", "description": "HifiGAN vocoder model for Thorsten Neutral Dec2021 22k Samplerate Tacotron2 DDC model", "author": "@thorstenMueller", "license": "apache 2.0", @@ -879,7 +879,7 @@ "ja": { "kokoro": { "hifigan_v1": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip", "description": "HifiGAN model trained for kokoro dataset by @kaiidams", "author": "@kaiidams", "license": "apache 2.0", @@ -890,7 +890,7 @@ "uk": { "mai": { "multiband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip", "author": "@robinhad", "commit": "bdab788d", "license": "MIT", @@ -901,7 +901,7 @@ "tr": { "common-voice": { "hifigan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip", "description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.", "author": "Fatih Akademi", "license": "MIT", @@ -912,7 +912,7 @@ "be": { "common-voice": { "hifigan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.16.6/vocoder_models--be--common-voice--hifigan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.16.6/vocoder_models--be--common-voice--hifigan.zip", "description": "Belarusian HiFiGAN model created by @alex73 (Github).", "author": "@alex73", "license": "CC-BY-SA 4.0", @@ -925,7 +925,7 @@ "multilingual": { "vctk": { "freevc24": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip", "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC", "author": "Jing-Yi Li @OlaWod", "license": "MIT", diff --git a/TTS/demos/xtts_ft_demo/utils/gpt_train.py b/TTS/demos/xtts_ft_demo/utils/gpt_train.py index 7b41966b8f..f838297af3 100644 --- a/TTS/demos/xtts_ft_demo/utils/gpt_train.py +++ b/TTS/demos/xtts_ft_demo/utils/gpt_train.py @@ -43,8 +43,8 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True) # DVAE files - DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth" - MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth" + DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth" + MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK)) @@ -58,9 +58,9 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, ) # Download XTTS v2.0 checkpoint if needed - TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json" - XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth" - XTTS_CONFIG_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json" + TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json" + XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth" + XTTS_CONFIG_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(TOKENIZER_FILE_LINK)) # vocab.json file diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index 52c2526695..8eda251f93 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -293,7 +293,7 @@ def forward(self, x): return h[:, :, 0] -DEFAULT_MEL_NORM_FILE = "https://coqui.gateway.scarf.sh/v0.14.1_models/mel_norms.pth" +DEFAULT_MEL_NORM_FILE = "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/mel_norms.pth" class TorchMelSpectrogram(nn.Module): diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 9d9edd5758..0253d65ddd 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -50,7 +50,7 @@ class GPTArgs(XttsArgs): max_wav_length: int = 255995 # ~11.6 seconds max_text_length: int = 200 tokenizer_file: str = "" - mel_norm_file: str = "https://coqui.gateway.scarf.sh/v0.14.0_models/mel_norms.pth" + mel_norm_file: str = "https://github.com/coqui-ai/TTS/releases/download/v0.14.0_models/mel_norms.pth" dvae_checkpoint: str = "" xtts_checkpoint: str = "" gpt_checkpoint: str = "" # if defined it will replace the gpt weights on xtts model diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index fb5071d9b0..bd445b3a2f 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -230,7 +230,7 @@ def _download_hf_model(self, model_item: Dict, output_path: str): self._download_zip_file(model_item["hf_url"], output_path, self.progress_bar) def download_fairseq_model(self, model_name, output_path): - URI_PREFIX = "https://coqui.gateway.scarf.sh/fairseq/" + URI_PREFIX = "https://dl.fbaipublicfiles.com/mms/tts/" _, lang, _, _ = model_name.split("/") model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") self._download_tar_file(model_download_uri, output_path, self.progress_bar) @@ -243,9 +243,9 @@ def set_model_url(model_item: Dict): elif "hf_url" in model_item: model_item["model_url"] = model_item["hf_url"] elif "fairseq" in model_item["model_name"]: - model_item["model_url"] = "https://coqui.gateway.scarf.sh/fairseq/" + model_item["model_url"] = "https://dl.fbaipublicfiles.com/mms/tts/" elif "xtts" in model_item["model_name"]: - model_item["model_url"] = "https://coqui.gateway.scarf.sh/xtts/" + model_item["model_url"] = "https://huggingface.co/coqui/" return model_item def _set_model_item(self, model_name): @@ -278,11 +278,11 @@ def _set_model_item(self, model_name): "contact": "info@coqui.ai", "tos_required": True, "hf_url": [ - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/model.pth", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/config.json", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/vocab.json", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/hash.md5", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/speakers_xtts.pth", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/model.pth", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/config.json", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/vocab.json", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/hash.md5", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/speakers_xtts.pth", ], } else: diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index 7d8f4064c5..d31ec8f1ed 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -41,8 +41,8 @@ # DVAE files -DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/dvae.pth" -MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/mel_stats.pth" +DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/dvae.pth" +MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, DVAE_CHECKPOINT_LINK.split("/")[-1]) @@ -55,8 +55,8 @@ # Download XTTS v1.1 checkpoint if needed -TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json" -XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth" +TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/vocab.json" +XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/model.pth" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, TOKENIZER_FILE_LINK.split("/")[-1]) # vocab.json file diff --git a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py index 626917381a..ccaa97f1e4 100644 --- a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py @@ -41,8 +41,8 @@ # DVAE files -DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth" -MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth" +DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth" +MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK)) @@ -55,8 +55,8 @@ # Download XTTS v2.0 checkpoint if needed -TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json" -XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth" +TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json" +XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(TOKENIZER_FILE_LINK)) # vocab.json file From 5de47e9a14e3f1df377af2a643e8d45e6a8093d7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 22:39:58 +0100 Subject: [PATCH 3/4] ci: run integration tests only on lowest and highest python --- .github/workflows/integration-tests.yml | 64 +++++++++++++++++++++++++ .github/workflows/style_check.yml | 2 +- .github/workflows/tests.yml | 6 +-- 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/integration-tests.yml diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 0000000000..7af0836248 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,64 @@ +name: integration + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.12"] + subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] + steps: + - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} + - name: Install Espeak + if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + run: | + sudo apt-get update + sudo apt-get install espeak espeak-ng + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends git make gcc + make system-deps + - name: Integration tests + run: | + resolution=highest + if [ "${{ matrix.python-version }}" == "3.9" ]; then + resolution=lowest-direct + fi + uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }} + - name: Upload coverage data + uses: actions/upload-artifact@v4 + with: + include-hidden-files: true + name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} + path: .coverage.* + if-no-files-found: ignore + coverage: + if: always() + needs: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv + - uses: actions/download-artifact@v4 + with: + pattern: coverage-data-* + merge-multiple: true + - name: Combine coverage + run: | + uv python install + uvx coverage combine + uvx coverage html --skip-covered --skip-empty + uvx coverage report --format=markdown >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index 44f562d07e..d1060f6be2 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -7,7 +7,7 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - test: + lint: runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b485f32fd1..16b680a93c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: tests +name: unit on: push: @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: python-version: [3.9, "3.10", "3.11", "3.12"] - subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] + subset: ["data_tests", "inference_tests", "test_aux", "test_text"] steps: - uses: actions/checkout@v4 - name: Setup uv @@ -21,7 +21,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install Espeak - if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + if: contains(fromJSON('["inference_tests", "test_text"]'), matrix.subset) run: | sudo apt-get update sudo apt-get install espeak espeak-ng From d3c3ba3d565d61296dcea253b77896fb4d183f82 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 22:58:37 +0100 Subject: [PATCH 4/4] build: set upper limit on transformers More breaking changes affecting the XTTS streaming code --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 379187feed..d66f33d602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ dependencies = [ "gruut[de,es,fr]>=2.4.0", # Tortoise "einops>=0.6.0", - "transformers>=4.43.0", + "transformers>=4.43.0,<=4.46.2", # Bark "encodec>=0.1.1", # XTTS