Merge branch 'k2-fsa:master' into dev/tts/vctk/tokenizer

k2-fsa · Mar 18, 2024 · 7ea100a · 7ea100a
2 parents e69b60e + 2dfd5db
commit 7ea100a
Show file tree

Hide file tree

Showing 207 changed files with 23,079 additions and 366 deletions.
diff --git a/.github/scripts/.gitignore b/.github/scripts/.gitignore
@@ -0,0 +1 @@
+piper_phonemize.html
diff --git a/.github/scripts/generate-piper-phonemize-page.py b/.github/scripts/generate-piper-phonemize-page.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+
+
+def main():
+ prefix = (
+ "https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
+ )
+ files = [
+ "piper_phonemize-1.2.0-cp310-cp310-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ "piper_phonemize-1.2.0-cp311-cp311-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ "piper_phonemize-1.2.0-cp312-cp312-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ "piper_phonemize-1.2.0-cp37-cp37m-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ "piper_phonemize-1.2.0-cp38-cp38-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ "piper_phonemize-1.2.0-cp39-cp39-macosx_10_14_x86_64.whl",
+ "piper_phonemize-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
+ ]
+ with open("piper_phonemize.html", "w") as f:
+ for file in files:
+ url = prefix + file
+ f.write(f'<a href="{url}">{file}</a><br/>\n')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/scripts/librispeech/ASR/run.sh b/.github/scripts/librispeech/ASR/run.sh
@@ -15,9 +15,9 @@ function prepare_data() {
  # cause OOM error for CI later.
  mkdir -p download/lm
  pushd download/lm
- wget -q http://www.openslr.org/resources/11/librispeech-vocab.txt
- wget -q http://www.openslr.org/resources/11/librispeech-lexicon.txt
- wget -q http://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lm-norm.txt.gz
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lexicon.txt
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-vocab.txt
  ls -lh
  gunzip librispeech-lm-norm.txt.gz
 

diff --git a/.github/scripts/ljspeech/TTS/run.sh b/.github/scripts/ljspeech/TTS/run.sh
@@ -0,0 +1,157 @@
+#!/usr/bin/env bash
+#
+# CI script for the recipe in egs/ljspeech/TTS: installs extra Python
+# packages, patches prepare.sh, then runs data preparation, training,
+# inference, ONNX export and the pre-trained model tests defined below.
+
+# -e: stop at the first failing command; -x: echo every command before running it
+set -ex
+
+# -f (--find-links) makes pip look for piper_phonemize wheels on the given page
+python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
+python3 -m pip install espnet_tts_frontend
+python3 -m pip install numba
+
+# Print a message prefixed with a timestamp and the caller's
+# file name, line number and function name.
+log() {
+ # This function is from espnet
+ # BASH_SOURCE[1] is the caller's file; ##*/ strips the directory part
+ local fname=${BASH_SOURCE[1]##*/}
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+# All paths used below are relative to the recipe directory
+cd egs/ljspeech/TTS
+
+# Replace hard-coded values in prepare.sh in place (each call leaves a
+# prepare.sh.bak backup).
+# NOTE(review): presumably these shrink the data preparation for CI - confirm
+# against prepare.sh.
+sed -i.bak s/600/8/g ./prepare.sh
+sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
+sed -i.bak s/500/5/g ./prepare.sh
+# Show the effect of the substitutions in the CI log
+git diff
+
+function prepare_data() {
+ # We have created a subset of the data for testing
+ #
+ mkdir download
+ pushd download
+ wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
+ tar xvf LJSpeech-1.1.tar.bz2
+ popd
+
+ ./prepare.sh
+ tree .
+}
+
+# Train the low, medium and high model types for one epoch each, writing to
+# vits/exp-<model-type>.
+function train() {
+  # Replace every "200" in train.py with "3" and show the change in the log.
+  # NOTE(review): presumably this lowers a training setting for CI - confirm
+  # against train.py.
+  pushd ./vits
+  sed -i.bak s/200/3/g ./train.py
+  git diff .
+  popd
+
+  for model_type in low medium high; do
+    ./vits/train.py \
+      --exp-dir "vits/exp-${model_type}" \
+      --model-type "${model_type}" \
+      --num-epochs 1 \
+      --save-every-n 1 \
+      --num-buckets 2 \
+      --tokens data/tokens.txt \
+      --max-duration 20
+
+    # Show what the training run produced
+    ls -lh "vits/exp-${model_type}"
+  done
+}
+
+# Run vits/infer.py with the epoch-1 checkpoint of each model type found in
+# ./vits/exp-<model-type>.
+function infer() {
+  for model_type in low medium high; do
+    ./vits/infer.py \
+      --num-buckets 2 \
+      --model-type "${model_type}" \
+      --epoch 1 \
+      --exp-dir "./vits/exp-${model_type}" \
+      --tokens data/tokens.txt \
+      --max-duration 20
+  done
+}
+
+# Run vits/export-onnx.py on the epoch-1 checkpoint of each model type and
+# list the experiment directory afterwards.
+function export_onnx() {
+  for model_type in low medium high; do
+    ./vits/export-onnx.py \
+      --model-type "${model_type}" \
+      --epoch 1 \
+      --exp-dir "./vits/exp-${model_type}" \
+      --tokens data/tokens.txt
+
+    ls -lh "vits/exp-${model_type}/"
+  done
+}
+
+function test_medium() {
+ git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12
+
+ ./vits/export-onnx.py \
+ --model-type medium \
+ --epoch 820 \
+ --exp-dir ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp \
+ --tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt
+
+ ls -lh ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp
+
+ ./vits/test_onnx.py \
+ --model-filename ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx \
+ --tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt \
+ --output-filename /icefall/test-medium.wav
+
+ ls -lh /icefall/test-medium.wav
+
+ d=/icefall/vits-icefall-en_US-ljspeech-medium
+ mkdir $d
+ cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt $d/
+ cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx $d/model.onnx
+
+ rm -rf icefall-tts-ljspeech-vits-medium-2024-03-12
+
+ pushd $d
+ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
+ tar xf espeak-ng-data.tar.bz2
+ rm espeak-ng-data.tar.bz2
+ cd ..
+ tar cjf vits-icefall-en_US-ljspeech-medium.tar.bz2 vits-icefall-en_US-ljspeech-medium
+ rm -rf vits-icefall-en_US-ljspeech-medium
+ ls -lh *.tar.bz2
+ popd
+}
+
+function test_low() {
+ git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12
+
+ ./vits/export-onnx.py \
+ --model-type low \
+ --epoch 1600 \
+ --exp-dir ./icefall-tts-ljspeech-vits-low-2024-03-12/exp \
+ --tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt
+
+ ls -lh ./icefall-tts-ljspeech-vits-low-2024-03-12/exp
+
+ ./vits/test_onnx.py \
+ --model-filename ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx \
+ --tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt \
+ --output-filename /icefall/test-low.wav
+
+ ls -lh /icefall/test-low.wav
+
+ d=/icefall/vits-icefall-en_US-ljspeech-low
+ mkdir $d
+ cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt $d/
+ cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx $d/model.onnx
+
+ rm -rf icefall-tts-ljspeech-vits-low-2024-03-12
+
+ pushd $d
+ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
+ tar xf espeak-ng-data.tar.bz2
+ rm espeak-ng-data.tar.bz2
+ cd ..
+ tar cjf vits-icefall-en_US-ljspeech-low.tar.bz2 vits-icefall-en_US-ljspeech-low
+ rm -rf vits-icefall-en_US-ljspeech-low
+ ls -lh *.tar.bz2
+ popd
+}
+
+prepare_data
+train
+infer
+export_onnx
+rm -rf vits/exp-{low,medium,high}
+test_medium
+test_low
diff --git a/.github/workflows/build-doc.yml b/.github/workflows/build-doc.yml
@@ -56,11 +56,14 @@ jobs:
  - name: Build doc
  shell: bash
  run: |
+ .github/scripts/generate-piper-phonemize-page.py
  cd docs
  python3 -m pip install -r ./requirements.txt
  make html
  touch build/html/.nojekyll
 
+ cp -v ../piper_phonemize.html ./build/html/
+
  - name: Deploy
  uses: peaceiris/actions-gh-pages@v3
  with:

diff --git a/.github/workflows/ljspeech.yml b/.github/workflows/ljspeech.yml
@@ -0,0 +1,102 @@
+name: ljspeech
+
+# Triggered by pushes to master, pull requests against master, and manually.
+on:
+  push:
+    branches: [master]
+
+  pull_request:
+    branches: [master]
+
+  workflow_dispatch:
+
+# Runs are grouped per ref; a newer run cancels the one still in progress.
+concurrency:
+  group: ljspeech-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Computes the build matrix consumed by the jobs that list this one in `needs`.
+  generate_build_matrix:
+    # Only run when the repository owner is csukuangfj or k2-fsa
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    # see https://github.com/pytorch/pytorch/pull/50633
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Generating build matrix
+        id: set-matrix
+        run: |
+          # outputting for debugging purposes
+          python ./.github/scripts/docker/generate_build_matrix.py
+          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
+          # The `::set-output` workflow command is deprecated; step outputs
+          # are written to the file named by $GITHUB_OUTPUT instead.
+          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"
+
+  # Runs .github/scripts/ljspeech/TTS/run.sh inside the icefall CPU docker
+  # image for every entry of the generated build matrix.
+  ljspeech:
+    needs: generate_build_matrix
+    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Free space
+        shell: bash
+        run: |
+          ls -lh
+          df -h
+          rm -rf /opt/hostedtoolcache
+          df -h
+          echo "pwd: $PWD"
+          echo "github.workspace ${{ github.workspace }}"
+
+      - name: Run tests
+        uses: addnab/docker-run-action@v3
+        with:
+          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
+          # The workspace is mounted at /icefall, so files the script writes
+          # to /icefall end up in the workspace root on the runner.
+          options: |
+            --volume ${{ github.workspace }}/:/icefall
+          shell: bash
+          run: |
+            export PYTHONPATH=/icefall:$PYTHONPATH
+            cd /icefall
+            git config --global --add safe.directory /icefall
+
+            .github/scripts/ljspeech/TTS/run.sh
+
+      - name: display files
+        shell: bash
+        run: |
+          ls -lh
+
+      # Wave files written by test_onnx.py (/icefall/test-*.wav)
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
+        with:
+          name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
+          path: ./*.wav
+
+      # Packaged models created by run.sh. This artifact previously uploaded
+      # ./*.wav again, duplicating the step above.
+      - uses: actions/upload-artifact@v4
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
+        with:
+          name: generated-models-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}
+          path: ./vits-icefall-*.tar.bz2
+
+      # Skipped for pull requests: they must not publish release assets, and
+      # runs triggered from forks do not receive the upload token.
+      - name: Release exported onnx models
+        if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name != 'pull_request'
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: vits-icefall-*.tar.bz2
+          repo_name: k2-fsa/sherpa-onnx
+          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
+          tag: tts-models
+