Skip to content

Commit

Permalink
Add CI test for aishell3
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Apr 6, 2024
1 parent 35578f0 commit 6658d0c
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 1 deletion.
85 changes: 85 additions & 0 deletions .github/scripts/aishell3/TTS/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env bash

# -e: stop at the first failing command; -x: echo every command as it runs.
set -ex

# piper_phonemize is installed with an extra find-links page (-f).
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html

# The remaining Python packages are installed one at a time, in this order.
for pkg in numba pypinyin cython; do
  python3 -m pip install "$pkg"
done

apt-get install -y jq

# Print a timestamped message to stdout, tagged with the caller's
# file name, line number and function name. Adapted from espnet.
# Arguments: $* - message text (backslash escapes are expanded by echo -e)
log() {
  local src_name=${BASH_SOURCE[1]##*/}
  local where="${src_name}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${where}) $*"
}

# Everything below uses paths relative to the recipe directory.
cd egs/aishell3/TTS

# Rewrite every "1000" in prepare.sh to "10"; the untouched copy is kept
# as prepare.sh.bak.
sed -i.bak -e 's/1000/10/g' ./prepare.sh


# Download the aishell3 CI tarball into ./download, unpack it there and
# delete the tarball afterwards.
# Outputs: directory listing of ./download (plus pushd/popd stack output).
# Returns: non-zero if the download or extraction fails.
download_data() {
  # -p: do not fail when ./download is left over from an earlier run.
  mkdir -p download
  pushd download
  # -f: exit non-zero on an HTTP error instead of saving the error page as
  # the tarball, which would only surface later as a confusing tar failure.
  curl -fSL -O https://huggingface.co/csukuangfj/aishell3-ci-data/resolve/main/aishell3.tar.bz2
  tar xf aishell3.tar.bz2
  rm aishell3.tar.bz2
  ls -lh
  popd
}

# Run the recipe's prepare.sh, then dump data/tokens.txt and its line count
# between separator lines so the result is visible in the CI log.
prepare_data() {
  local tokens_file=data/tokens.txt
  local rule="------------------------------"

  ./prepare.sh

  echo "----------tokens.txt----------"
  cat "${tokens_file}"
  echo "${rule}"
  wc -l "${tokens_file}"
  echo "${rule}"
}

# Patch vits/train.py, then run one training epoch for each model type
# (low, medium, high), listing the experiment directory after each run.
train() {
  pushd ./vits
  # Replace every "200" in train.py with "3" (backup kept as train.py.bak)
  # and show the resulting diff. NOTE(review): presumably this shortens the
  # run for CI -- confirm which settings in train.py are affected.
  sed -i.bak -e 's/200/3/g' ./train.py
  git diff .
  popd

  local model_type exp_dir
  for model_type in low medium high; do
    exp_dir="vits/exp-${model_type}"

    local -a train_args=(
      --exp-dir "${exp_dir}"
      --model-type "${model_type}"
      --num-epochs 1
      --save-every-n 1
      --num-buckets 2
      --tokens data/tokens.txt
      --max-duration 20
    )
    ./vits/train.py "${train_args[@]}"

    ls -lh "${exp_dir}"
  done
}

# Export the epoch-1 checkpoint of each model type (low, medium, high) to
# ONNX via vits/export-onnx.py, then list the experiment directory.
export_onnx() {
  local t
  for t in low medium high; do
    # Every option line except the last must end with a backslash; without
    # the one after --tokens, --speakers would be run as its own command
    # and never reach the exporter.
    ./vits/export-onnx.py \
      --model-type "$t" \
      --epoch 1 \
      --exp-dir "./vits/exp-$t" \
      --tokens data/tokens.txt \
      --speakers ./data/speakers.txt

    ls -lh "vits/exp-$t/"
  done
}

# Placeholder for a test of the "low" model; currently only prints a marker.
test_low() {
  printf '%s\n' "TODO"
}


# Run the pipeline stages in order; with `set -e` active, the first failing
# stage aborts the script.
for step in download_data prepare_data train export_onnx test_low; do
  "$step"
done
73 changes: 73 additions & 0 deletions .github/workflows/aishell3.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# CI workflow that runs the aishell3 TTS recipe script inside the icefall
# CPU docker images, once per entry of the generated build matrix.
#
# NOTE(review): the display name is "aishell" although the file, concurrency
# group and job are all "aishell3" -- confirm whether this should be renamed.
name: aishell

on:
  push:
    branches:
      - master
      - tts-aishell3

  pull_request:
    branches:
      - master

  workflow_dispatch:

# Only one run per ref; a newer run cancels the one in progress.
concurrency:
  group: aishell3-${{ github.ref }}
  cancel-in-progress: true

jobs:
  generate_build_matrix:
    # Run only in the csukuangfj / k2-fsa repositories, and only for pushes,
    # manual dispatches, or events carrying the 'ready' label.
    # 'workflow_dispatch' replaces the former 'aishell3' comparison, which
    # could never match an event name and left manual runs always skipped.
    if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'workflow_dispatch')

    # see https://github.com/pytorch/pytorch/pull/50633
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Generating build matrix
        id: set-matrix
        run: |
          # outputting for debugging purposes
          python ./.github/scripts/docker/generate_build_matrix.py
          MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
          # Step outputs are written to $GITHUB_OUTPUT; the ::set-output
          # workflow command is deprecated.
          echo "matrix=${MATRIX}" >> "$GITHUB_OUTPUT"

  aishell3:
    needs: generate_build_matrix
    name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Free space
        shell: bash
        run: |
          df -h
          rm -rf /opt/hostedtoolcache
          df -h
          echo "pwd: $PWD"
          echo "github.workspace ${{ github.workspace }}"

      - name: Run aishell3 tests
        uses: addnab/docker-run-action@v3
        with:
          image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
          # Mount the checked-out repository at /icefall inside the container.
          options: |
            --volume ${{ github.workspace }}/:/icefall
          shell: bash
          run: |
            export PYTHONPATH=/icefall:$PYTHONPATH
            cd /icefall
            git config --global --add safe.directory /icefall
            .github/scripts/aishell3/TTS/run.sh
73 changes: 73 additions & 0 deletions egs/aishell3/TTS/local/symbols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This file is copied from
# https://github.com/UEhQZXI/vits_chinese/blob/master/text/symbols.py
#
# Builds the flat `symbols` list: the pause entries, then the initials,
# then every final combined with every tone (all tones of the first final,
# then all tones of the second final, and so on).

_pause = "sil eos sp #0 #1 #2 #3".split()

_initials = "^ b c ch d f g h j k l m n p q r s sh t x z zh".split()

_tones = list("12345")

_finals = (
    "a ai an ang ao "
    "e ei en eng er "
    "i ia ian iang iao ie ii iii in ing iong iou "
    "o ong ou "
    "u ua uai uan uang uei uen ueng uo "
    "v van ve vn"
).split()

symbols = [
    *_pause,
    *_initials,
    *(final + tone for final in _finals for tone in _tones),
]
2 changes: 1 addition & 1 deletion egs/aishell3/TTS/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then
# You can find files like spk-info.txt inside $dl_dir/aishell3
mkdir -p data/manifests
if [ ! -e data/manifests/.aishell3.done ]; then
lhotse prepare aishell3 $dl_dir/aishell3 data/manifests
lhotse prepare aishell3 $dl_dir/aishell3 data/manifests >/dev/null 2>&1
touch data/manifests/.aishell3.done
fi
fi
Expand Down
4 changes: 4 additions & 0 deletions egs/aishell3/TTS/vits/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,6 +820,10 @@ def run(rank, world_size, args):
params.vocab_size = tokenizer.vocab_size

aishell3 = Aishell3SpeechTtsDataModule(args)
assert aishell3.sampling_rate == params.sampling_rate, (
aishell3.sampling_rate,
params.sampling_rate,
)
speaker_map = aishell3.speakers()
params.num_spks = len(speaker_map)

Expand Down

0 comments on commit 6658d0c

Please sign in to comment.