Skip to content

Commit

Permalink
Add CI for ksponspeech (#1655)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jun 16, 2024
1 parent c13c7aa commit 1f5c0a8
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 0 deletions.
72 changes: 72 additions & 0 deletions .github/scripts/ksponspeech/ASR/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash

# -e: stop at the first command that exits non-zero.
# -x: print every command before running it, so the CI log shows each step.
set -ex

log() {
  # Timestamped logger (borrowed from espnet).
  #
  # Prints: "<YYYY-mm-dd HH:MM:SS> (<caller file>:<caller line>:<caller func>) <message>"
  # The message is all arguments joined by spaces; backslash escapes in it
  # are interpreted because of `echo -e`.
  local caller_file=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# The recipe scripts below are invoked with paths relative to this directory.
cd egs/ksponspeech/ASR


# Smoke-test the pretrained KsponSpeech streaming zipformer checkpoint.
#
# Steps:
#   1. Clone the pretrained model repository from Hugging Face and download
#      four test wave files into <repo>/test_wavs.
#   2. Decode the test waves with pretrained.py (greedy search).
#   3. Export the checkpoint to ONNX with export-onnx.py.
#   4. Decode one test wave with the exported ONNX models.
#   5. Copy the ONNX models, tokens.txt and bpe.model to /tmp/model1 and
#      remove the clone.
#
# Globals:   none
# Arguments: none
# Requires:  git (with git-lfs), curl, the log() helper, and a current
#            directory that contains ./pruned_transducer_stateless7_streaming/.
test_pretrained() {
  local -r model_url=https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
  local -r wavs_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  # `git clone <url>` checks out into a directory named after the last URL
  # component, so derive the directory name instead of spelling it twice.
  local -r repo=${model_url##*/}
  local -r dst=/tmp/model1
  local i

  git lfs install
  git clone "$model_url"

  pushd "$repo"
  mkdir -p test_wavs
  cd test_wavs
  for i in 0 1 2 3; do
    # --fail: exit non-zero on an HTTP error instead of silently saving the
    # server's error page as a .wav file.
    curl --fail -SL -O "${wavs_url}/${i}.wav"
  done
  cd ../exp
  # export-onnx.py is run below with --epoch 99 --avg 1, so expose the
  # pretrained checkpoint under the epoch-99.pt name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log 'test pretrained.py'
  ./pruned_transducer_stateless7_streaming/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --method greedy_search \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/2.wav" \
    "$repo/test_wavs/3.wav"

  log 'test export-onnx.py'

  ./pruned_transducer_stateless7_streaming/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --decode-chunk-len 32 \
    --exp-dir "$repo/exp/"

  ls -lh "$repo/exp"

  ls -lh "$repo/data/lang_bpe_5000/"

  log 'test exported onnx models'
  ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"

  # Collect the files that the CI workflow publishes from /tmp/model1.
  mkdir -p "$dst"

  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}

# Entry point: run the pretrained-model test when this script is executed.
test_pretrained
118 changes: 118 additions & 0 deletions .github/workflows/ksponspeech.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# CI for the KsponSpeech ASR recipe.
#
# Runs .github/scripts/ksponspeech/ASR/run.sh, which leaves the exported ONNX
# models in /tmp/model1, then pushes those files to a Hugging Face repository
# and uploads them as a tarball to the `asr-models` release of
# k2-fsa/sherpa-onnx.
name: ksponspeech

on:
  push:
    branches:
      - ksponspeech

  workflow_dispatch:

jobs:
  ksponspeech:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so YAML keeps the version as a string (an unquoted 3.10
        # would be parsed as the number 3.1).
        python-version: ["3.8"]
      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          # One `pip install` per requirements line. Only -L 1 is passed:
          # GNU xargs treats -n and -L as mutually exclusive and honours the
          # later one, which was already -L 1.
          grep -v '^#' ./requirements-ci.txt | xargs -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*

      - name: Cache kaldifeat
        id: my-cache
        # actions/cache v1 and v2 are deprecated and no longer run.
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: Test
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
          .github/scripts/ksponspeech/ASR/run.sh

      - name: Show model files
        shell: bash
        run: |
          src=/tmp/model1
          ls -lh "$src"

      - name: Upload model to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            src=/tmp/model1
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"
            # Start every attempt from a fresh clone.
            rm -rf hf
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false
            git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
            cd hf
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp -v "$src"/* ./
            ls -lh
            git lfs track "bpe.model"
            git lfs track "*.onnx"
            # Copy the test waves back so the release tarball built from
            # $src in the "Prepare for release" step contains them too.
            cp -av test_wavs "$src"/
            git add .
            git status
            git commit -m "update models"
            git status
            # Best effort: a failed push does not fail the step.
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
            # Leave the clone before deleting it; `rm -rf hf` from inside
            # hf/ would look for hf/hf and remove nothing.
            cd ..
            rm -rf hf

      - name: Prepare for release
        shell: bash
        run: |
          src=/tmp/model1
          d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
          mv "$src" "./$d"
          tar cjvf "${d}.tar.bz2" "$d"
          ls -lh

      - name: Release exported onnx models
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models

0 comments on commit 1f5c0a8

Please sign in to comment.