Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CI for ksponspeech #1655

Merged
merged 1 commit into from
Jun 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions .github/scripts/ksponspeech/ASR/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
#
# CI test for the ksponspeech ASR recipe: clones a pretrained streaming
# transducer checkpoint, decodes test wavs with it, exports it to ONNX,
# decodes with the exported models, and stages the results in /tmp/model1.

# -e: stop at the first command that fails.
# -x: print every command before it runs, so the CI log shows each step.
set -ex

# Print a timestamped message tagged with the caller's location.
# (Adapted from espnet.)
#
# Arguments: $* - message words; joined with spaces, backslash escapes are
#                 interpreted because the line is emitted with `echo -e`
# Outputs:   "<YYYY-mm-dd HH:MM:SS> (<file>:<line>:<function>) <message>"
#            on stdout
log() {
  # Index 1 / BASH_LINENO[0] describe the caller of log, not log itself.
  local src_file="${BASH_SOURCE[1]##*/}"
  local call_line="${BASH_LINENO[0]}"
  local caller="${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${src_file}:${call_line}:${caller}) $*"
}

# Enter the recipe directory: the ./pruned_transducer_stateless7_streaming/*
# scripts used below are addressed relative to it. The path is relative, so
# the script must be started from the directory that contains egs/
# (presumably the repository root).
cd egs/ksponspeech/ASR


# Test the pretrained ksponspeech streaming transducer end to end.
#
# Steps:
#   1. Clone the pretrained checkpoint repo from Hugging Face and download
#      four test wavs into it.
#   2. Decode the wavs with pretrained.py (greedy search).
#   3. Export the checkpoint to ONNX with export-onnx.py.
#   4. Decode one wav with the exported models via onnx_pretrained.py.
#   5. Copy the ONNX models, tokens.txt and bpe.model to /tmp/model1 and
#      delete the clone.
#
# Globals:   none written (all variables are local)
# Arguments: none
# Outputs:   files in /tmp/model1; progress messages on stdout
# Returns:   status of the last command; with the script-level `set -e`
#            any failing step aborts the whole script
test_pretrained() {
  local repo_url=https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
  # git clone creates a directory named after the last URL component.
  local repo=${repo_url##*/}
  local wav_url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs
  local recipe=./pruned_transducer_stateless7_streaming
  local dst=/tmp/model1
  local i

  git lfs install
  git clone "$repo_url"

  pushd "$repo"
  mkdir -p test_wavs
  cd test_wavs
  for i in 0 1 2 3; do
    # --fail: exit non-zero on an HTTP error instead of saving the error
    # page as a .wav file that would only break later during decoding.
    curl -fSL -O "$wav_url/$i.wav"
  done
  cd ../exp
  # export-onnx.py is run with --epoch 99 --avg 1, which looks for
  # epoch-99.pt in the exp dir.
  ln -s pretrained.pt epoch-99.pt
  ls -lh
  popd

  log 'test pretrained.py'
  "$recipe"/pretrained.py \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --method greedy_search \
    "$repo/test_wavs/0.wav" \
    "$repo/test_wavs/1.wav" \
    "$repo/test_wavs/2.wav" \
    "$repo/test_wavs/3.wav"

  log 'test export-onnx.py'

  "$recipe"/export-onnx.py \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --decode-chunk-len 32 \
    --exp-dir "$repo/exp/"

  ls -lh "$repo/exp"

  ls -lh "$repo/data/lang_bpe_5000/"

  log 'test exported onnx models'
  "$recipe"/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_5000/tokens.txt" \
    "$repo/test_wavs/0.wav"

  # Stage the artifacts where the workflow's later steps pick them up.
  mkdir -p "$dst"

  cp -v "$repo"/exp/*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_5000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_5000/bpe.model" "$dst"

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}

# Entry point: run the pretrained-model test defined above.
test_pretrained
118 changes: 118 additions & 0 deletions .github/workflows/ksponspeech.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# CI workflow for the ksponspeech ASR recipe.
#
# Runs .github/scripts/ksponspeech/ASR/run.sh, which leaves exported ONNX
# models in /tmp/model1, then pushes them to a Hugging Face repo and uploads
# them as a release tarball to k2-fsa/sherpa-onnx.
name: ksponspeech

on:
  # Runs on pushes to the `ksponspeech` branch or when started manually.
  push:
    branches:
      - ksponspeech

  workflow_dispatch:

jobs:
  ksponspeech:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest]
        # Quoted so YAML keeps the version as a string (an unquoted 3.10
        # would be read as the number 3.1).
        python-version: ["3.8"]
      fail-fast: false

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
          cache-dependency-path: '**/requirements-ci.txt'

      - name: Install Python dependencies
        run: |
          grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
          pip uninstall -y protobuf
          pip install --no-binary protobuf protobuf==3.20.*

      # The key contains no content hash, only the Python version and a
      # date stamp, so the cached build is reused until the key is edited.
      - name: Cache kaldifeat
        id: my-cache
        uses: actions/cache@v4
        with:
          path: |
            ~/tmp/kaldifeat
          key: cache-tmp-${{ matrix.python-version }}-2023-05-22

      - name: Install kaldifeat
        if: steps.my-cache.outputs.cache-hit != 'true'
        shell: bash
        run: |
          .github/scripts/install-kaldifeat.sh

      - name: Test
        shell: bash
        run: |
          export PYTHONPATH=$PWD:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
          export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH

          .github/scripts/ksponspeech/ASR/run.sh

      - name: Show model files
        shell: bash
        run: |
          src=/tmp/model1
          ls -lh $src

      # Best-effort upload: the final `git push ... || true` ignores a failed
      # push. `cp -av test_wavs $src/` copies the Hugging Face repo's test
      # wavs into /tmp/model1 so the release tarball built in the next step
      # contains them.
      # NOTE(review): GIT_CLONE_PROTECTION_ACTIVE=false presumably works
      # around git's clone-protection check rejecting the LFS hooks - confirm.
      - name: Upload model to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            src=/tmp/model1
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf hf
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
            cd hf
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main
            cp -v $src/* ./
            ls -lh
            git lfs track "bpe.model"
            git lfs track "*.onnx"
            cp -av test_wavs $src/
            git add .
            git status
            git commit -m "update models"
            git status

            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
            rm -rf hf

      # Moves /tmp/model1 into the workspace under the release name and
      # packs it; the next step uploads the resulting sherpa-onnx-*.tar.bz2.
      - name: Prepare for release
        shell: bash
        run: |
          src=/tmp/model1
          d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
          mv $src ./$d
          tar cjvf ${d}.tar.bz2 $d
          ls -lh

      - name: Release exported onnx models
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: sherpa-onnx-*.tar.bz2
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
Loading