# bump new version of AudioBench and add CI-CD test for ASR and SI benchmarks #40
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow display name.
name: Test - Models

# Triggers. `on` is intentionally a plain key: GitHub's loader treats it as the
# trigger map even though generic YAML 1.1 parsers would read it as a boolean.
on:
  # Automatic trigger: pushes to the CI-CD/bach branch.
  push:
    branches:
      - CI-CD/bach

  # Manual trigger with user-supplied parameters.
  workflow_dispatch:
    inputs:
      # Model under test, passed to the test and benchmark steps.
      model_id:
        description: 'Model ID on huggingface, for example: jan-hq/Jan-Llama3-0708'
        required: true
        default: 'homebrewltd/llama3-s-2024-07-08'
        type: string

      # Dataset handed to the test step as its data directory.
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: 'jan-hq/instruction-speech-conversation-test'
        type: string

      # Free-form arguments appended verbatim to the python test command.
      extra_args:
        description: 'Extra arguments for python command, for example:--mode audio --num_rows 5'
        required: false
        default: '--mode audio --num_rows 5'
        type: string

      # Toggles for the optional benchmark stages. The job's step conditions
      # compare these against the string 'true'.
      run_benchmark:
        description: Run benchmark test
        required: false
        default: true
        type: boolean

      run_si_benchmark:
        description: Run SI benchmark
        required: false
        default: true
        type: boolean

      run_asr_benchmark:
        description: Run ASR benchmark
        required: false
        default: true
        type: boolean
jobs:
  # Single job: install test dependencies, run the model test case, then run
  # the optional benchmark stages selected through the workflow_dispatch inputs.
  #
  # On push events `github.event.inputs` is empty, so every `== 'true'`
  # condition below is false and only the checkout/install/test steps run.
  run-test:
    # Runner label; presumably a self-hosted runner - confirm.
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Later steps cd into lm-evaluation-harness and AudioBench;
          # presumably these are the submodules fetched here - confirm.
          submodules: recursive

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt

      - name: Run tests
        working-directory: ./tests
        env:
          # Inputs are passed through the environment instead of being
          # interpolated into the script, so their content is never parsed as
          # shell code. The `||` fallbacks cover push-triggered runs.
          MODEL_ID: ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }}
          DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
        # NOTE(review): extra_args is still interpolated directly because it is
        # meant to expand into several shell arguments; anyone able to dispatch
        # this workflow can therefore inject shell text through it.
        run: |
          python3 test_case.py \
            --model_dir "$MODEL_ID" \
            --data_dir "$DATASET_ID" \
            ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}

      - name: Install benchmark dependencies
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        run: |
          cd lm-evaluation-harness
          pip3 install -e .
          # Quoted so the shell cannot treat [vllm] as a glob pattern.
          pip3 install 'lm_eval[vllm]'
          # Make user-level pip entry points visible to the following steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        env:
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd lm-evaluation-harness
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh "$MODEL_ID"

      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        # v2 of upload-artifact is deprecated and no longer runs on github.com.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json

      - name: Eval on Speech Instruction Benchmark
        if: ${{ github.event.inputs.run_si_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd AudioBench
          pip3 install -r requirements.txt
          chmod +x eval_si.sh
          ./eval_si.sh "$MODEL_ID"

      - name: Eval on ASR Benchmark
        if: ${{ github.event.inputs.run_asr_benchmark == 'true' }}
        env:
          AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          cd AudioBench
          pip3 install -r requirements.txt
          chmod +x eval_asr.sh
          ./eval_asr.sh "$MODEL_ID"

      - name: Upload audio results
        # Gated on the audio benchmark flags (SI or ASR) rather than
        # run_benchmark, since those are the steps that run in ./AudioBench.
        if: ${{ github.event.inputs.run_si_benchmark == 'true' || github.event.inputs.run_asr_benchmark == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: audio-benchmark-results
          path: ./AudioBench/benchmark_results/log/**/*.json