Skip to content

Test and Benchmark Models #23

Test and Benchmark Models

Test and Benchmark Models #23

Workflow file for this run

# Manually dispatched workflow. The inputs below choose the model, the
# dataset, extra CLI flags for the test script, and whether the benchmark
# steps run.
name: Test and Benchmark Models
on:
  workflow_dispatch:
    inputs:
      # Passed to test_case.py as --model_dir and to run_benchmark.sh.
      model_id:
        description: 'Model ID on huggingface, for example: homebrewltd/llama3-s-2024-07-08'
        required: true
        default: homebrewltd/llama3-s-2024-07-08
        type: string
      # Passed to test_case.py as --data_dir.
      dataset_id:
        description: 'Dataset ID on huggingface, for example: jan-hq/instruction-speech-conversation-test'
        required: true
        default: jan-hq/instruction-speech-conversation-test
        type: string
      # Appended verbatim to the test_case.py command line.
      extra_args:
        description: 'Extra arguments for python command, for example:--mode audio --num_rows 5'
        required: false
        default: "--mode audio --num_rows 5"
        type: string
      # Gates the benchmark install / run / upload steps.
      run_benchmark:
        description: 'Run benchmark test'
        required: false
        default: true
        type: boolean
      # Was spelled `run audio_benchmark` (with a space); renamed to match the
      # snake_case IDs of the other inputs.
      # NOTE(review): no step in this workflow reads this input yet - confirm
      # whether an audio benchmark step is still to be added.
      run_audio_benchmark:
        description: 'Run audio benchmark test'
        required: false
        default: true
        type: boolean
jobs:
  # Single job: install test deps, run the test script, then (optionally)
  # install and run the lm-evaluation-harness benchmark and upload its JSON
  # results as an artifact.
  run-test-and-benchmark:
    # `research` is not a GitHub-hosted label, so this presumably targets a
    # self-hosted runner - TODO confirm.
    runs-on: research
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Later steps work inside ./lm-evaluation-harness, which is
          # presumably one of the submodules fetched here - verify.
          submodules: 'recursive'

      - name: Install dependencies
        working-directory: ./tests
        run: |
          python3 -m pip install --upgrade pip
          pip3 install -r requirements.txt

      - name: Run tests
        working-directory: ./tests
        # Inputs are passed through the environment instead of being spliced
        # into the script text, so shell metacharacters in an input cannot
        # inject commands. Fallback values are unchanged from the original.
        env:
          MODEL_ID: ${{ github.event.inputs.model_id || 'jan-hq/Jan-Llama3-0708' }}
          DATASET_ID: ${{ github.event.inputs.dataset_id || 'jan-hq/instruction-speech-conversation-test' }}
          EXTRA_ARGS: ${{ github.event.inputs.extra_args || '--mode audio --num_rows 5' }}
        run: |
          # EXTRA_ARGS is deliberately unquoted so it word-splits into separate
          # flags. Quotes inside the value are NOT interpreted by the shell.
          python3 test_case.py --model_dir "$MODEL_ID" --data_dir "$DATASET_ID" $EXTRA_ARGS

      - name: Install benchmark dependencies
        # workflow_dispatch inputs arrive as strings in github.event.inputs,
        # hence the comparison against 'true'.
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        working-directory: ./lm-evaluation-harness
        run: |
          pip3 install -e .
          # Quoted so the shell never treats [vllm] as a glob pattern.
          pip3 install 'lm_eval[vllm]'
          # Make user-level pip entry points visible to subsequent steps.
          echo "$HOME/.local/bin" >> "$GITHUB_PATH"

      - name: Run benchmark
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        working-directory: ./lm-evaluation-harness
        env:
          MODEL_ID: ${{ github.event.inputs.model_id }}
        run: |
          chmod +x ./run_benchmark.sh
          ./run_benchmark.sh "$MODEL_ID"

      - name: Upload benchmark results
        if: ${{ github.event.inputs.run_benchmark == 'true' }}
        # v2 of upload-artifact is deprecated and no longer runs on github.com;
        # v4 is the supported major version.
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results
          path: ./lm-evaluation-harness/benchmark_results/**/*.json