GPQA Few-shot CoT, adapter part #7482
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow display name shown in the GitHub Actions UI.
name: Test

# Triggers: run on pushes to main and on every pull request.
# In both cases the run is skipped when the only files changed live under
# helm-frontend/.
on:
  push:
    branches: [main]
    paths-ignore:
      - 'helm-frontend/**'
  pull_request:
    paths-ignore:
      - 'helm-frontend/**'
# Two independent jobs, each fanned out over the same Python version matrix:
#   install - builds the wheel, installs it without extras, and smoke-tests the CLI
#   test    - dev install, static checks, pytest, and a data-free pipeline run
jobs:
  install:
    # Tests that the HELM CLI commands work when only installing required dependencies
    # without optional extra dependencies.
    name: Run HELM with minimal dependencies only
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      # Build the distribution from the checkout and install the resulting wheel,
      # so the commands below run against the packaged artifact.
      - run: python3 -m pip install --upgrade build
      - run: python3 -m build
      - run: python3 -m pip install dist/crfm_helm-*.whl
      # Smoke-test the three CLI entry points against the "test" suite.
      - run: helm-run --run-entries simple1:model=simple/model1 --max-eval-instances 10 --suite test
      - run: helm-summarize --suite test
      - run: helm-server --help

  test:
    name: Run all tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so that "3.10" is not parsed as the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      # Deletes /opt/ghc from the runner image, then prints disk usage.
      - name: Clear free space
        run: |
          sudo rm -rf /opt/ghc
          df -h
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      # Installs dependencies and performs static code checks
      - run: ./install-dev.sh
      - run: ./pre-commit.sh
      - name: Run tests
        run: pytest --durations=20
        env:
          # NOTE(review): the matrix above defines only `python-version`, so
          # `matrix.test` is undefined here and TEST is presumably set to an
          # empty string. Confirm whether anything reads TEST; if not, drop it.
          TEST: ${{ matrix.test }}
          # Source branch of the pull request; falls back to 'main' when
          # github.head_ref is empty (i.e. on push events).
          VERSION: ${{ github.head_ref || 'main' }}
      - name: Run entire pipeline quickly without any data
        # Checking RunSpecs with openai/davinci should be comprehensive enough
        run: helm-run --suite test -m 100 --skip-instances --models-to-run openai/davinci --exit-on-error