# Workflow file for the run "GPQA Few-shot CoT, adapter part #7482".

# Continuous-integration workflow. It runs on pushes to main and on pull
# requests, and has two jobs: a minimal-dependency install smoke test and the
# full test suite. Both jobs run once per Python version in their matrix.
name: Test

on:
  push:
    branches: [main]
    # Pushes that only touch the frontend directory do not trigger this workflow.
    paths-ignore:
      - 'helm-frontend/**'
  pull_request:
    # Same frontend exclusion as for pushes.
    paths-ignore:
      - 'helm-frontend/**'

# Least-privilege token: none of the steps visible in this workflow hand the
# GITHUB_TOKEN to a script, so read-only access to repository contents (needed
# by actions/checkout) is sufficient.
permissions:
  contents: read

jobs:
  install:
    # Tests that the HELM CLI commands work when only installing required dependencies
    # without optional extra dependencies.
    name: Run HELM with minimal dependencies only
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that "3.10" stays a string instead of becoming the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      # Build the distribution from the checkout and install the resulting wheel.
      - run: python3 -m pip install --upgrade build
      - run: python3 -m build
      - run: python3 -m pip install dist/crfm_helm-*.whl
      # Exercise the installed CLI entry points.
      - run: helm-run --run-entries simple1:model=simple/model1 --max-eval-instances 10 --suite test
      - run: helm-summarize --suite test
      - run: helm-server --help

  test:
    name: Run all tests
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Quoted so that "3.10" stays a string instead of becoming the float 3.1.
        python-version: ["3.9", "3.10", "3.11", "3.12"]
    steps:
      - name: Clear free space
        # Removes /opt/ghc from the runner, then prints the remaining disk space.
        run: |
          sudo rm -rf /opt/ghc
          df -h
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      # Installs dependencies and performs static code checks
      - run: ./install-dev.sh
      - run: ./pre-commit.sh
      - name: Run tests
        run: pytest --durations=20
        env:
          # NOTE(review): this job's matrix defines only python-version, so
          # matrix.test expands to an empty string. Confirm whether anything
          # still reads TEST before removing it.
          TEST: ${{ matrix.test }}
          # github.head_ref is only populated for pull_request events; on push
          # the expression falls back to 'main'.
          VERSION: ${{ github.head_ref || 'main' }}
      - name: Run entire pipeline quickly without any data
        # Checking RunSpecs with openai/davinci should be comprehensive enough
        run: helm-run --suite test -m 100 --skip-instances --models-to-run openai/davinci --exit-on-error