Deprecate attention patching for llama #5167

Workflow file for this run

name: PR GPU tests
on:
  push:
    branches:
      - main
      - release/*
  pull_request_target:
    branches:
      - main
      - release/**
  workflow_dispatch:
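# pull_request_target runs in the context of the base repository, so repo
# secrets (e.g. MCLOUD_API_KEY below) are available to PRs from forks; the
# `if: github.repository_owner == 'mosaicml'` guard on the job limits where
# it actually executes.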
# Cancel old runs when a new commit is pushed to the same branch, unless on main
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
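# For a pull request the group resolves to, e.g., "PR GPU tests-5167", so a
# new push to the PR cancels its previous in-progress run; for a push to main
# the group is "PR GPU tests-refs/heads/main" and cancel-in-progress is false.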
jobs:
  pytest-gpu:
    uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.3
    strategy:
      matrix:
        include:
          - name: "gpu-2.2.1"
            container: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
            markers: "gpu"
            pip_deps: "[all]"
            pytest_command: "coverage run -m pytest"
          - name: "gpu-2.2.1-flash2"
            container: mosaicml/llm-foundry:2.2.1_cu121_flash2-latest
            markers: "gpu"
            pip_deps: "[all-flash2]"
            pytest_command: "coverage run -m pytest"
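    # The two matrix entries run the same GPU test suite and differ only in
    # the base image and pip extras: a stock PyTorch 2.2.1 container with the
    # [all] extras, and an llm-foundry Flash Attention 2 container with
    # [all-flash2].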
    name: ${{ matrix.name }}
    if: github.repository_owner == 'mosaicml'
    with:
      container: ${{ matrix.container }}
      git_repo: mosaicml/llm-foundry
      mcloud-timeout: 1800
      name: ${{ matrix.name }}
      pip_deps: ${{ matrix.pip_deps }}
      pytest-command: ${{ matrix.pytest_command }}
      pytest-markers: ${{ matrix.markers }}
      python-version: 3.9
    secrets:
      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
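# The mcloud-api-key secret is forwarded to the reusable pytest-gpu workflow,
# which launches the test run on MosaicML Cloud; mcloud-timeout bounds that
# run (1800 is presumably seconds, i.e. 30 minutes, per the ci-testing
# workflow's definition of the input).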