Deprecate attention patching for llama #5167

Workflow file for this run

name: PR GPU tests
on:
  push:
    branches:
      - main
      - release/*
  pull_request_target:
    branches:
      - main
      - release/**
  workflow_dispatch:
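# pull_request_target runs in the context of the base repository, so repo
# secrets (e.g. MCLOUD_API_KEY below) are available to PRs from forks; the
# `if: github.repository_owner == 'mosaicml'` guard on the job limits where
# it actually executes.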
# Cancel old runs when a new commit is pushed to the same branch, unless on main
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
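# For a pull request the group resolves to, e.g., "PR GPU tests-5167", so a
# new push to the PR cancels its previous in-progress run; for a push to main
# the group is "PR GPU tests-refs/heads/main" and cancel-in-progress is false.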
jobs:
  pytest-gpu:
    uses: mosaicml/ci-testing/.github/workflows/pytest-gpu.yaml@v0.0.3
    strategy:
      matrix:
        include:
          - name: "gpu-2.2.1"
            container: mosaicml/pytorch:2.2.1_cu121-python3.11-ubuntu20.04
            markers: "gpu"
            pip_deps: "[all]"
            pytest_command: "coverage run -m pytest"
          - name: "gpu-2.2.1-flash2"
            container: mosaicml/llm-foundry:2.2.1_cu121_flash2-latest
            markers: "gpu"
            pip_deps: "[all-flash2]"
            pytest_command: "coverage run -m pytest"
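    # The two matrix entries run the same GPU test suite and differ only in
    # the base image and pip extras: a stock PyTorch 2.2.1 container with the
    # [all] extras, and an llm-foundry Flash Attention 2 container with
    # [all-flash2].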
    name: ${{ matrix.name }}
    if: github.repository_owner == 'mosaicml'
    with:
      container: ${{ matrix.container }}
      git_repo: mosaicml/llm-foundry
      mcloud-timeout: 1800
      name: ${{ matrix.name }}
      pip_deps: ${{ matrix.pip_deps }}
      pytest-command: ${{ matrix.pytest_command }}
      pytest-markers: ${{ matrix.markers }}
      python-version: 3.9
    secrets:
      mcloud-api-key: ${{ secrets.MCLOUD_API_KEY }}
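# The mcloud-api-key secret is forwarded to the reusable pytest-gpu workflow,
# which launches the test run on MosaicML Cloud; mcloud-timeout bounds that
# run (1800 is presumably seconds, i.e. 30 minutes, per the ci-testing
# workflow's definition of the input).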