FlashAttention Benchmark update #143
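This updates the nightly workflow that builds FlashAttention's CuTe DSL kernels inside the NVIDIA PyTorch 25.09 NGC container on a B200 runner, runs benchmarks/benchmark_attn.py, and publishes the output to the GitHub step summary.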
name: Flash Attention Benchmark

# To remotely trigger a FA Benchmarking run, use the following:
# curl -L -X POST -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" -H "Authorization: Bearer $TOKEN" https://api.github.com/repos/pytorch/pytorch-integration-testing/dispatches -d '{"event_type": "benchmark_flash_attention"}'

on:
  schedule:
    - cron: "0 6 * * *"  # Run every day at 6AM UTC
  push:
    paths:
      - .github/workflows/flash_attention.yml
  repository_dispatch:
    types: [benchmark_flash_attention]
  workflow_dispatch:

jobs:
  benchmark-flash-attn:
    name: Flash Attention CuTe DSL Benchmark
    runs-on: linux.dgx.b200.8
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Checkout Flash Attention repository
        uses: actions/checkout@v4
        with:
          repository: Dao-AILab/flash-attention
          path: fa4
          submodules: recursive

      - name: Setup GPU flags for docker run
        run: |
          echo "GPU_FLAG=--gpus all -e NVIDIA_DRIVER_CAPABILITIES=all" >> "${GITHUB_ENV}"

      - name: Run Flash Attention benchmark in Docker
        env:
          DOCKER_IMAGE: nvcr.io/nvidia/pytorch:25.09-py3
        run: |
          set -eux

          # Start a detached container with the workspace mounted at /tmp/workspace.
          container_name=$(docker run \
            ${GPU_FLAG} \
            --ipc=host \
            --ulimit memlock=-1 \
            --ulimit stack=67108864 \
            --tty \
            --detach \
            --security-opt seccomp=unconfined \
            --shm-size=4g \
            -v "${GITHUB_WORKSPACE}:/tmp/workspace" \
            -w /tmp/workspace \
            "${DOCKER_IMAGE}"
          )

          # Build and run FlashAttention CuTe DSL
          docker exec -t "${container_name}" bash -c "
            set -x
            pushd fa4
            python setup.py install
            pip install -e flash_attn/cute/
            nvidia-smi
            # Emit an HTML heading with the GPU power limit for the step summary.
            echo '<h1>B200' >> /tmp/workspace/fa4_output.txt
            nvidia-smi -q -d POWER | grep 'Current Power Limit' | head -1 | cut -d : -f 2 >> /tmp/workspace/fa4_output.txt
            echo '</h1>' >> /tmp/workspace/fa4_output.txt
            # Make the fa4 checkout importable by the benchmark script.
            export PYTHONPATH=\$(pwd)
            python benchmarks/benchmark_attn.py >> /tmp/workspace/fa4_output.txt
            popd
          "

          # Display results in the GitHub step summary (/tmp/workspace in the
          # container maps to the workspace on the host).
          if [ -f fa4_output.txt ]; then
            cat fa4_output.txt >> "${GITHUB_STEP_SUMMARY}"
          fi
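For reference, the repository_dispatch trigger from the header comment can also be fired with the GitHub CLI instead of raw curl; this is an equivalent sketch, assuming gh is authenticated with access to the repository:

gh api --method POST \
  repos/pytorch/pytorch-integration-testing/dispatches \
  -f event_type=benchmark_flash_attention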
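And a minimal sketch of reproducing the benchmark outside CI, assuming a Linux host with an NVIDIA GPU, Docker, and the NVIDIA Container Toolkit. It mirrors the image, mounts, and build steps of the workflow above; the clone URL is inferred from the Dao-AILab/flash-attention checkout and is the only value not taken verbatim from the workflow:

# Clone FlashAttention with submodules (mirrors the fa4 checkout step).
git clone --recursive https://github.com/Dao-AILab/flash-attention fa4

# Same NGC image and runtime flags the workflow uses.
docker run --rm --gpus all -e NVIDIA_DRIVER_CAPABILITIES=all \
  --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --shm-size=4g \
  -v "$(pwd):/tmp/workspace" -w /tmp/workspace \
  nvcr.io/nvidia/pytorch:25.09-py3 bash -c '
    cd fa4
    python setup.py install
    pip install -e flash_attn/cute/
    export PYTHONPATH=$(pwd)
    python benchmarks/benchmark_attn.py | tee /tmp/workspace/fa4_output.txt
  '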