From 5ba19f94ac7fc534c2745d0e3b33f0d9a949f15c Mon Sep 17 00:00:00 2001
From: mgoin
Date: Wed, 18 Jun 2025 00:32:12 +0000
Subject: [PATCH] Fix FA2 fallback for Blackwell V1

Signed-off-by: mgoin
---
 vllm/platforms/cuda.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 2d07ddc36613..54719a3e79dd 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -255,7 +255,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                         "install FlashInfer for better performance.")
                     pass
             # FlashAttention is the default for SM 8.0+ GPUs
-            elif cls.has_device_capability(80):
+            if cls.has_device_capability(80):
                 logger.info_once("Using Flash Attention backend on V1 engine.")
                 return ("vllm.v1.attention.backends."
                         "flash_attn.FlashAttentionBackend")
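
The one-character-class change matters because of Python control flow: with `elif`, a Blackwell (SM 10.0) GPU that enters the FlashInfer branch but fails the import only executes `pass` and never reaches the SM 8.0+ check, so no FlashAttention (FA2) fallback is selected. Below is a minimal sketch, not the actual vLLM code, of that before/after behavior; the names `is_blackwell`, `flashinfer_installed`, and `has_sm80` are illustrative stand-ins for the real capability checks in `cuda.py`.

```python
# Sketch only: stand-in flags instead of vLLM's real device-capability checks.

def select_backend_before(is_blackwell: bool, flashinfer_installed: bool,
                          has_sm80: bool) -> str | None:
    """Old behavior: the SM 8.0+ branch is an `elif`, so once the Blackwell
    branch runs and FlashInfer is missing, no backend is ever returned."""
    if is_blackwell:
        if flashinfer_installed:
            return "FlashInferBackend"
        # FlashInfer missing: falls out of this branch, but the `elif`
        # below is skipped because the outer `if` already matched.
    elif has_sm80:
        return "FlashAttentionBackend"
    return None


def select_backend_after(is_blackwell: bool, flashinfer_installed: bool,
                         has_sm80: bool) -> str | None:
    """New behavior: the SM 8.0+ check is a plain `if`, so Blackwell GPUs
    (which also satisfy SM 8.0+) fall back to FlashAttention."""
    if is_blackwell:
        if flashinfer_installed:
            return "FlashInferBackend"
    if has_sm80:
        return "FlashAttentionBackend"
    return None


# Blackwell without FlashInfer installed:
assert select_backend_before(True, False, True) is None            # bug: no backend chosen
assert select_backend_after(True, False, True) == "FlashAttentionBackend"
```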