diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
index 07ae470fabfb..e2cc7adb395b 100644
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -221,6 +221,21 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                 logger.info_once("Using Triton backend on V1 engine.")
                 return ("vllm.v1.attention.backends."
                         "triton_attn.TritonAttentionBackend")
+            if cls.is_device_capability(100):
+                # Prefer FlashInfer for V1 on Blackwell GPUs if installed
+                try:
+                    import flashinfer  # noqa: F401
+                    logger.info_once(
+                        "Using FlashInfer backend on V1 engine by default for "
+                        "Blackwell (SM 10.0) GPUs.")
+                    return ("vllm.v1.attention.backends."
+                            "flashinfer.FlashInferBackend")
+                except ImportError:
+                    logger.info_once(
+                        "FlashInfer failed to import for V1 engine on "
+                        "Blackwell (SM 10.0) GPUs; it is recommended to "
+                        "install FlashInfer for better performance.")
+                    pass
             if cls.has_device_capability(80):
                 logger.info_once("Using Flash Attention backend on V1 engine.")
                 return ("vllm.v1.attention.backends."
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 1ec9c78a361a..5811b2419610 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -226,6 +226,30 @@ def has_device_capability(
 
         return current_capability.to_int() >= capability
 
+    @classmethod
+    def is_device_capability(
+        cls,
+        capability: Union[tuple[int, int], int],
+        device_id: int = 0,
+    ) -> bool:
+        """
+        Test whether this platform has exactly the specified device capability.
+
+        The `capability` argument can either be:
+
+        - A tuple `(major, minor)`.
+        - An integer `<major><minor>`. (See
+        [`DeviceCapability.to_int`][vllm.platforms.interface.DeviceCapability.to_int])
+        """
+        current_capability = cls.get_device_capability(device_id=device_id)
+        if current_capability is None:
+            return False
+
+        if isinstance(capability, tuple):
+            return current_capability == capability
+
+        return current_capability.to_int() == capability
+
     @classmethod
     def get_device_name(cls, device_id: int = 0) -> str:
         """Get the name of a device."""