
Commit 6ddda58

xuechendi authored and charlifu committed
[BUG FIX][NON-CUDA] Quick fix to avoid calling cudagraph_unsafe in attention (vllm-project#25298)
Signed-off-by: Chendi Xue <Chendi.Xue@intel.com>
Signed-off-by: charlifu <charlifu@amd.com>
1 parent e11fe87 commit 6ddda58

File tree

1 file changed: +6 −2 lines changed

vllm/attention/layer.py

Lines changed: 6 additions & 2 deletions

@@ -29,6 +29,10 @@
 
 logger = init_logger(__name__)
 USE_XFORMERS_OPS = None
+try:
+    tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
+except AttributeError:
+    tag_cudagraph_unsafe = ()  # type: ignore[assignment]
 
 
 def check_xformers_availability():
@@ -577,7 +581,7 @@ def unified_attention_fake(
     mutates_args=[],
     fake_impl=unified_attention_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
 
 
@@ -628,5 +632,5 @@ def unified_attention_with_output_fake(
     mutates_args=["output", "output_block_scale"],
     fake_impl=unified_attention_with_output_fake,
     dispatch_key=current_platform.dispatch_key,
-    tags=(torch._C.Tag.cudagraph_unsafe, ),
+    tags=tag_cudagraph_unsafe,
 )
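For context, below is a minimal standalone sketch of the guard this commit introduces, assuming only a working PyTorch install. It is not part of the commit itself: on builds where torch._C.Tag exposes cudagraph_unsafe, the tuple carries the tag; on builds where the attribute is missing (the non-CUDA case this fix targets), the except branch leaves the tuple empty so op registration can proceed without tags.

    import torch

    # Fall back gracefully on PyTorch builds where torch._C.Tag has no
    # `cudagraph_unsafe` member (e.g. some non-CUDA or older builds).
    try:
        tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, )
    except AttributeError:
        tag_cudagraph_unsafe = ()  # no tag available; register ops untagged

    # The resulting tuple can be passed unconditionally wherever a tags
    # argument is accepted, as the diff does for vLLM's attention ops.
    print("tags:", tag_cudagraph_unsafe)

The design choice here is feature detection over version checks: probing for the attribute once at import time keeps every downstream registration site unconditional.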
