1 parent a394155 commit 8774067
vllm_ascend/models/qwen2.py
@@ -3,6 +3,7 @@
 
 import torch
 import torch.nn.functional as F
+import vllm.envs as envs
 from torch import nn
 from transformers import Qwen2Config
 from vllm.compilation.decorators import support_torch_compile
@@ -154,6 +155,7 @@ def forward(
         flashcomm_v1_enabled = False
         attn_metadata = get_forward_context().attn_metadata
         if ascend_envs.VLLM_ASCEND_ENABLE_FLASHCOMM == 1 and \
+            envs.VLLM_USE_V1 and \
             attn_metadata is not None and \
             attn_metadata.attn_state != AscendAttentionState.DecodeOnly:
             flashcomm_v1_enabled = True
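
For context, the change tightens the FlashComm v1 gate so it also requires the vLLM V1 engine flag. Below is a minimal, self-contained sketch of that gating logic, not the repository's actual code: the metadata dataclass and enum here are simplified stand-ins for vllm-ascend's real types, and reading the flags via os.environ is a simplification of vLLM's envs / vllm_ascend envs modules.

# Hypothetical sketch of the gate added in this commit.
import os
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class AscendAttentionState(Enum):
    # Simplified stand-in for vllm-ascend's attention-state enum.
    PrefillOnly = 0
    DecodeOnly = 1


@dataclass
class AttnMetadata:
    # Simplified stand-in for the forward context's attention metadata.
    attn_state: AscendAttentionState


def flashcomm_v1_enabled(attn_metadata: Optional[AttnMetadata]) -> bool:
    # Mirrors the patched condition: the Ascend FlashComm flag must be
    # set, the vLLM V1 engine must be in use, attention metadata must be
    # present, and the batch must not be decode-only.
    return (
        os.environ.get("VLLM_ASCEND_ENABLE_FLASHCOMM") == "1"
        and os.environ.get("VLLM_USE_V1") == "1"
        and attn_metadata is not None
        and attn_metadata.attn_state != AscendAttentionState.DecodeOnly
    )


# Example: decode-only batches keep FlashComm v1 disabled even when
# both environment flags are set.
os.environ["VLLM_ASCEND_ENABLE_FLASHCOMM"] = "1"
os.environ["VLLM_USE_V1"] = "1"
print(flashcomm_v1_enabled(AttnMetadata(AscendAttentionState.PrefillOnly)))  # True
print(flashcomm_v1_enabled(AttnMetadata(AscendAttentionState.DecodeOnly)))   # False
print(flashcomm_v1_enabled(None))                                            # False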