1 parent a394155 commit 8774067
vllm_ascend/models/qwen2.py
@@ -3,6 +3,7 @@
 
 import torch
 import torch.nn.functional as F
+import vllm.envs as envs
 from torch import nn
 from transformers import Qwen2Config
 from vllm.compilation.decorators import support_torch_compile
@@ -154,6 +155,7 @@ def forward(
         flashcomm_v1_enabled = False
         attn_metadata = get_forward_context().attn_metadata
         if ascend_envs.VLLM_ASCEND_ENABLE_FLASHCOMM == 1 and \
+            envs.VLLM_USE_V1 and \
             attn_metadata is not None and \
             attn_metadata.attn_state != AscendAttentionState.DecodeOnly:
             flashcomm_v1_enabled = True
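
For context, the change tightens the FlashComm v1 gate so it also requires the vLLM V1 engine flag. Below is a minimal, self-contained sketch of that gating logic, not the repository's actual code: the metadata dataclass and enum here are simplified stand-ins for vllm-ascend's real types, and reading the flags via os.environ is a simplification of vLLM's envs / vllm_ascend envs modules.

# Hypothetical sketch of the gate added in this commit.
import os
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class AscendAttentionState(Enum):
    # Simplified stand-in for vllm-ascend's attention-state enum.
    PrefillOnly = 0
    DecodeOnly = 1


@dataclass
class AttnMetadata:
    # Simplified stand-in for the forward context's attention metadata.
    attn_state: AscendAttentionState


def flashcomm_v1_enabled(attn_metadata: Optional[AttnMetadata]) -> bool:
    # Mirrors the patched condition: the Ascend FlashComm flag must be
    # set, the vLLM V1 engine must be in use, attention metadata must be
    # present, and the batch must not be decode-only.
    return (
        os.environ.get("VLLM_ASCEND_ENABLE_FLASHCOMM") == "1"
        and os.environ.get("VLLM_USE_V1") == "1"
        and attn_metadata is not None
        and attn_metadata.attn_state != AscendAttentionState.DecodeOnly
    )


# Example: decode-only batches keep FlashComm v1 disabled even when
# both environment flags are set.
os.environ["VLLM_ASCEND_ENABLE_FLASHCOMM"] = "1"
os.environ["VLLM_USE_V1"] = "1"
print(flashcomm_v1_enabled(AttnMetadata(AscendAttentionState.PrefillOnly)))  # True
print(flashcomm_v1_enabled(AttnMetadata(AscendAttentionState.DecodeOnly)))   # False
print(flashcomm_v1_enabled(None))                                            # False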