
Commit 8aea81e

WoosukKwon authored and Akshat-Tripathi committed
[V1][Minor] Do not print attn backend twice (vllm-project#13985)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
1 parent b469f95 commit 8aea81e

File tree: 1 file changed (+4 −3 lines)


vllm/platforms/cuda.py

Lines changed: 4 additions & 3 deletions
@@ -178,7 +178,8 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                         block_size)
                 else:
                     if use_v1:
-                        logger.info("Using FlashMLA backend on V1 engine.")
+                        logger.info_once(
+                            "Using FlashMLA backend on V1 engine.")
                         return ("vllm.v1.attention.backends.mla."
                                 "flashmla.FlashMLABackend")
                     else:
@@ -187,14 +188,14 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
                                 "flashmla.FlashMLABackend")

             if use_v1:
-                logger.info("Using Triton MLA backend on V1 engine.")
+                logger.info_once("Using Triton MLA backend on V1 engine.")
                 return ("vllm.v1.attention.backends.mla."
                         "triton_mla.TritonMLABackend")
             else:
                 logger.info("Using Triton MLA backend.")
                 return "vllm.attention.backends.triton_mla.TritonMLABackend"
         if use_v1:
-            logger.info("Using Flash Attention backend on V1 engine.")
+            logger.info_once("Using Flash Attention backend on V1 engine.")
             return ("vllm.v1.attention.backends.flash_attn."
                     "FlashAttentionBackend")
         if selected_backend == _Backend.FLASHINFER:
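
The change swaps plain logger.info calls for logger.info_once so the selected attention backend is reported only the first time get_attn_backend_cls resolves it, rather than on every call. As a minimal sketch of the general idea (not vLLM's actual implementation, which lives on its own logger), a "log once" helper can be built on the standard logging module by caching messages it has already emitted; the helper name info_once below is hypothetical:

    import logging
    from functools import lru_cache

    # Hypothetical stand-in for vLLM's logger.info_once; illustration only.
    logger = logging.getLogger("vllm.example")
    logging.basicConfig(level=logging.INFO)

    @lru_cache(maxsize=None)
    def info_once(msg: str) -> None:
        # lru_cache memoizes on the message string, so the body runs only the
        # first time a given message is seen; repeat calls are dropped.
        logger.info(msg)

    info_once("Using Flash Attention backend on V1 engine.")  # logged
    info_once("Using Flash Attention backend on V1 engine.")  # suppressed

With such a helper, code paths that are invoked more than once (as backend selection can be) still produce a single log line per distinct message.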
