Skip to content

Commit dbe9856

Browse files
mgoin authored and rtourgeman committed
Disable FlashInfer sampler by default (vllm-project#26859)
Signed-off-by: mgoin <mgoin64@gmail.com>
1 parent 9104957 commit dbe9856

File tree

1 file changed

+6
-14
lines changed

1 file changed

+6
-14
lines changed

vllm/v1/sample/ops/topk_topp_sampler.py

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -46,23 +46,15 @@ def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs") -> None:
46 46   "Falling back to default sampling implementation."
47 47   )
48 48   self.forward = self.forward_native
49    - elif envs.VLLM_USE_FLASHINFER_SAMPLER is not False:
50    - # NOTE(woosuk): The V0 sampler doesn't use FlashInfer for
51    - # sampling unless VLLM_USE_FLASHINFER_SAMPLER=1 (i.e., by
52    - # default it is unused). For backward compatibility, we set
53    - # `VLLM_USE_FLASHINFER_SAMPLER` as None by default and
54    - # interpret it differently in V0 and V1 samplers: In V0,
55    - # None means False, while in V1, None means True. This is
56    - # why we use the condition
57    - # `envs.VLLM_USE_FLASHINFER_SAMPLER is not False` here.
   49 + elif envs.VLLM_USE_FLASHINFER_SAMPLER:
   50 + # Users must opt in explicitly via VLLM_USE_FLASHINFER_SAMPLER=1.
58 51   logger.info_once("Using FlashInfer for top-p & top-k sampling.")
59 52   self.forward = self.forward_cuda
60 53   else:
61    - logger.warning_once(
62    - "FlashInfer is available, but it is not enabled. "
63    - "Falling back to the PyTorch-native implementation of "
64    - "top-p & top-k sampling. For the best performance, "
65    - "please set VLLM_USE_FLASHINFER_SAMPLER=1."
   54 + logger.debug_once(
   55 + "FlashInfer top-p/top-k sampling is available but disabled "
   56 + "by default. Set VLLM_USE_FLASHINFER_SAMPLER=1 to opt in "
   57 + "after verifying accuracy for your workloads."
66 58   )
67 59   self.forward = self.forward_native
68 60   else:

0 commit comments

Comments
 (0)