Skip to content

Commit e5e076c

Browse files
Authored by Varun Sundar Rabindranath (varun-sundar-rabindranath)
and co-authors
[BugFix] Stopgap - Flashinfer Autotuner + GPT-OSS + DP/TP (vllm-project#27762)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
1 parent eebf00c commit e5e076c

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

vllm/model_executor/warmup/kernel_warmup.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import torch
1212

1313
import vllm.envs as envs
14-
from vllm.config import VllmConfig
14+
from vllm.config import CUDAGraphMode, VllmConfig
1515
from vllm.logger import init_logger
1616
from vllm.model_executor.warmup.deep_gemm_warmup import deep_gemm_warmup
1717
from vllm.platforms import current_platform
def flashinfer_autotune_supported(vllm_config: VllmConfig) -> bool:
    """Return True iff flashinfer autotune will run through without issues.

    Record known issues with vllm + flashinfer autotune here. Return True if
    and only if flashinfer autotune will run through without issues.

    Current known-bad combination (stopgap, see vllm-project#27762): a
    flashinfer mxfp4 MoE backend under DP or TP while running in eager mode
    (no CUDA graphs). In that case autotuning is skipped.

    Args:
        vllm_config: The engine configuration; only its parallel, env-driven
            MoE backend selection, and compilation (cudagraph) settings are
            consulted.

    Returns:
        True when autotuning is expected to be safe, False otherwise.
    """
    # Any multi-rank setup (data- or tensor-parallel) is part of the
    # problematic combination.
    is_tp_or_dp = (vllm_config.parallel_config.data_parallel_size > 1) or (
        vllm_config.parallel_config.tensor_parallel_size > 1
    )
    # Flashinfer is the mxfp4 MoE backend either when explicitly requested
    # via env vars, or implicitly on >=sm100 where it is the default.
    is_fi_mxfp4_backend = (
        envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8
        or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_BF16
        or envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS
    ) or (
        current_platform.is_cuda() and current_platform.is_device_capability(100)
    )  # on >=sm100, default mxfp4 backend is flashinfer
    # Eager execution, i.e. CUDA graphs disabled.
    is_eager = vllm_config.compilation_config.cudagraph_mode == CUDAGraphMode.NONE

    return not (is_tp_or_dp and is_fi_mxfp4_backend and is_eager)
4046

4147

4248
def kernel_warmup(worker: "Worker"):

0 commit comments

Comments
 (0)