1 parent 84b48b3 commit dcdd1e5
vllm/model_executor/layers/fused_moe/shared_fused_moe.py
@@ -25,9 +25,9 @@ def __init__(
         super().__init__(**kwargs)
         self._shared_experts = shared_experts
         # Disable shared expert overlap if EP is disabled or we are not using
-        # flashinfer + DP since there is nothing to be gained in this case
-        # and it prevents the shared experts from being hidden from
-        # torch.compile.
+        # flashinfer + DP since there is nothing to be gained in this case.
+        # Disabling the overlap optimization also prevents the shared experts
+        # from being hidden from torch.compile.
         self.use_overlapped = use_overlapped and not (
             self.use_ep or self.use_flashinfer_cutlass_kernels
         ) and self.shared_experts is not None
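For context, here is a minimal, self-contained sketch of how the use_overlapped gate in this hunk could be wired into a module's __init__. It is not vLLM's actual SharedFusedMoE: the class name, the SharedExpertsStub helper, and the constructor signature are illustrative assumptions; only the boolean expression for use_overlapped mirrors the code shown in the diff.

# Minimal sketch of the gating logic from the hunk above. Names and the
# constructor signature are hypothetical; only the use_overlapped expression
# follows the patched code.
from typing import Optional

import torch
import torch.nn as nn


class SharedExpertsStub(nn.Module):
    """Hypothetical stand-in for the shared-expert MLP."""

    def __init__(self, hidden_size: int):
        super().__init__()
        self.proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.proj(x)


class SharedFusedMoESketch(nn.Module):
    def __init__(
        self,
        shared_experts: Optional[nn.Module],
        use_overlapped: bool = True,
        use_ep: bool = False,
        use_flashinfer_cutlass_kernels: bool = False,
    ):
        super().__init__()
        self._shared_experts = shared_experts
        self.use_ep = use_ep
        self.use_flashinfer_cutlass_kernels = use_flashinfer_cutlass_kernels
        # Same gate as in the diff: the expression keeps overlap on only when
        # use_ep and use_flashinfer_cutlass_kernels are both False and shared
        # experts were provided. With overlap off, the shared experts remain
        # visible to torch.compile as ordinary submodules.
        self.use_overlapped = use_overlapped and not (
            self.use_ep or self.use_flashinfer_cutlass_kernels
        ) and self.shared_experts is not None

    @property
    def shared_experts(self) -> Optional[nn.Module]:
        return self._shared_experts


if __name__ == "__main__":
    # Overlap requested but EP enabled: the gate turns the overlap off.
    layer_ep = SharedFusedMoESketch(SharedExpertsStub(16), use_ep=True)
    print(layer_ep.use_overlapped)  # False

    # EP and the flashinfer cutlass path both off: the gate keeps overlap on.
    layer_plain = SharedFusedMoESketch(SharedExpertsStub(16))
    print(layer_plain.use_overlapped)  # True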