
Commit d99e278

Author: Varun Sundar Rabindranath
mk cleanup
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
1 parent: 3405472

File tree: 2 files changed, +19 −27 lines

vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py

Lines changed: 9 additions & 8 deletions
@@ -99,23 +99,24 @@ def __init__(
         # We don't have enough information to determine if we should dispatch
         # activation scales in a packed ue8m0 format during object construction
-        # time. This setting is handled by setup_packed_ue8m0_scales_dispatch.
+        # time. This setting is handled by post_init_setup.
         self.use_ue8m0 = False

-    def supports_packed_ue8m0_scales_dispatch(self) -> bool:
-        return True
+    def post_init_setup(self, fused_experts: mk.FusedMoEPermuteExpertsUnpermute):
+        if not fused_experts.supports_packed_ue8m0_act_scales():
+            # Early exit.
+            return

-    def setup_packed_ue8m0_scales_dispatch(self) -> None:
         if self.use_fp8_dispatch:
             logger.debug_once(
-                "Update DeepEPLLPrepareFinalize to do packed ue8m0 scales dispatch"
+                "Update DeepEPLLPrepareFinalize to do packed ue8m0 scales dispatch."
             )
             self.use_ue8m0 = True
         else:
             logger.warning_once(
-                "Ignoring request to dispatch activation scales in a packed "
-                "ue8m0 format as DeepEPLLPrepareAndFinalize is setup to"
-                "dispatch raw/unquantized activations.",
+                "DeepEPLLPrepareAndFinalize is setup to dispatch raw/unquantized "
+                f"activations despite ({fused_experts.__class__.__name__}) being able "
+                "to support quantized activations.",
                 scope="local",
             )
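
As a reading aid, here is a minimal, self-contained sketch of the decision logic the new post_init_setup hook implements. The classes below are hypothetical stand-ins, not the real vLLM types, and the real method also emits the debug/warning logs shown in the hunk above.

    import logging

    logger = logging.getLogger(__name__)


    class StubExperts:
        # Stand-in for a FusedMoEPermuteExpertsUnpermute implementation.
        def __init__(self, packed_ue8m0_ok: bool) -> None:
            self.packed_ue8m0_ok = packed_ue8m0_ok

        def supports_packed_ue8m0_act_scales(self) -> bool:
            return self.packed_ue8m0_ok


    class StubDeepEPLLPrepareFinalize:
        # Stand-in mirroring the shape of DeepEPLLPrepareFinalize.post_init_setup.
        def __init__(self, use_fp8_dispatch: bool) -> None:
            self.use_fp8_dispatch = use_fp8_dispatch
            self.use_ue8m0 = False

        def post_init_setup(self, fused_experts: StubExperts) -> None:
            if not fused_experts.supports_packed_ue8m0_act_scales():
                # Experts kernel cannot consume packed ue8m0 scales; nothing to do.
                return
            if self.use_fp8_dispatch:
                # Quantized dispatch path: send activation scales packed as ue8m0.
                self.use_ue8m0 = True
            else:
                # Raw/unquantized dispatch: the ue8m0 capability goes unused.
                logger.warning("dispatching raw activations; packed ue8m0 scales unused")


    pf = StubDeepEPLLPrepareFinalize(use_fp8_dispatch=True)
    pf.post_init_setup(StubExperts(packed_ue8m0_ok=True))
    assert pf.use_ue8m0 is True
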

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 10 additions & 19 deletions
@@ -149,6 +149,15 @@ class FusedMoEPrepareAndFinalize(ABC):
     described above.
     """

+    def post_init_setup(self, fused_experts: "FusedMoEPermuteExpertsUnpermute"):
+        """
+        Initialize FusedMoEPrepareAndFinalize settings that depend on
+        FusedMoEPermuteExpertsUnpermute experts object.
+        The FusedMoEPrepareAndFinalize implementations that have such
+        dependencies may choose to override this function.
+        """
+        return
+
     @abstractmethod
     def prepare(
         self,

@@ -347,20 +356,6 @@ def output_is_reduced(self) -> bool:
         """
         raise NotImplementedError

-    def supports_packed_ue8m0_scales_dispatch(self) -> bool:
-        """
-        Return true if the implementation can dispatch activation scales in
-        packed ue8m0 format.
-        """
-        return False
-
-    def setup_packed_ue8m0_scales_dispatch(self) -> None:
-        """
-        Setup internal state of the implementation to dispatch activation scales
-        in packed ue8m0 format.
-        """
-        raise NotImplementedError
-

 # TODO: add supported activations method (return string)
 class FusedMoEPermuteExpertsUnpermute(ABC):

@@ -735,11 +730,7 @@ def _post_init_setup(self):
         Resolve any leftover setup dependencies between self.prepare_finalize
         and self.fused_experts here.
         """
-        if (
-            self.fused_experts.supports_packed_ue8m0_act_scales()
-            and self.prepare_finalize.supports_packed_ue8m0_scales_dispatch()
-        ):
-            self.prepare_finalize.setup_packed_ue8m0_scales_dispatch()
+        self.prepare_finalize.post_init_setup(self.fused_experts)

     def supports_expert_map(self) -> bool:
         """
