
Commit d99e278

Author: Varun Sundar Rabindranath
mk cleanup
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
1 parent: 3405472

File tree: 2 files changed, +19 −27 lines

vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py

Lines changed: 9 additions & 8 deletions
@@ -99,23 +99,24 @@ def __init__(
         # We don't have enough information to determine if we should dispatch
         # activation scales in a packed ue8m0 format during object construction
-        # time. This setting is handled by setup_packed_ue8m0_scales_dispatch.
+        # time. This setting is handled by post_init_setup.
         self.use_ue8m0 = False

-    def supports_packed_ue8m0_scales_dispatch(self) -> bool:
-        return True
+    def post_init_setup(self, fused_experts: mk.FusedMoEPermuteExpertsUnpermute):
+        if not fused_experts.supports_packed_ue8m0_act_scales():
+            # Early exit.
+            return

-    def setup_packed_ue8m0_scales_dispatch(self) -> None:
         if self.use_fp8_dispatch:
             logger.debug_once(
-                "Update DeepEPLLPrepareFinalize to do packed ue8m0 scales dispatch"
+                "Update DeepEPLLPrepareFinalize to do packed ue8m0 scales dispatch."
             )
             self.use_ue8m0 = True
         else:
             logger.warning_once(
-                "Ignoring request to dispatch activation scales in a packed "
-                "ue8m0 format as DeepEPLLPrepareAndFinalize is setup to"
-                "dispatch raw/unquantized activations.",
+                "DeepEPLLPrepareAndFinalize is setup to dispatch raw/unquantized "
+                f"activations despite ({fused_experts.__class__.__name__}) being able "
+                "to support quantized activations.",
                 scope="local",
             )
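
As a reading aid, here is a minimal, self-contained sketch of the decision logic the new post_init_setup hook implements. The classes below are hypothetical stand-ins, not the real vLLM types, and the real method also emits the debug/warning logs shown in the hunk above.

    import logging

    logger = logging.getLogger(__name__)


    class StubExperts:
        # Stand-in for a FusedMoEPermuteExpertsUnpermute implementation.
        def __init__(self, packed_ue8m0_ok: bool) -> None:
            self.packed_ue8m0_ok = packed_ue8m0_ok

        def supports_packed_ue8m0_act_scales(self) -> bool:
            return self.packed_ue8m0_ok


    class StubDeepEPLLPrepareFinalize:
        # Stand-in mirroring the shape of DeepEPLLPrepareFinalize.post_init_setup.
        def __init__(self, use_fp8_dispatch: bool) -> None:
            self.use_fp8_dispatch = use_fp8_dispatch
            self.use_ue8m0 = False

        def post_init_setup(self, fused_experts: StubExperts) -> None:
            if not fused_experts.supports_packed_ue8m0_act_scales():
                # Experts kernel cannot consume packed ue8m0 scales; nothing to do.
                return
            if self.use_fp8_dispatch:
                # Quantized dispatch path: send activation scales packed as ue8m0.
                self.use_ue8m0 = True
            else:
                # Raw/unquantized dispatch: the ue8m0 capability goes unused.
                logger.warning("dispatching raw activations; packed ue8m0 scales unused")


    pf = StubDeepEPLLPrepareFinalize(use_fp8_dispatch=True)
    pf.post_init_setup(StubExperts(packed_ue8m0_ok=True))
    assert pf.use_ue8m0 is True
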

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 10 additions & 19 deletions
@@ -149,6 +149,15 @@ class FusedMoEPrepareAndFinalize(ABC):
     described above.
     """

+    def post_init_setup(self, fused_experts: "FusedMoEPermuteExpertsUnpermute"):
+        """
+        Initialize FusedMoEPrepareAndFinalize settings that depend on
+        FusedMoEPermuteExpertsUnpermute experts object.
+        The FusedMoEPrepareAndFinalize implementations that have such
+        dependencies may choose to override this function.
+        """
+        return
+
     @abstractmethod
     def prepare(
         self,

@@ -347,20 +356,6 @@ def output_is_reduced(self) -> bool:
         """
         raise NotImplementedError

-    def supports_packed_ue8m0_scales_dispatch(self) -> bool:
-        """
-        Return true if the implementation can dispatch activation scales in
-        packed ue8m0 format.
-        """
-        return False
-
-    def setup_packed_ue8m0_scales_dispatch(self) -> None:
-        """
-        Setup internal state of the implementation to dispatch activation scales
-        in packed ue8m0 format.
-        """
-        raise NotImplementedError
-

 # TODO: add supported activations method (return string)
 class FusedMoEPermuteExpertsUnpermute(ABC):

@@ -735,11 +730,7 @@ def _post_init_setup(self):
         Resolve any leftover setup dependencies between self.prepare_finalize
         and self.fused_experts here.
         """
-        if (
-            self.fused_experts.supports_packed_ue8m0_act_scales()
-            and self.prepare_finalize.supports_packed_ue8m0_scales_dispatch()
-        ):
-            self.prepare_finalize.setup_packed_ue8m0_scales_dispatch()
+        self.prepare_finalize.post_init_setup(self.fused_experts)

     def supports_expert_map(self) -> bool:
         """
