Commit dfb9af2

[Bugfix] Fix Shared Expert/Zero expert code in FusedMoE.process_chunk (#25698)
Signed-off-by: Sage Moore <sage@neuralmagic.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>

1 parent 19f76ee

File tree (1 file changed: +3, -1)

  • vllm/model_executor/layers/fused_moe/layer.py


vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 3 additions & 1 deletion
```diff
@@ -1925,7 +1925,9 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False):
             assert self.shared_experts is None or isinstance(
                 final_hidden_states, tuple)
 
-            if isinstance(final_hidden_states, tuple):
+            if self.zero_expert_num is not None and self.zero_expert_num > 0:
+                assert isinstance(final_hidden_states, tuple)
+                assert self.shared_experts is None
                 final_hidden_states, zero_expert_result = final_hidden_states
                 if zero_expert_result is not None:
                     final_hidden_states += zero_expert_result
```
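For context, here is a minimal standalone sketch of the behavior this commit changes. It is not vLLM's actual code: the `combine_old`/`combine_new` helper names and the exact tuple shapes are assumptions for illustration. The point it shows is that the old `isinstance(..., tuple)` guard matched *any* tuple, including the one produced when shared experts are enabled, and so could wrongly collapse it as if its second element were a zero-expert result; the new guard only unpacks when zero experts are actually configured.

```python
import torch

# Hypothetical tuple shapes (assumptions, not confirmed by the commit):
#   shared-expert path:  final_hidden_states == (shared_out, routed_out)
#   zero-expert path:    final_hidden_states == (routed_out, zero_expert_result)


def combine_old(final_hidden_states):
    """Old guard: treats ANY tuple as (routed_out, zero_expert_result).

    With shared experts enabled, the shared-expert tuple takes this
    branch too, and its two halves are summed as if one of them were a
    zero-expert result.
    """
    if isinstance(final_hidden_states, tuple):
        final_hidden_states, zero_expert_result = final_hidden_states
        if zero_expert_result is not None:
            final_hidden_states += zero_expert_result
    return final_hidden_states


def combine_new(final_hidden_states, zero_expert_num, shared_experts):
    """New guard: unpack only when zero experts are configured, and
    assert that the shared-expert and zero-expert tuple shapes never
    coexist."""
    if zero_expert_num is not None and zero_expert_num > 0:
        assert isinstance(final_hidden_states, tuple)
        assert shared_experts is None
        final_hidden_states, zero_expert_result = final_hidden_states
        if zero_expert_result is not None:
            final_hidden_states += zero_expert_result
    return final_hidden_states


# With shared experts (zero_expert_num=None), the new guard leaves the
# tuple intact for the caller instead of collapsing it:
shared = (torch.ones(2), torch.zeros(2))
print(combine_old(shared))                  # wrongly collapsed to one tensor
print(combine_new(shared, None, object()))  # tuple preserved
```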
