Commit 7825421

Handle shared experts correctly
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Parent: e18b714

File tree
  • vllm/model_executor/layers/fused_moe

1 file changed: 16 additions, 2 deletions
vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 16 additions & 2 deletions
@@ -1801,6 +1801,15 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False):
             staged_hidden_states.copy_(hidden_states, non_blocking=True)
             staged_router_logits.copy_(router_logits, non_blocking=True)
 
+            # If there are shared experts but we are not using a modular kernel,
+            # the shared experts must be called here
+            if (not isinstance(self.quant_method.fused_experts,
+                               FusedMoEModularKernel)
+                    and self.shared_experts is not None):
+                shared_output = self.shared_experts(staged_hidden_states)
+            else:
+                shared_output = None
+
             # Matrix multiply.
             final_hidden_states = self.quant_method.apply(
                 layer=self,
@@ -1824,8 +1833,13 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False):
                 logical_replica_count=self.logical_replica_count,
             )
 
-            assert self.shared_experts is None or isinstance(
-                final_hidden_states, tuple)
+            if shared_output is not None:
+                assert not isinstance(final_hidden_states, tuple)
+                assert self.shared_experts is not None
+                final_hidden_states = (
+                    shared_output,
+                    final_hidden_states,
+                )
 
             if not skip_result_store:
                 if self.shared_experts is None:
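
For context, below is a minimal, runnable sketch of the control flow this commit introduces. The classes are simplified stand-ins, not vLLM's real `FusedMoE` or quant-method APIs: only the names `shared_experts`, `quant_method.fused_experts`, `quant_method.apply`, and `FusedMoEModularKernel` come from the diff; the toy layer, the toy quant method, the tensor shapes, and the assumption that a modular kernel handles shared experts itself are illustrative.

```python
from typing import Optional, Tuple, Union

import torch
import torch.nn as nn


class FusedMoEModularKernel:
    """Stand-in for the modular kernel type checked in the diff."""


class ToyQuantMethod:
    """Hypothetical quant method with a plain (non-modular) fused_experts."""

    def __init__(self):
        self.fused_experts = object()  # not a FusedMoEModularKernel

    def apply(self, layer, hidden_states: torch.Tensor) -> torch.Tensor:
        # Routed-expert computation stub; the real apply() takes many
        # more arguments (see the diff's context lines).
        return hidden_states * 2.0


class ToyMoELayer:
    def __init__(self, with_shared: bool):
        self.quant_method = ToyQuantMethod()
        # Shared experts run on every token, alongside the routed experts.
        self.shared_experts: Optional[nn.Module] = (
            nn.Linear(8, 8) if with_shared else None)

    def process_chunk(
        self, staged_hidden_states: torch.Tensor
    ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
        # If there are shared experts but we are not using a modular kernel,
        # the shared experts must be called here (a modular kernel is assumed
        # to run them internally and return a tuple on its own).
        if (not isinstance(self.quant_method.fused_experts,
                           FusedMoEModularKernel)
                and self.shared_experts is not None):
            shared_output = self.shared_experts(staged_hidden_states)
        else:
            shared_output = None

        final_hidden_states = self.quant_method.apply(
            self, staged_hidden_states)

        if shared_output is not None:
            # Non-modular path: apply() returned a bare tensor, so pack the
            # shared-expert output in front to match the (shared, routed)
            # tuple shape used downstream.
            assert not isinstance(final_hidden_states, tuple)
            final_hidden_states = (shared_output, final_hidden_states)
        return final_hidden_states


x = torch.randn(4, 8)
out = ToyMoELayer(with_shared=True).process_chunk(x)
assert isinstance(out, tuple) and len(out) == 2
```

The design point, as the diff suggests: the old assertion only tolerated shared experts when `apply()` already returned a tuple, which left non-modular kernels with shared experts unhandled. The fix computes the shared-expert output eagerly on that path and packs it as `(shared_output, final_hidden_states)`, so the result-store logic visible in the trailing context lines can treat both paths uniformly.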
