File tree — 1 file changed: +3 −3 lines changed
vllm/model_executor/layers/fused_moe — 1 file changed: +3 −3 lines changed

@@ -1135,6 +1135,7 @@ def __init__(
1135 1135           )
1136 1136
1137 1137           self.global_num_experts = num_experts + num_redundant_experts
     1138 +         self.logical_num_experts = num_experts
1138 1139           self.zero_expert_num = zero_expert_num
1139 1140           self.zero_expert_type = zero_expert_type
1140 1141
@@ -1998,13 +1999,12 @@ def ensure_dp_chunking_init(self):
1998 1999
1999 2000           moe = self.moe_config
2000 2001
2001      -         # Note here we use `num_experts` which is logical expert count
2002 2002           if self.vllm_config.parallel_config.enable_dbo:
2003 2003               states_shape = (2, moe.max_num_tokens, self.hidden_size)
2004      -             logits_shape = (2, moe.max_num_tokens, moe.num_experts)
     2004 +             logits_shape = (2, moe.max_num_tokens, self.logical_num_experts)
2005 2005           else:
2006 2006               states_shape = (moe.max_num_tokens, self.hidden_size)
2007      -             logits_shape = (moe.max_num_tokens, moe.num_experts)
     2007 +             logits_shape = (moe.max_num_tokens, self.logical_num_experts)
2008 2008
2009 2009           self.batched_hidden_states = torch.zeros(
2010 2010               states_shape, dtype=moe.in_dtype, device=torch.cuda.current_device()
You can’t perform that action at this time.
0 commit comments