tests/e2e/multicard/test_qwen3_moe.py (3 changes: 2 additions & 1 deletion)

@@ -55,6 +55,7 @@ def test_models_distributed_Qwen3_MOE_TP2_WITH_EP():
             tensor_parallel_size=2,
             enable_expert_parallel=True,
             distributed_executor_backend="mp",
+            enforce_eager=False,
     ) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)

@@ -71,7 +72,7 @@ def test_models_distributed_Qwen3_MOE_W8A8():
             dtype=dtype,
             tensor_parallel_size=2,
             quantization="ascend",
-            enforce_eager=False,
+            enforce_eager=True,
     ) as vllm_model:
         vllm_model.generate_greedy(example_prompts, max_tokens)
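For context, enforce_eager is a standard vLLM engine argument: True forces eager PyTorch execution, while False allows the backend to capture graphs (graph mode on Ascend). A minimal sketch of the same knob through the public vllm.LLM API, assuming a recent vLLM that exposes enable_expert_parallel; the model name is illustrative only:

    from vllm import LLM, SamplingParams

    # enforce_eager=False permits graph capture; True forces eager execution.
    llm = LLM(
        model="Qwen/Qwen3-30B-A3B",  # illustrative; the test uses its own fixture model
        tensor_parallel_size=2,
        enable_expert_parallel=True,
        enforce_eager=False,
    )
    outputs = llm.generate(["Hello"], SamplingParams(temperature=0.0, max_tokens=16))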
vllm_ascend/ops/common_fused_moe.py (3 changes: 2 additions & 1 deletion)

@@ -242,7 +242,8 @@ def forward_impl(self, hidden_states: torch.Tensor,
         moe_comm_method_name = forward_context.moe_comm_method_name

         # TODO: Can we refactor this logic to model_runner?
-        if not self.moe_config.use_ep:
+        # TODO: Differentiate between A2 and A3. We check ep_size here because MC2 only supports ep_size >= 16 on A3 for now.
+        if self.moe_config.ep_size < 16:
             moe_comm_method_name = "allgathercommimpl"

         forward_context.moe_comm_method = getattr(self, moe_comm_method_name)
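The new condition routes small expert-parallel deployments away from MC2: unlike the old check (not self.moe_config.use_ep), it also downgrades EP runs whose group has fewer than 16 ranks, matching the MC2 constraint on A3. Below is a minimal, self-contained sketch of that selection rule; it is not the vllm-ascend source, and the helper and config names are hypothetical:

    from dataclasses import dataclass

    @dataclass
    class MoeConfig:
        ep_size: int  # ranks in the expert-parallel group

    def select_moe_comm_method(config: MoeConfig, requested: str) -> str:
        # MC2 needs ep_size >= 16 on A3; smaller EP groups fall back to all-gather.
        if config.ep_size < 16:
            return "allgathercommimpl"
        return requested

    assert select_moe_comm_method(MoeConfig(ep_size=8), "mc2commimpl") == "allgathercommimpl"
    assert select_moe_comm_method(MoeConfig(ep_size=16), "mc2commimpl") == "mc2commimpl"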