From 33aab163997232bc8d0aec1c10a5e08c08a795d5 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 3 Nov 2025 18:53:01 +0000 Subject: [PATCH 1/5] logging Signed-off-by: Tyler Michael Smith --- .../layers/fused_moe/deepep_ht_prepare_finalize.py | 2 ++ .../layers/fused_moe/routing_simulator.py | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 929cff79980c..bb54d903cff4 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -158,6 +158,8 @@ def _do_dispatch( allocate_on_comm_stream=False, ) + print(expert_num_tokens_per_expert_list) + # record the handle for this ubatch a2a_idx = dbo_current_ubatch_id() self.handles[a2a_idx] = handle diff --git a/vllm/model_executor/layers/fused_moe/routing_simulator.py b/vllm/model_executor/layers/fused_moe/routing_simulator.py index 8b04cf4539e0..a01cdc4908b9 100644 --- a/vllm/model_executor/layers/fused_moe/routing_simulator.py +++ b/vllm/model_executor/layers/fused_moe/routing_simulator.py @@ -14,6 +14,10 @@ import torch +from vllm.logger import init_logger + +logger = init_logger(__name__) + class RoutingStrategy(ABC): """Base class for token-to-expert routing strategies.""" @@ -290,6 +294,12 @@ def simulate_routing( f"Available strategies: " f"{list(RoutingSimulator._routing_strategies.keys())}" ) + logger.warning_once( + "Simulating MoE routing using a %s strategy. " + "This should only be used for performance testing. " + "Model outputs will not be valid.", + strategy_name, + ) strategy = RoutingSimulator._routing_strategies[strategy_name] return strategy.route_tokens( From 4fdbbc51fbc6f33527266c0580db49f11370b0e6 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 3 Nov 2025 19:25:51 +0000 Subject: [PATCH 2/5] another print Signed-off-by: Tyler Michael Smith --- .../layers/fused_moe/deepep_ht_prepare_finalize.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index bb54d903cff4..63a519009d66 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -159,6 +159,7 @@ def _do_dispatch( ) print(expert_num_tokens_per_expert_list) + print(token_data.shape) # record the handle for this ubatch a2a_idx = dbo_current_ubatch_id() From 2615679301da78f10c59a224974c5e38068c7053 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 3 Nov 2025 19:57:27 +0000 Subject: [PATCH 3/5] fixup Signed-off-by: Tyler Michael Smith --- .../layers/fused_moe/deepep_ht_prepare_finalize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 63a519009d66..bcac548f178b 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -159,7 +159,7 @@ def _do_dispatch( ) print(expert_num_tokens_per_expert_list) - print(token_data.shape) + print(token_data) # record the handle for this ubatch a2a_idx = dbo_current_ubatch_id() From 1190713e8539bf21800476052b7667fdad2eb386 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 3 Nov 2025 20:09:38 +0000 Subject: [PATCH 4/5] update Signed-off-by: Tyler Michael Smith --- .../layers/fused_moe/deepep_ht_prepare_finalize.py | 1 - vllm/model_executor/layers/fused_moe/layer.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index bcac548f178b..bb54d903cff4 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -159,7 +159,6 @@ def _do_dispatch( ) print(expert_num_tokens_per_expert_list) - print(token_data) # record the handle for this ubatch a2a_idx = dbo_current_ubatch_id() diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 46d351b48c5e..55aa2593193a 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -2066,7 +2066,7 @@ def select_experts( ) # DeepSeekv2 uses grouped_top_k - if use_grouped_topk: + elif use_grouped_topk: assert topk_group is not None assert num_expert_group is not None if is_rocm_aiter_moe_enabled(): From 130a5563659ebad9d3ec9d2ebd75184381807cce Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Mon, 3 Nov 2025 20:15:30 +0000 Subject: [PATCH 5/5] remove cruft Signed-off-by: Tyler Michael Smith --- .../layers/fused_moe/deepep_ht_prepare_finalize.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index bb54d903cff4..929cff79980c 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -158,8 +158,6 @@ def _do_dispatch( allocate_on_comm_stream=False, ) - print(expert_num_tokens_per_expert_list) - # record the handle for this ubatch a2a_idx = dbo_current_ubatch_id() self.handles[a2a_idx] = handle