diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 46d351b48c5e..55aa2593193a 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -2066,7 +2066,7 @@ def select_experts( ) # DeepSeekv2 uses grouped_top_k - if use_grouped_topk: + elif use_grouped_topk: assert topk_group is not None assert num_expert_group is not None if is_rocm_aiter_moe_enabled(): diff --git a/vllm/model_executor/layers/fused_moe/routing_simulator.py b/vllm/model_executor/layers/fused_moe/routing_simulator.py index 8b04cf4539e0..a01cdc4908b9 100644 --- a/vllm/model_executor/layers/fused_moe/routing_simulator.py +++ b/vllm/model_executor/layers/fused_moe/routing_simulator.py @@ -14,6 +14,10 @@ import torch +from vllm.logger import init_logger + +logger = init_logger(__name__) + class RoutingStrategy(ABC): """Base class for token-to-expert routing strategies.""" @@ -290,6 +294,12 @@ def simulate_routing( f"Available strategies: " f"{list(RoutingSimulator._routing_strategies.keys())}" ) + logger.warning_once( + "Simulating MoE routing using a %s strategy. " + "This should only be used for performance testing. " + "Model outputs will not be valid.", + strategy_name, + ) strategy = RoutingSimulator._routing_strategies[strategy_name] return strategy.route_tokens(