From c15cbdd8e07873554d25fdad14443a3339c4310c Mon Sep 17 00:00:00 2001
From: Lu Fang
Date: Sat, 5 Apr 2025 15:59:12 -0700
Subject: [PATCH] Add apply_router_weight_on_input to
 CompressedTensorsWNA16MoEMethod's apply

Signed-off-by: Lu Fang
---
 .../compressed_tensors/compressed_tensors_moe.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
index 681a25ed4d33..f573c8ae5131 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -783,6 +783,7 @@ def apply(
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
         e_score_correction_bias: Optional[torch.Tensor] = None,
+        apply_router_weight_on_input: bool = False,
         activation: str = "silu",
     ) -> torch.Tensor:
         assert activation == "silu", "Only SiLU activation is supported."
@@ -790,6 +791,10 @@ def apply(
             raise NotImplementedError(
                 "Expert Parallelism is not supported for "
                 "fused Marlin MoE method.")
+        if apply_router_weight_on_input:
+            raise NotImplementedError(
+                "Apply router weight on input is not supported for "
+                "fused Marlin MoE method.")
 
         topk_weights, topk_ids = FusedMoE.select_experts(
             hidden_states=x,
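
Note for reviewers: below is a minimal, self-contained sketch (not vLLM's
kernel code; run_experts and moe_combine are hypothetical stand-ins) of the
two weighting strategies the new parameter selects between. When
apply_router_weight_on_input is True, the top-k router weights scale the
token activations before the expert MLPs run; otherwise they scale the
expert outputs afterwards. The fused Marlin MoE path patched here supports
only the output-side variant, hence the new NotImplementedError guard.

    import torch

    def run_experts(x: torch.Tensor) -> torch.Tensor:
        # Stand-in for the routed expert MLPs; identity keeps the sketch small.
        return x

    def moe_combine(x: torch.Tensor,
                    topk_weights: torch.Tensor,
                    apply_router_weight_on_input: bool) -> torch.Tensor:
        # x: [num_tokens, hidden]; topk_weights: [num_tokens, 1], i.e. this
        # sketch assumes top_k == 1, the case where input-side weighting is
        # well defined as a single per-token scale.
        if apply_router_weight_on_input:
            # Input-side: scale activations before dispatching to the experts.
            return run_experts(x * topk_weights)
        # Output-side: the only ordering the fused Marlin method implements.
        return run_experts(x) * topk_weights

With top_k == 1 the two orderings coincide for purely linear experts, since
scaling commutes with a linear map, but they diverge once the expert MLP
applies a nonlinearity such as SiLU; that is why the kernel must support the
input-side ordering explicitly rather than silently reordering, and why the
patch rejects the flag instead of ignoring it.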