File tree (expand / collapse): 1 file changed, +7 -2 lines changed
vllm/model_executor/layers/quantization (expand / collapse file tree): 1 file changed, +7 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -794,7 +794,8 @@ def select_gemm_impl(
794 794   )
795 795   else:
796 796   raise NotImplementedError(
797     - "Incompatible Mxfp4 backend for EP batched experts format"
    797 + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for "
    798 + "EP batched experts format"
798 799   )
799 800   else:
800 801   assert self.moe_quant_config is not None
@@ -813,8 +814,12 @@ def select_gemm_impl(
813 814   return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs)
814 815   elif self.mxfp4_backend == Mxfp4Backend.MARLIN:
815 816   return MarlinExperts(self.moe_quant_config)
816     - else:
    817 + elif self.mxfp4_backend == Mxfp4Backend.TRITON:
817 818   return OAITritonExperts(self.moe_quant_config)
    819 + else:
    820 + raise NotImplementedError(
    821 + f"Incompatible Mxfp4 backend ({self.mxfp4_backend}) for EP"
    822 + )
818 823
819 824   def _route_and_experts(
820 825   self,
You can’t perform that action at this time.
0 commit comments