[feat] move WEIGHT_SCALE_SUPPORTED into raise block to accelerate RLHF weight loading (#21164)

weixiao-huang · web-flow · commit c1b4eb048a28 · 2025-08-04T15:43:06.000+08:00
Signed-off-by: huangweixiao <huangweixiao@msh.team>
diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py
@@ -1079,9 +1079,6 @@ def weight_loader(self,
             raise ValueError(f"shard_id must be ['w1','w2','w3'] but "
                              f"got {shard_id}.")
 
-        WEIGHT_SCALE_SUPPORTED = [
-            e.value for e in FusedMoeWeightScaleSupported
-        ]
         # Fetch the dim to shard the parameter/loaded weight
         # based on the shard id. This will be whatever
         # dimension intermediate_size_per_partition is used.
@@ -1230,6 +1227,9 @@ def weight_loader(self,
                                                    loaded_weight=loaded_weight,
                                                    expert_id=expert_id)
             else:
+                WEIGHT_SCALE_SUPPORTED = [
+                    e.value for e in FusedMoeWeightScaleSupported
+                ]
                 raise ValueError(
                     f"quant method must be one of {WEIGHT_SCALE_SUPPORTED}")
             return True if return_success else None