Skip to content

Commit c1b4eb0

Browse files
[feat] move WEIGHT_SCALE_SUPPORTED into raise block to accelerate RLHF weight loading (#21164)
Signed-off-by: huangweixiao <huangweixiao@msh.team>
1 parent a7b8788 commit c1b4eb0

File tree

1 file changed

+3
-3
lines changed
  • vllm/model_executor/layers/fused_moe

1 file changed

+3
-3
lines changed

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1079,9 +1079,6 @@ def weight_loader(self,
1079 1079
raise ValueError(f"shard_id must be ['w1','w2','w3'] but "
1080 1080
f"got {shard_id}.")
1081 1081

1082-
WEIGHT_SCALE_SUPPORTED = [
1083-
e.value for e in FusedMoeWeightScaleSupported
1084-
]
1085 1082
# Fetch the dim to shard the parameter/loaded weight
1086 1083
# based on the shard id. This will be whatever
1087 1084
# dimension intermediate_size_per_partition is used.
@@ -1230,6 +1227,9 @@ def weight_loader(self,
1230 1227
loaded_weight=loaded_weight,
1231 1228
expert_id=expert_id)
1232 1229
else:
1230+
WEIGHT_SCALE_SUPPORTED = [
1231+
e.value for e in FusedMoeWeightScaleSupported
1232+
]
1233 1233
raise ValueError(
1234 1234
f"quant method must be one of {WEIGHT_SCALE_SUPPORTED}")
1235 1235
return True if return_success else None

0 commit comments

Comments
 (0)