1 file changed: +9 -0 lines changed
vllm/model_executor/layers
Original file line number | Diff line number | Diff line change
@@ -929,6 +929,15 @@ def weight_loader_v2(self,
929929 shard_offset = self ._get_shard_offset_mapping (loaded_shard_id )
930930 shard_size = self ._get_shard_size_mapping (loaded_shard_id )
931931
932+ # Note(simon): This is needed for Qwen3's fp8 quantization.
933+ if isinstance (param , BlockQuantScaleParameter ):
934+ assert self .quant_method is not None
935+ assert hasattr (self .quant_method , "quant_config" )
936+ weight_block_size = self .quant_method .quant_config .weight_block_size
937+ block_n , _ = weight_block_size [0 ], weight_block_size [1 ]
938+ shard_offset = (shard_offset + block_n - 1 ) // block_n
939+ shard_size = (shard_size + block_n - 1 ) // block_n
940+
932941 param .load_qkv_weight (loaded_weight = loaded_weight ,
933942 num_heads = self .num_kv_head_replicas ,
934943 shard_id = loaded_shard_id ,
You can’t perform that action at this time.
0 commit comments