
Commit 7f21e80

[Misc] Support group_size = -1 in AWQ quantization (vllm-project#18910)
Signed-off-by: rongfu.leng <rongfu.leng@daocloud.io>
1 parent 5a86416 commit 7f21e80

File tree (1 file changed: +11 −3)

  • vllm/model_executor/layers/quantization/awq.py


vllm/model_executor/layers/quantization/awq.py

Lines changed: 11 additions & 3 deletions
@@ -101,7 +101,13 @@ def create_weights(self, layer: torch.nn.Module,
                        output_partition_sizes: list[int], input_size: int,
                        output_size: int, params_dtype: torch.dtype,
                        **extra_weight_attrs):
-        if input_size_per_partition % self.quant_config.group_size != 0:
+        # Normalize group_size
+        if self.quant_config.group_size != -1:
+            group_size = self.quant_config.group_size
+        else:
+            group_size = input_size
+
+        if input_size_per_partition % group_size != 0:
             raise ValueError(
                 "The input size is not aligned with the quantized "
                 "weight shape. This can be caused by too large "
@@ -127,9 +133,11 @@ def create_weights(self, layer: torch.nn.Module,
             packed_factor=self.quant_config.pack_factor,
             weight_loader=weight_loader)
 
+        num_groups = input_size_per_partition // group_size
+
         qzeros = PackedvLLMParameter(
             data=torch.empty(
-                input_size_per_partition // self.quant_config.group_size,
+                num_groups,
                 output_size_per_partition // self.quant_config.pack_factor,
                 dtype=torch.int32,
             ),
@@ -140,7 +148,7 @@ def create_weights(self, layer: torch.nn.Module,
             weight_loader=weight_loader)
 
         scales = GroupQuantScaleParameter(data=torch.empty(
-            input_size_per_partition // self.quant_config.group_size,
+            num_groups,
             output_size_per_partition,
             dtype=params_dtype,
         ),
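For readers skimming the diff: a group_size of -1 in AWQ-style configs conventionally means "one quantization group spanning the full input dimension", and this commit normalizes that sentinel before the alignment check and shape math. The sketch below is a hypothetical standalone reconstruction of that logic; AWQLikeConfig and make_awq_shapes are illustrative names and plain tensors stand in for vLLM's PackedvLLMParameter and GroupQuantScaleParameter.

# Minimal sketch of the group_size normalization introduced by this commit.
# Names here are illustrative stand-ins, not vLLM's actual API.
from dataclasses import dataclass

import torch


@dataclass
class AWQLikeConfig:
    group_size: int   # -1 means one group over the full input dimension
    pack_factor: int  # e.g. 8 int4 values packed into one int32


def make_awq_shapes(cfg: AWQLikeConfig, input_size_per_partition: int,
                    input_size: int, output_size_per_partition: int):
    # Normalize group_size: -1 collapses to the full input dimension.
    group_size = cfg.group_size if cfg.group_size != -1 else input_size

    if input_size_per_partition % group_size != 0:
        raise ValueError("The input size is not aligned with the "
                         "quantized weight shape.")

    # One row of zeros/scales per quantization group.
    num_groups = input_size_per_partition // group_size
    qzeros = torch.empty(num_groups,
                         output_size_per_partition // cfg.pack_factor,
                         dtype=torch.int32)
    scales = torch.empty(num_groups, output_size_per_partition,
                         dtype=torch.float16)
    return qzeros, scales


# Example: group_size=-1 yields a single group when the partition
# covers the whole input dimension.
qz, sc = make_awq_shapes(AWQLikeConfig(group_size=-1, pack_factor=8),
                         input_size_per_partition=4096, input_size=4096,
                         output_size_per_partition=4096)
assert qz.shape == (1, 512) and sc.shape == (1, 4096)

One consequence worth noting: once group_size is normalized to input_size, the alignment check only passes when input_size_per_partition is a multiple of the full input size, so with group_size = -1 the input dimension effectively cannot be split more finely than one group.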
