Skip to content

Commit 9610d4a

Browse files
committed
fix condition
1 parent c050fe1 commit 9610d4a

File tree

2 files changed

+8
-4
lines changed

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel,
302302
def _is_wNa16_group_channel(self, weight_quant: BaseModel,
303303
input_quant: BaseModel) -> bool:
304304
input_quant_none = input_quant is None
305-
#is_symmetric = weight_quant.symmetric
306305
is_channel_group = (
307306
weight_quant.strategy == QuantizationStrategy.CHANNEL.value
308307
or weight_quant.strategy == QuantizationStrategy.GROUP.value)
@@ -318,6 +317,7 @@ def _get_scheme_from_parts(
318317
if self._is_wNa16_group_channel(weight_quant, input_quant):
319318
if (self.quant_format == CompressionFormat.marlin_24.value
320319
and weight_quant.num_bits in W4A16SPARSE24_SUPPORTED_BITS):
320+
assert weight_quant.symmetric
321321
return CompressedTensorsW4A16Sparse24(
322322
strategy=weight_quant.strategy,
323323
num_bits=weight_quant.num_bits,
@@ -327,6 +327,7 @@ def _get_scheme_from_parts(
327327
return CompressedTensorsWNA16(
328328
num_bits=weight_quant.num_bits,
329329
strategy=weight_quant.strategy,
330+
symmetric=weight_quant.symmetric,
330331
group_size=weight_quant.group_size,
331332
actorder=weight_quant.actorder,
332333
zero_points=(not weight_quant.symmetric))

vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ def __init__(self,
3838
strategy: str,
3939
num_bits: int,
4040
group_size: Optional[int] = None,
41-
actorder: Optional[ActivationOrdering] = None,
42-
zero_points: Optional[bool] = False):
41+
zero_points: Optional[bool] = False,
42+
symmetric: Optional[bool] = True,
43+
actorder: Optional[ActivationOrdering] = None):
4344

4445
self.pack_factor = 32 // num_bits
4546
self.strategy = strategy
47+
self.symmetric = symmetric
4648
self.group_size = -1 if group_size is None else group_size
4749
self.has_g_idx = actorder == ActivationOrdering.GROUP
4850

@@ -81,7 +83,7 @@ def create_weights(self, layer: torch.nn.Module, output_size: int,
8183
weight_type=self.quant_type,
8284
act_type=params_dtype,
8385
group_size=self.group_size,
84-
zero_points=self.zero_points,
86+
zero_points=not self.symmetric,
8587
has_g_idx=self.has_g_idx
8688
)
8789

@@ -181,6 +183,7 @@ def create_weights(self, layer: torch.nn.Module, output_size: int,
181183
w_s_param_name="weight_scale",
182184
w_zp_param_name="weight_zero_point",
183185
w_gidx_param_name="weight_g_idx")
186+
print(self.kernel, type(self.kernel))
184187

185188
# Checkpoints are serialized in compressed-tensors format, which is
186189
# different from the format the kernel may want. Handle repacking here.

0 commit comments

Comments (0)