Skip to content

Commit 9610d4a

Browse files
committed
fix condition
1 parent c050fe1 commit 9610d4a

File tree

2 files changed

+8
-4
lines changed

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,6 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel,
302302
def _is_wNa16_group_channel(self, weight_quant: BaseModel,
303303
input_quant: BaseModel) -> bool:
304304
input_quant_none = input_quant is None
305-
#is_symmetric = weight_quant.symmetric
306305
is_channel_group = (
307306
weight_quant.strategy == QuantizationStrategy.CHANNEL.value
308307
or weight_quant.strategy == QuantizationStrategy.GROUP.value)
@@ -318,6 +317,7 @@ def _get_scheme_from_parts(
318317
if self._is_wNa16_group_channel(weight_quant, input_quant):
319318
if (self.quant_format == CompressionFormat.marlin_24.value
320319
and weight_quant.num_bits in W4A16SPARSE24_SUPPORTED_BITS):
320+
assert weight_quant.symmetric
321321
return CompressedTensorsW4A16Sparse24(
322322
strategy=weight_quant.strategy,
323323
num_bits=weight_quant.num_bits,
@@ -327,6 +327,7 @@ def _get_scheme_from_parts(
327327
return CompressedTensorsWNA16(
328328
num_bits=weight_quant.num_bits,
329329
strategy=weight_quant.strategy,
330+
symmetric=weight_quant.symmetric,
330331
group_size=weight_quant.group_size,
331332
actorder=weight_quant.actorder,
332333
zero_points=(not weight_quant.symmetric))

vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ def __init__(self,
3838
strategy: str,
3939
num_bits: int,
4040
group_size: Optional[int] = None,
41-
actorder: Optional[ActivationOrdering] = None,
42-
zero_points: Optional[bool] = False):
41+
zero_points: Optional[bool] = False,
42+
symmetric: Optional[bool] = True,
43+
actorder: Optional[ActivationOrdering] = None):
4344

4445
self.pack_factor = 32 // num_bits
4546
self.strategy = strategy
47+
self.symmetric = symmetric
4648
self.group_size = -1 if group_size is None else group_size
4749
self.has_g_idx = actorder == ActivationOrdering.GROUP
4850

@@ -81,7 +83,7 @@ def create_weights(self, layer: torch.nn.Module, output_size: int,
8183
weight_type=self.quant_type,
8284
act_type=params_dtype,
8385
group_size=self.group_size,
84-
zero_points=self.zero_points,
86+
zero_points=not self.symmetric,
8587
has_g_idx=self.has_g_idx
8688
)
8789

@@ -181,6 +183,7 @@ def create_weights(self, layer: torch.nn.Module, output_size: int,
181183
w_s_param_name="weight_scale",
182184
w_zp_param_name="weight_zero_point",
183185
w_gidx_param_name="weight_g_idx")
186+
print(self.kernel, type(self.kernel))
184187

185188
# Checkpoints are serialized in compressed-tensors format, which is
186189
# different from the format the kernel may want. Handle repacking here.

0 commit comments

Comments (0)