We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 9610d4a · commit 918a847 — Copy full SHA for 918a847
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -93,12 +93,14 @@ def transform_w_s(x):
         if c.zero_points:
             # TODO figure out a more efficient way to do it
+            grouped_k = (c.partition_weight_shape[0] //
+                         c.group_size if c.group_size != -1 else 1)
             self._transform_param(layer, self.w_zp_name, lambda x: \
                 marlin_zero_points(
                     unpack_cols(x.t(), c.weight_type.size_bits,
-                                c.partition_weight_shape[0] // c.group_size,
+                                grouped_k,
                                 c.partition_weight_shape[1]),
-                    size_k=c.partition_weight_shape[0] // c.group_size,
+                    size_k=grouped_k,
                     size_n=c.partition_weight_shape[1],
                     num_bits=c.weight_type.size_bits))
         else:
0 commit comments