src/compressed_tensors/quantization: 1 file changed (+13, -13 lines)

@@ -142,6 +142,18 @@ def is_preset_scheme(name: str) -> bool:
     ),
 )
 
+# 4 bit integer weights only asymmetric quantization
+W4A16_ASYM = dict(
+    weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.GROUP,
+        group_size=128,
+        symmetric=False,
+        dynamic=False,
+    ),
+)
+
 # 4 bit integer weights and 8 bit activations quantization
 INT8_W4A8 = dict(
     weights=QuantizationArgs(
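The added W4A16_ASYM preset mirrors the existing W4A16 scheme but sets symmetric=False, so each 128-weight group carries a zero point in addition to a scale, letting the 4-bit grid shift to cover skewed weight distributions. A minimal NumPy sketch of the affine quantization this implies (illustrative only, not the library's actual kernel; the helper name is hypothetical):

import numpy as np

def quantize_group_asym(w: np.ndarray, num_bits: int = 4):
    """Asymmetric (affine) quantization of one group: q = round(w/scale) + zp."""
    qmin, qmax = 0, 2**num_bits - 1              # unsigned 4-bit grid: 0..15
    scale = (w.max() - w.min()) / (qmax - qmin)  # cover the observed range
    zero_point = int(round(-w.min() / scale))    # shift so w.min() maps to qmin
    q = np.clip(np.round(w / scale) + zero_point, qmin, qmax).astype(np.uint8)
    return q, scale, zero_point

group = np.random.randn(128) * 0.02 + 0.01      # one group_size=128 weight slice
q, scale, zp = quantize_group_asym(group)
w_hat = (q.astype(np.float32) - zp) * scale     # dequantize to inspect the error

A symmetric scheme would instead fix zero_point at the middle of the grid and size the scale from max(|w|), which wastes range when a group's weights are mostly one-signed.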
@@ -199,30 +211,18 @@ def is_preset_scheme(name: str) -> bool:
     ),
 )
 
-# AWQ quantization
-AWQ = dict(
-    weights=QuantizationArgs(
-        num_bits=4,
-        type=QuantizationType.INT,
-        strategy=QuantizationStrategy.GROUP,
-        symmetric=False,
-        dynamic=False,
-        group_size=128,
-    ),
-)
-
 PRESET_SCHEMES = {
     # Unquantized (no-op)
     "UNQUANTIZED": UNQUANTIZED,
     # Integer weight only schemes
     "W8A16": W8A16,
     "W4A16": W4A16,
+    "W4A16_ASYM": W4A16_ASYM,
     # Integer weight and activation schemes
     "W8A8": INT8_W8A8,
     "INT8": INT8_W8A8,  # alias for W8A8
     "W4A8": INT8_W4A8,
     # Float weight and activation schemes
     "FP8": FP8,
     "FP8_DYNAMIC": FP8_DYNAMIC,
-    "AWQ": AWQ,
 }
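With W4A16_ASYM registered in PRESET_SCHEMES (and AWQ no longer present), the preset resolves by name. A minimal usage sketch, assuming the module's is_preset_scheme helper shown in the hunk headers and a preset_name_to_scheme(name, targets) helper exported from the same package; the latter's import path and signature are assumptions, not part of this diff:

from compressed_tensors.quantization import (   # import path assumed
    is_preset_scheme,
    preset_name_to_scheme,
)

assert is_preset_scheme("W4A16_ASYM")           # newly registered preset
assert not is_preset_scheme("AWQ")              # removed by this change
scheme = preset_name_to_scheme("W4A16_ASYM", targets=["Linear"])
print(scheme.weights.symmetric)                 # False: per-group zero points
print(scheme.weights.group_size)                # 128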