
Commit fdb4343

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 9232da3 commit fdb4343

File tree: 10 files changed (+67, -60 lines)


neural_compressor/adaptor/torch_utils/weight_only.py

Lines changed: 1 addition & 1 deletion

@@ -654,7 +654,7 @@ def quant_weight_w_scale(weight, scale, zp, group_size=-1):
    leng = weight.shape[1] // group_size
    tail_flag = False if weight.shape[1] % group_size == 0 else True
    for i in range(leng):
-        int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
+        int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
        if zp is not None:
            int_weight_tmp.add_(zp[:, i].unsqueeze(1))
        int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp.round_())

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-# Demo of algorithm usage w/o INC
+# Demo of algorithm usage w/o INC

neural_compressor/torch/algorithms/weight_only/rtn.py

Lines changed: 11 additions & 16 deletions

@@ -24,11 +24,7 @@

from neural_compressor.torch.utils import logger, set_module

-
-from .utility import (
-    quant_tensor,
-    search_clip,
-)
+from .utility import quant_tensor, search_clip


def rtn_quantize(

@@ -101,24 +97,23 @@ def rtn_quantize(
        group_size = weight_config[name]["group_size"]
        scheme = weight_config[name]["scheme"]
        quantile = weight_config[name].get("quantile", 1.0)
-        group_dim = weight_config[name]['group_dim']
-        use_full_range = weight_config[name]['use_full_range']
-        use_mse_search = weight_config[name]['use_mse_search']
-        use_layer_wise = weight_config[name]['use_layer_wise']
-        export_compressed_model = weight_config[name]['export_compressed_model']
+        group_dim = weight_config[name]["group_dim"]
+        use_full_range = weight_config[name]["use_full_range"]
+        use_mse_search = weight_config[name]["use_mse_search"]
+        use_layer_wise = weight_config[name]["use_layer_wise"]
+        export_compressed_model = weight_config[name]["export_compressed_model"]
        if export_compressed_model:
            use_optimum_format = kwargs.get("use_optimum_format", True)
-        double_quant_dtype = weight_config[name]['double_quant_dtype']
+        double_quant_dtype = weight_config[name]["double_quant_dtype"]
        double_quant_config = {
            "double_quant": False if double_quant_dtype == "fp32" else True,
            "double_quant_dtype": double_quant_dtype,
-            "double_quant_bits": weight_config[name]['double_quant_bits'],
-            "double_quant_scheme": weight_config[name]['double_quant_scheme'],
-            "double_quant_group_size": weight_config[name]['double_quant_group_size'],
+            "double_quant_bits": weight_config[name]["double_quant_bits"],
+            "double_quant_scheme": weight_config[name]["double_quant_scheme"],
+            "double_quant_group_size": weight_config[name]["double_quant_group_size"],
        }
        log_msg = (
-            f"RTN quantization config: bits={bits}, group_size={group_size}, "
-            + f"scheme={scheme}, quantile={quantile}"
+            f"RTN quantization config: bits={bits}, group_size={group_size}, " + f"scheme={scheme}, quantile={quantile}"
        )
        if dtype != "int":
            log_msg += f", dtype={dtype}"
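
For context, rtn_quantize pulls its per-layer settings out of a weight_config dictionary keyed by module name. A minimal sketch of one entry, using only the keys read in the hunk above (the values shown are illustrative, not the library defaults):

# Illustrative entry only; the real defaults come from RTNConfig in config.py.
weight_config = {
    "model.fc1": {
        "dtype": "int",
        "bits": 4,
        "group_size": 32,
        "scheme": "sym",
        "quantile": 1.0,
        "group_dim": 1,
        "use_full_range": False,
        "use_mse_search": False,
        "use_layer_wise": False,
        "export_compressed_model": False,
        "double_quant_dtype": "fp32",  # "fp32" disables double quantization
        "double_quant_bits": 8,
        "double_quant_scheme": "sym",
        "double_quant_group_size": 256,
    }
}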

neural_compressor/torch/algorithms/weight_only/utility.py

Lines changed: 16 additions & 5 deletions

@@ -1,3 +1,16 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

import torch
from torch.nn import functional as F

@@ -163,9 +176,7 @@ def qdq_weight_sym(weight, bits=4, quantile=1.0, return_int=False, full_range=Fa
    return weight.mul_(scale)


-def qdq_weight_actor(
-    weight, bits, scheme, quantile=1.0, dtype="int", return_int=False, full_range=False, **kwargs
-):
+def qdq_weight_actor(weight, bits, scheme, quantile=1.0, dtype="int", return_int=False, full_range=False, **kwargs):
    """Quant and dequant tensor per channel. It is an in-place op.

    Args:

@@ -417,7 +428,7 @@ def quant_weight_w_scale(weight, scale, zp, group_size=-1, dtype="int"):
    leng = weight.shape[1] // group_size
    tail_flag = False if weight.shape[1] % group_size == 0 else True
    for i in range(leng):
-        int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
+        int_weight_tmp = weight[:, i * group_size : (i + 1) * group_size].div_(scale[:, i].unsqueeze(1))
        if zp is not None:
            int_weight_tmp.add_(zp[:, i].unsqueeze(1))
        int_weight[:, i * group_size : (i + 1) * group_size].copy_(int_weight_tmp.round_())

@@ -426,4 +437,4 @@ def quant_weight_w_scale(weight, scale, zp, group_size=-1, dtype="int"):
        if zp is not None:
            int_weight_tmp.add_(zp[:, -1].unsqueeze(1))
        int_weight[:, leng * group_size :].copy_(int_weight_tmp.round_())
-    return int_weight
+    return int_weight
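
The quant_weight_w_scale hunks divide each group of columns by its per-group scale, add the zero point when one is given, and round. A standalone sketch of the same group-wise arithmetic, written out-of-place for clarity; it assumes weight has shape [out_features, in_features] and scale/zp have shape [out_features, n_groups], mirroring the tail handling shown above:

import torch

def quant_weight_with_scale(weight, scale, zp=None, group_size=32):
    """Quantize weight group-by-group along dim 1 using a precomputed scale/zero-point."""
    out_features, in_features = weight.shape
    n_full = in_features // group_size
    int_weight = torch.zeros_like(weight)
    for i in range(n_full):
        cols = slice(i * group_size, (i + 1) * group_size)
        q = weight[:, cols] / scale[:, i].unsqueeze(1)  # scale the group
        if zp is not None:
            q = q + zp[:, i].unsqueeze(1)               # shift by the zero point
        int_weight[:, cols] = q.round()
    if in_features % group_size:                        # last, partial group
        cols = slice(n_full * group_size, in_features)
        q = weight[:, cols] / scale[:, -1].unsqueeze(1)
        if zp is not None:
            q = q + zp[:, -1].unsqueeze(1)
        int_weight[:, cols] = q.round()
    return int_weight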

neural_compressor/torch/quantization/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -19,11 +19,13 @@
    GPTQConfig,
)
from .quantize import quantize, quantize_dynamic
+
# TODO(Yi): move config to config.py
from .autotune import autotune, TuningConfig, get_default_tune_config

### Quantization Function Registration ###
from neural_compressor.torch.quantization.weight_only import quantization_impl
from neural_compressor.torch.utils import is_hpex_available
+
if is_hpex_available():
    from neural_compressor.torch.quantization.fp8 import quantization_impl

neural_compressor/torch/quantization/config.py

Lines changed: 7 additions & 13 deletions

@@ -16,18 +16,12 @@
# limitations under the License.
# pylint:disable=import-error

-import torch
from typing import Callable, Dict, List, NamedTuple, Optional, Tuple, Union

-from neural_compressor.common.base_config import BaseConfig, config_registry, register_config
-from neural_compressor.common.utility import (
-    OP_NAME_OR_MODULE_TYPE,
-    DEFAULT_WHITE_LIST,
-    FP8_QUANT,
-    GPTQ,
-    RTN,
-)
+import torch

+from neural_compressor.common.base_config import BaseConfig, config_registry, register_config
+from neural_compressor.common.utility import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN
from neural_compressor.torch.utils import is_hpex_available, logger
from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN

@@ -75,7 +69,7 @@ def __init__(
        use_layer_wise: bool = False,
        export_compressed_model: bool = False,
        double_quant_dtype: str = "fp32",
-        double_quant_bits: int = 8, # not available when double_quant_dtype is not 'int'
+        double_quant_bits: int = 8, # not available when double_quant_dtype is not 'int'
        double_quant_sym: bool = True,
        double_quant_group_size: int = 256,
        white_list: Optional[List[OP_NAME_OR_MODULE_TYPE]] = DEFAULT_WHITE_LIST,

@@ -255,9 +249,7 @@ def register_supported_configs(cls) -> List[OperatorConfig]:
        # TODO(Yi)
        linear_gptq_config = GPTQConfig()
        operators = [torch.nn.Linear, torch.nn.functional.linear]
-        supported_configs.append(
-            OperatorConfig(config=linear_gptq_config, operators=operators)
-        )
+        supported_configs.append(OperatorConfig(config=linear_gptq_config, operators=operators))
        cls.supported_configs = supported_configs

    @staticmethod

@@ -362,8 +354,10 @@ def get_default_fp8_qconfig() -> FP8QConfig:
    """
    return FP8QConfig()

+
##################### Algo Configs End ###################################

+
def get_all_registered_configs() -> Dict[str, BaseConfig]:
    registered_configs = config_registry.get_all_configs()
    return registered_configs.get(FRAMEWORK_NAME, {})
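
The RTNConfig fragment above exposes the double-quant knobs as constructor arguments. A hedged usage sketch, using only parameter names visible in this commit (the values are illustrative, not verified defaults):

from neural_compressor.torch.quantization import RTNConfig

quant_config = RTNConfig(
    weight_bits=4,
    weight_dtype="int",
    double_quant_dtype="int",   # anything other than "fp32" is assumed to enable double quantization
    double_quant_bits=8,        # per the inline comment above, only meaningful when double_quant_dtype is "int"
    double_quant_sym=True,
    double_quant_group_size=256,
)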

neural_compressor/torch/quantization/weight_only/quantization_impl.py

Lines changed: 19 additions & 5 deletions

@@ -1,11 +1,25 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+from typing import Dict, Tuple

import torch
-from typing import Dict, Tuple
-from neural_compressor.common.utility import RTN, GPTQ  # unified namespace
-from neural_compressor.torch.quantization import RTNConfig, GPTQConfig
-from neural_compressor.torch.utils import logger, fetch_module, register_algo
-from neural_compressor.torch.algorithms import rtn_quantize, gptq_quantize
+
+from neural_compressor.common.utility import GPTQ, RTN  # unified namespace
+from neural_compressor.torch.algorithms import gptq_quantize, rtn_quantize
+from neural_compressor.torch.quantization import GPTQConfig, RTNConfig
+from neural_compressor.torch.utils import fetch_module, logger, register_algo


###################### RTN Algo Entry ##################################
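
quantization_impl.py imports register_algo, and the utils diff below mentions a dictionary mapping algorithm names to implementation functions. A minimal sketch of how such a name-to-function registry is typically wired up; this is an assumption about the pattern, not the actual INC implementation or its exact signature:

from typing import Callable, Dict

algos_mapping: Dict[str, Callable] = {}  # algorithm name -> implementation function

def register_algo(name: str):
    """Illustrative decorator that records an algorithm implementation under `name`."""
    def decorator(func: Callable) -> Callable:
        algos_mapping[name] = func
        return func
    return decorator

@register_algo(name="rtn")
def rtn_quantize_entry(model, configs_mapping, *args, **kwargs):
    # would dispatch to the RTN implementation with the per-module configs
    return model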

neural_compressor/torch/utils/utility.py

Lines changed: 3 additions & 2 deletions

@@ -13,8 +13,10 @@
# limitations under the License.


-import torch
from typing import Callable, Dict, List, Tuple
+
+import torch
+
from neural_compressor.common.logger import Logger

# Dictionary to store a mapping between algorithm names and corresponding algo implementation(function)

@@ -118,4 +120,3 @@ def get_double_quant_config(double_quant_type, weight_sym=True):
    )
    DOUBLE_QUANT_CONFIGS[double_quant_type]["weight_sym"] = weight_sym
    return DOUBLE_QUANT_CONFIGS[double_quant_type]
-

test/3x/torch/quantization/weight_only/test_rtn.py

Lines changed: 5 additions & 6 deletions

@@ -1,11 +1,9 @@
-import unittest
import copy
+import unittest
+
import transformers
-from neural_compressor.torch.quantization import (
-    get_default_rtn_config,
-    RTNConfig,
-    quantize,
-)
+
+from neural_compressor.torch.quantization import RTNConfig, get_default_rtn_config, quantize


class TestRTNQuant(unittest.TestCase):

@@ -25,5 +23,6 @@ def test_export_compressed_model(self):
        model = quantize(model, quant_config)
        print(model)

+
if __name__ == "__main__":
    unittest.main()
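
The test above builds a model, creates a quant_config, and calls quantize(model, quant_config). A hedged end-to-end sketch of that flow; the checkpoint name is hypothetical, chosen only to keep the example small, and is not taken from this diff:

import transformers

from neural_compressor.torch.quantization import get_default_rtn_config, quantize

# hypothetical tiny checkpoint, used only to make the example self-contained
model = transformers.AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-GPTJForCausalLM")
quant_config = get_default_rtn_config()
model = quantize(model, quant_config)  # returns the RTN-quantized model
print(model)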

test/3x/torch/test_config.py

Lines changed: 2 additions & 11 deletions

@@ -1,13 +1,11 @@
import copy
import unittest

+import torch
import transformers

-from neural_compressor.torch.utils import logger
from neural_compressor.torch.quantization import RTNConfig, quantize
-
-
-import torch
+from neural_compressor.torch.utils import logger


def build_simple_torch_model():

@@ -142,7 +140,6 @@ def test_quantize_rtn_from_dict_advance(self):
        self.assertIsNotNone(qmodel)

    def test_quantize_rtn_from_class_advance(self):
-
        quant_config = RTNConfig(weight_bits=4, weight_dtype="nf4")
        # set operator instance
        fc1_config = RTNConfig(weight_bits=4, weight_dtype="int8")

@@ -153,7 +150,6 @@ def test_quantize_rtn_from_class_advance(self):
        self.assertIsNotNone(qmodel)

    def test_config_white_lst(self):
-
        global_config = RTNConfig(weight_bits=4, weight_dtype="nf4")
        # set operator instance
        fc1_config = RTNConfig(weight_bits=4, weight_dtype="int8", white_list=["model.fc1"])

@@ -179,7 +175,6 @@ def test_config_white_lst2(self):
        self.assertTrue(configs_mapping[("fc2", torch.nn.Linear)].weight_bits == 4)

    def test_config_from_dict(self):
-
        quant_config = {
            "rtn_weight_only_quant": {
                "global": {

@@ -199,7 +194,6 @@ def test_config_from_dict(self):
        self.assertIsNotNone(config.local_config)

    def test_config_to_dict(self):
-
        quant_config = RTNConfig(weight_bits=4, weight_dtype="nf4")
        fc1_config = RTNConfig(weight_bits=4, weight_dtype="int8")
        quant_config.set_local("model.fc1", fc1_config)

@@ -208,7 +202,6 @@ def test_config_to_dict(self):
        self.assertIn("local", config_dict)

    def test_same_type_configs_addition(self):
-
        quant_config1 = {
            "rtn_weight_only_quant": {
                "weight_dtype": "nf4",

@@ -242,7 +235,6 @@ def test_same_type_configs_addition(self):
        )

    def test_diff_types_configs_addition(self):
-
        quant_config1 = {
            "rtn_weight_only_quant": {
                "weight_dtype": "nf4",

@@ -259,7 +251,6 @@ def test_diff_types_configs_addition(self):
        self.assertIn("gptq", combined_config_d)

    def test_composable_config_addition(self):
-
        quant_config1 = {
            "rtn_weight_only_quant": {
                "weight_dtype": "nf4",
