Skip to content

Commit 5ad4fa3

Browse files
committed
refine config and interface based on RFC design
Signed-off-by: xin3he <xin3.he@intel.com>
1 parent 2835bdb commit 5ad4fa3

File tree

24 files changed

+1005
-1262
lines changed

24 files changed

+1005
-1262
lines changed

examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,8 @@ def get_user_model():
230230

231231
# 3.x api
232232
if args.approach == 'weight_only':
233-
from neural_compressor.torch import RTNWeightQuantConfig, GPTQConfig, quantize
234-
from neural_compressor.torch.utils.utility import get_double_quant_config
233+
from neural_compressor.torch.quantization import RTNConfig, GPTQConfig, quantize
234+
from neural_compressor.torch.utils import get_double_quant_config
235235
weight_sym = True if args.woq_scheme == "sym" else False
236236
double_quant_config_dict = get_double_quant_config(args.double_quant_type, weight_sym=weight_sym)
237237

@@ -243,9 +243,9 @@ def get_user_model():
243243
"enable_mse_search": args.woq_enable_mse_search,
244244
}
245245
)
246-
quant_config = RTNWeightQuantConfig.from_dict(double_quant_config_dict)
246+
quant_config = RTNConfig.from_dict(double_quant_config_dict)
247247
else:
248-
quant_config = RTNWeightQuantConfig(
248+
quant_config = RTNConfig(
249249
weight_dtype=args.woq_dtype,
250250
weight_bits=args.woq_bits,
251251
weight_group_size=args.woq_group_size,
@@ -257,7 +257,7 @@ def get_user_model():
257257
double_quant_sym=args.double_quant_sym,
258258
double_quant_group_size=args.double_quant_group_size,
259259
)
260-
quant_config.set_local("lm_head", RTNWeightQuantConfig(weight_dtype="fp32"))
260+
quant_config.set_local("lm_head", RTNConfig(weight_dtype="fp32"))
261261
user_model = quantize(
262262
model=user_model, quant_config=quant_config
263263
)

neural_compressor/common/base_config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ def set_local(self, operator_name: str, config: BaseConfig) -> BaseConfig:
127127
self.local_config[operator_name] = config
128128
return self
129129

130-
def to_dict(self, params_list=[], operator2str=None):
130+
def to_dict(self):
131131
result = {}
132132
global_config = self.get_params_dict()
133133
if bool(self.local_config):
@@ -147,7 +147,7 @@ def get_params_dict(self):
147147
return result
148148

149149
@classmethod
150-
def from_dict(cls, config_dict, str2operator=None):
150+
def from_dict(cls, config_dict):
151151
"""Construct config from a dict.
152152
153153
Args:

neural_compressor/common/utility.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# config name
2828
BASE_CONFIG = "base_config"
2929
COMPOSABLE_CONFIG = "composable_config"
30-
RTN_WEIGHT_ONLY_QUANT = "rtn_weight_only_quant"
30+
RTN = "rtn"
3131
STATIC_QUANT = "static_quant"
3232
GPTQ = "gptq"
3333
FP8_QUANT = "fp8_quant"

neural_compressor/tensorflow/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def register_algo(name):
3535
3636
Usage example:
3737
@register_algo(name=example_algo)
38-
def example_algo(model: torch.nn.Module, quant_config: RTNWeightQuantConfig) -> torch.nn.Module:
38+
def example_algo(model: torch.nn.Module, quant_config: RTNConfig) -> torch.nn.Module:
3939
...
4040
Args:
4141
name (str): The name under which the algorithm function will be registered.

neural_compressor/torch/__init__.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,3 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14-
15-
from neural_compressor.torch.utils.utility import register_algo
16-
from neural_compressor.torch.algorithms import rtn_quantize_entry, gptq_quantize_entry
17-
18-
from neural_compressor.torch.quantization import (
19-
quantize,
20-
RTNWeightQuantConfig,
21-
get_default_rtn_config,
22-
GPTQConfig,
23-
get_default_gptq_config,
24-
)
25-
26-
from neural_compressor.torch.tune import autotune, TuningConfig, get_default_tune_config

neural_compressor/torch/algorithms/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,7 @@
1313
# limitations under the License.
1414

1515

16-
from neural_compressor.torch.algorithms.weight_only_algos import rtn_quantize_entry
17-
from neural_compressor.torch.algorithms.weight_only_algos import gptq_quantize_entry
16+
from .weight_only import (
17+
rtn_quantize,
18+
gptq_quantize,
19+
)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Demo of algorithm usage w/o INC

neural_compressor/torch/algorithms/weight_only/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@
1111
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
14+
15+
from .rtn import rtn_quantize
16+
from .gptq import gptq_quantize

0 commit comments

Comments (0)