This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

support dtype&scheme customization for QAT quantizer #4137

Merged
merged 16 commits on Oct 13, 2021
4 changes: 2 additions & 2 deletions docs/en_US/Compression/CustomizeCompressor.rst
@@ -155,7 +155,7 @@ Sometimes it's necessary for a quantization operation to have a customized backw
grad_output : Tensor
gradient of the output of quantization operation
quant_type : QuantType
the type of quantization, it can be `QuantType.QUANT_INPUT`, `QuantType.QUANT_WEIGHT`, `QuantType.QUANT_OUTPUT`,
the type of quantization, it can be `QuantType.INPUT`, `QuantType.WEIGHT`, `QuantType.OUTPUT`,
you can define different behavior for different types.
Returns
-------
@@ -164,7 +164,7 @@ Sometimes it's necessary for a quantization operation to have a customized backw
"""

# for quant_output function, set grad to zero if the absolute value of tensor is larger than 1
if quant_type == QuantType.QUANT_OUTPUT:
if quant_type == QuantType.OUTPUT:
grad_output[torch.abs(tensor) > 1] = 0
return grad_output
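
For reference, a minimal end-to-end sketch of the customized backward pass this docstring describes (the ClipGrad class name, the import path, and the exact quant_backward argument list are assumptions based on the surrounding NNI customization docs, not part of this diff):

import torch
from nni.compression.pytorch.compressor import QuantGrad, QuantType

class ClipGrad(QuantGrad):
    @staticmethod
    def quant_backward(tensor, grad_output, quant_type, *args, **kwargs):
        # straight-through estimator by default; for output quantization,
        # zero the gradient wherever the activation magnitude exceeds 1
        if quant_type == QuantType.OUTPUT:
            grad_output[torch.abs(tensor) > 1] = 0
        return grad_output

# the customized backward is then enabled on a quantizer instance, e.g.:
# quantizer.quant_grad = ClipGrad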

45 changes: 29 additions & 16 deletions examples/model_compress/quantization/QAT_torch_quantizer.py
@@ -2,11 +2,13 @@
import torch.nn.functional as F
from torchvision import datasets, transforms
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
from nni.compression.pytorch.quantization.settings import set_quant_scheme_dtype

import sys
sys.path.append('../models')
from mnist.naive import NaiveModel


def train(model, device, train_loader, optimizer):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
@@ -58,22 +60,32 @@ def main():
# {'quant_types': ['input'], 'op_names': ['b']} in the configure_list.

configure_list = [{
'quant_types': ['weight', 'input'],
'quant_bits': {'weight': 8, 'input': 8},
'op_names': ['conv1', 'conv2']
}, {
'quant_types': ['output'],
'quant_bits': {'output': 8, },
'op_names': ['relu1', 'relu2']
}, {
'quant_types': ['output', 'weight', 'input'],
'quant_bits': {'output': 8, 'weight': 8, 'input': 8},
'op_names': ['fc1'],
}, {
'quant_types': ['output', 'weight', 'input'],
'quant_bits': {'output': 8, 'weight': 8, 'input': 8},
'op_names': ['fc2'],
}]
'quant_types': ['weight', 'input'],
'quant_bits': {'weight': 8, 'input': 8},
'op_names': ['conv1', 'conv2']
}, {
'quant_types': ['output'],
'quant_bits': {'output': 8, },
'op_names': ['relu1', 'relu2']
}, {
'quant_types': ['output', 'weight', 'input'],
'quant_bits': {'output': 8, 'weight': 8, 'input': 8},
'op_names': ['fc1', 'fc2'],
}]
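# The list above quantizes conv1/conv2 weights and inputs, relu1/relu2 outputs, and
# fc1/fc2 weights, inputs and outputs, all to 8 bits.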

# you can also set the quantization dtype and scheme layer-wise through configure_list like:
# configure_list = [{
# 'quant_types': ['weight', 'input'],
# 'quant_bits': {'weight': 8, 'input': 8},
# 'op_names': ['conv1', 'conv2'],
# 'quant_dtype': 'int',
# 'quant_scheme': 'per_channel_symmetric'
# }]
# For now, quant_dtype's options are 'int' and 'uint', and quant_scheme's options are 'per_tensor_affine',
# 'per_tensor_symmetric', 'per_channel_affine' and 'per_channel_symmetric'.
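# The three calls below set the scheme and dtype globally per quantization target:
# weights use per-channel symmetric int quantization, while inputs and outputs
# (activations) use per-tensor symmetric int quantization.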
set_quant_scheme_dtype('weight', 'per_channel_symmetric', 'int')
set_quant_scheme_dtype('output', 'per_tensor_symmetric', 'int')
set_quant_scheme_dtype('input', 'per_tensor_symmetric', 'int')

model = NaiveModel().to(device)
dummy_input = torch.randn(1, 1, 28, 28).to(device)
@@ -98,5 +98,6 @@ def main():
calibration_config = quantizer.export_model(model_path, calibration_path, onnx_path, input_shape, device)
print("Generated calibration config is: ", calibration_config)


if __name__ == '__main__':
main()
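
The collapsed portion of this example (between the two hunks above) builds the quantizer and runs training; a condensed sketch of that flow is given below. The QAT_Quantizer constructor arguments, the test helper, the data loaders, the optimizer hyper-parameters and the epoch count are assumptions for illustration; only the lines also visible in the diff are taken from the example itself.

model = NaiveModel().to(device)
dummy_input = torch.randn(1, 1, 28, 28).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# wrap the model; QAT inserts fake quantization for the layers named in configure_list
quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
quantizer.compress()

for epoch in range(10):  # epoch count is illustrative
    train(model, device, train_loader, optimizer)
    test(model, device, test_loader)

calibration_config = quantizer.export_model(model_path, calibration_path, onnx_path, input_shape, device)
print("Generated calibration config is: ", calibration_config)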
3 changes: 0 additions & 3 deletions nni/algorithms/compression/pytorch/quantization/observers.py

This file was deleted.
