[quant] Add quant wrap for functional api and refine the qat #33162

Merged · 9 commits · Jun 9, 2021
100 changes: 47 additions & 53 deletions python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -251,24 +251,25 @@ def __init__(self,
super(ImperativeQuantizeInputs, self).__init__()

self._quantizable_layer_type = tuple(
utils.quant_input_layers_map[layer]
if layer in utils.quant_input_layers_map else layer
utils.layer_name_map[layer]
if layer in utils.layer_name_map else layer
for layer in quantizable_layer_type)
for layer in self._quantizable_layer_type:
assert not isinstance(layer, str), \
assert not isinstance(layer, str) \
and layer in utils.fake_quant_input_layers, \
"%s is unspported to be quantized." % layer

quantize_type = {
'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
}
assert weight_quantize_type in quantize_type, \
assert weight_quantize_type != 'moving_average_abs_max' \
and weight_quantize_type in quantize_type, \
"Unsupported weight_quantize_type: %s. It can only " \
"be abs_max or moving_average_abs_max or " \
"channel_wise_abs_max." % weight_quantize_type
assert activation_quantize_type != 'channel_wise_abs_max' \
and activation_quantize_type in quantize_type, \
"be abs_max or channel_wise_abs_max." % weight_quantize_type
# TODO (jc): activation_quantize_type supports range_abs_max
assert activation_quantize_type == 'moving_average_abs_max', \
"Unsupported activation_quantize_type: %s. It can " \
"only be abs_max or moving_average_abs_max now." \
"only be moving_average_abs_max now." \
% activation_quantize_type

bits_check = lambda bits: isinstance(bits, int) \
@@ -305,41 +306,29 @@ def apply(self, model):
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

for name, layer in model.named_sublayers():
if not isinstance(layer, self._quantizable_layer_type) \
or (hasattr(layer, "skip_quant") \
and layer.skip_quant == True):
for name, cur_layer in model.named_sublayers():
if not isinstance(cur_layer, self._quantizable_layer_type) \
or (hasattr(cur_layer, "skip_quant") \
and cur_layer.skip_quant == True):
continue

# TODO(jc): optimize this module
last_idx = 0
idx = 0
obj = model
while idx < len(name):
if (name[idx] == '.'):
if hasattr(obj, name[last_idx:idx]):
obj = getattr(obj, name[last_idx:idx])
last_idx = idx + 1
idx += 1
target = name[last_idx:idx]

quant_layer = self._get_input_quantized_layer(layer)
setattr(obj, target, quant_layer)
parent_layer, sub_name = \
utils.find_parent_layer_and_sub_name(model, name)

cur_quant_layer = self._get_input_quantized_layer(cur_layer)
setattr(parent_layer, sub_name, cur_quant_layer)

def _get_input_quantized_layer(self, layer):
quant_layer_name = None
for key, value in utils.quant_input_layers_map.items():

for key, value in utils.layer_name_map.items():
if isinstance(layer, value):
quant_layer_name = 'Quantized' + key
break
assert quant_layer_name is not None, \
"The layer %s is unsupported to be quantized." \
% layer.full_name()

layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear']
if quant_layer_name not in layer_with_weight:
quant_layer_name = 'QuantizedNoweightLayer'

return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs)


@@ -374,25 +363,21 @@ def apply(self, model):
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

for name, layer in model.named_sublayers():
if not self._is_target_layer(layer):
for cur_name, cur_layer in model.named_sublayers():
if not self._is_target_layer(cur_layer):
continue

# TODO(jc): optimize this module
last_idx = 0
idx = 0
obj = model
while idx < len(name):
if (name[idx] == '.'):
if hasattr(obj, name[last_idx:idx]):
obj = getattr(obj, name[last_idx:idx])
last_idx = idx + 1
idx += 1
target = name[last_idx:idx]

quant_layer = quant_nn.__dict__["QuantizedOutputLayer"](
layer, self._moving_rate)
setattr(obj, target, quant_layer)
parent_layer, sub_name = \
utils.find_parent_layer_and_sub_name(model, cur_name)

if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)):
cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer(
cur_layer, self._moving_rate)
else:
cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer,
self._moving_rate)

setattr(parent_layer, sub_name, cur_quant_layer)

def save_quantized_model(self, layer, path, input_spec=None, **config):
"""
@@ -468,9 +453,18 @@ def _is_target_layer(self, layer):
"""
Whether the layer needs to calculate output scales.
"""
return isinstance(layer, utils.quant_output_layers) \
or ('quantized' in layer.full_name() and \
'quantized_noweight' not in layer.full_name())
flag = False
if isinstance(layer, dygraph.Layer):
# exclude fake_quant ops in quant_nn file
if utils.is_leaf_layer(layer) and \
not isinstance(layer, tuple(utils.fake_quant_leaf_layers)):
flag = True
# consider QuantizedConv2D and QuantizedLinear ops
if isinstance(layer, tuple(utils.fake_quant_wrap_layers)):
flag = True
if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer):
flag = True
return flag

def _save_output_scale(self, program, scope):
"""
@@ -514,4 +508,4 @@ def _is_skip_quant_op(self, block, in_op):
previous_ops = [utils.find_previous_op(block, arg_name) \
for arg_name in in_op.input_arg_names]
return any(op is not None and op.type not in \
utils.fake_quantize_dequantize_types for op in previous_ops)
utils.fake_quantize_dequantize_op_types for op in previous_ops)
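
Note (not part of the diff): both apply() hunks above replace the removed inline dotted-name walk with utils.find_parent_layer_and_sub_name(model, name). That helper is defined in imperative/utils.py and is not shown in this diff; the sketch below only illustrates the behaviour the removed loop implemented.

# Illustrative sketch; the real helper lives in
# python/paddle/fluid/contrib/slim/quantization/imperative/utils.py
# and may differ in detail.
def find_parent_layer_and_sub_name(model, name):
    """For a dotted sublayer name such as 'features.conv2d_0', return the
    parent layer object (model.features) and the trailing attribute name
    ('conv2d_0'), so the caller can setattr(parent_layer, sub_name, new_layer).
    """
    parent_layer = model
    sub_name = name
    while '.' in sub_name:
        prefix, rest = sub_name.split('.', 1)
        if not hasattr(parent_layer, prefix):
            break
        parent_layer = getattr(parent_layer, prefix)
        sub_name = rest
    return parent_layer, sub_name
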
103 changes: 73 additions & 30 deletions python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py
@@ -22,17 +22,28 @@
from paddle.fluid.initializer import Constant
from paddle.fluid.data_feeder import check_variable_and_dtype
from paddle.nn import functional as F
import logging
from paddle.fluid.log_helper import get_logger

__all__ = [
'FakeQuantMovingAverage', 'FakeQuantAbsMax',
'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear',
'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale'
'FakeQuantMovingAverageAbsMax',
'FakeQuantAbsMax',
'FakeQuantChannelWiseAbsMax',
'QuantizedConv2D',
'QuantizedLinear',
'QuantizedNoweightLayer',
'MovingAverageAbsMaxScale',
'MAOutputScaleLayer',
'FakeQuantMAOutputScaleLayer',
]

_logger = get_logger(
__name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s')

class FakeQuantMovingAverage(layers.Layer):

class FakeQuantMovingAverageAbsMax(layers.Layer):
r"""
FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant.
FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant.
Its computational formula is described as below:

:math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)`
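
(Not part of the diff: a minimal sketch of the moving_average_abs_max update described by the formula above, written as standalone Python rather than the in-place Paddle op.)

# accum and state are persistent buffers; the real op updates them in place
# and returns the smoothed scale used for fake quant/dequant.
def moving_average_abs_max_scale(x_abs_max, accum, state, moving_rate=0.9):
    accum = moving_rate * accum + x_abs_max   # decayed running sum of abs-max values
    state = moving_rate * state + 1.0         # decayed running sample count
    scale = accum / state                     # scale = (rate*accum + max(|x|)) / (rate*state + 1)
    return scale, accum, state
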
@@ -45,7 +56,7 @@ def __init__(self,
moving_rate=0.9,
quant_bits=8,
dtype='float32'):
super(FakeQuantMovingAverage, self).__init__()
super(FakeQuantMovingAverageAbsMax, self).__init__()
self._moving_rate = moving_rate
self._quant_bits = quant_bits

@@ -98,7 +109,7 @@ def forward(self, input):
return out

check_variable_and_dtype(input, 'input', ['float32'],
"FakeQuantMovingAverage")
"FakeQuantMovingAverageAbsMax")
attrs = {
'moving_rate': self._moving_rate,
'bit_length': self._quant_bits,
@@ -210,7 +221,7 @@ def forward(self, input):
return quant_out


class FakeChannelWiseQuantDequantAbsMax(layers.Layer):
class FakeQuantChannelWiseAbsMax(layers.Layer):
def __init__(self,
name=None,
channel_num=None,
@@ -219,7 +230,7 @@ def __init__(self,
dtype='float32',
quant_on_weight=False):
assert quant_on_weight == True, "Channel_wise only can be used on weight quantization."
super(FakeChannelWiseQuantDequantAbsMax, self).__init__()
super(FakeQuantChannelWiseAbsMax, self).__init__()
self._quant_bits = quant_bits
self._quant_axis = quant_axis
self._dtype = dtype
@@ -265,7 +276,7 @@ def forward(self, input):
return out

check_variable_and_dtype(input, 'input', ['float32'],
"FakeChannelWiseQuantDequantAbsMax")
"FakeQuantChannelWiseAbsMax")
attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis}
inputs = {"X": [input]}
quant_out = self._helper.create_variable(
@@ -313,8 +324,8 @@ def _get_fake_quant_type(quant_type, **kwargs):
"when you use channel_wise_abs_max strategy.")
fake_quant_map = {
'abs_max': FakeQuantAbsMax,
'moving_average_abs_max': FakeQuantMovingAverage,
'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax
'moving_average_abs_max': FakeQuantMovingAverageAbsMax,
'channel_wise_abs_max': FakeQuantChannelWiseAbsMax
}

return fake_quant_map[quant_type](**call_args)
@@ -498,12 +509,7 @@ def __init__(self,
quant_on_weight=False)

def forward(self, input):
quant_input = self._fake_quant_input(input)
# TODO (jc): support ops that have several inputs
if isinstance(input, list):
assert len(input) == 1, \
"The QuantizedNoweightLayer should only have one input."
return self._layer.forward(quant_input)
return self._layer.forward(self._fake_quant_input(input))


class MovingAverageAbsMaxScale(layers.Layer):
@@ -590,19 +596,56 @@ def forward(self, input):
return quant_out


class QuantizedOutputLayer(layers.Layer):
def __init__(self, layer=None, moving_rate=0.9, dtype='float32'):
class MAOutputScaleLayer(layers.Layer):
"""
Calculate the scale (moving average abs max) for the output of the input layer.
Add MovingAverageMaxScale layer to the behind of the input layer.
"""

def __init__(self, layer=None, moving_rate=0.9, name=None, dtype='float32'):
r"""
Add MovingAverageMaxScale layer to the behind of the input layer.
Construct
"""
super(QuantizedOutputLayer, self).__init__()
super(MAOutputScaleLayer, self).__init__()
self._layer = layer
self._moving_average_abs_max_scale = \
MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype)
if name is None:
name = layer.full_name()
self._ma_output_scale = \
MovingAverageAbsMaxScale(name, moving_rate, dtype)

def forward(self, *inputs, **kwargs):
out = self._layer(*inputs, **kwargs)
# TODO (jc): support the ops of several outputs
if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
return out
else:
return self._ma_output_scale(out)

def forward(self, input):
if isinstance(input, list):
assert len(input) == 1, \
"The QuantizedOutputLayer should only have one input."
out = self._layer(input)
return self._moving_average_abs_max_scale(out)

class FakeQuantMAOutputScaleLayer(layers.Layer):
def __init__(self,
layer,
weight_bits=8,
activation_bits=8,
moving_rate=0.9,
name=None,
*args,
**kwargs):

super(FakeQuantMAOutputScaleLayer, self).__init__()
self._layer = layer
self._fake_quant_output = _get_fake_quant_type(
'moving_average_abs_max',
name=layer.full_name() if name is None else name,
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)

def forward(self, *inputs, **kwargs):
out = self._layer(*inputs, **kwargs)
# TODO (jc): support the ops of several outputs
if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1:
return out
else:
return self._fake_quant_output(out)
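
Note (not part of the diff): the changes above split the old QuantizedOutputLayer into MAOutputScaleLayer, which only records a moving-average abs-max scale of the layer's output, and FakeQuantMAOutputScaleLayer, which additionally fake-quantizes the output and is used for the layer types listed in utils.fake_quant_output_layers. A rough usage sketch with the constructor signatures shown above; the import path is an assumption, and in practice the wrapper is inserted automatically by the apply() pass in qat.py rather than by hand.

import paddle
# assumed import path for the module changed in this diff
from paddle.fluid.contrib.slim.quantization.imperative import quant_nn

linear = paddle.nn.Linear(4, 4)

# Wrap the layer so each forward pass also updates a moving-average
# abs-max scale of its output (MAOutputScaleLayer signature from the diff).
wrapped = quant_nn.MAOutputScaleLayer(linear, moving_rate=0.9)

x = paddle.rand([2, 4])
y = wrapped(x)  # same result as linear(x); the output scale is tracked as a side effect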