Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support user-defined activation/weight quantize and preprocess. #28570

Merged
merged 5 commits into from
Nov 18, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 43 additions & 3 deletions python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ def __init__(self,
weight_quantize_type='abs_max',
activation_quantize_type='moving_average_abs_max',
moving_rate=0.9,
quantizable_layer_type=['Conv2D', 'Linear']):
quantizable_layer_type=['Conv2D', 'Linear'],
weight_preprocess_layer=None,
act_preprocess_layer=None,
weight_quantize_layer=None,
act_quantize_layer=None):
"""
The constructor for ImperativeQuantAware.

Expand All @@ -81,7 +85,28 @@ def __init__(self,
quantizable_op_type(list[str]): List the type of layers that will be quantized.
Default is ['Conv2D', 'Linear']. The quantizable_op_type in
QuantizationFreezePass and ConvertToInt8Pass must be the same as this.

weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
weight before quantization. Using this can quickly test if user's
preprocess method works or not. The input is non-quantized
weight and function returns processed weight to be quantized.
If None, the weight will be quantized directly. Default is None.
act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to preprocess
activation before quantization. Using this can quickly test if user's
preprocess method works or not. The input is non-quantized
activation and function returns processed activation to be quantized.
If None, the activation will be quantized directly. Default is None.
weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize weight.
Using this can quickly test if user's quantization method works or not.
In this layer, user should both define quantization method and
dequantization method, that is, the function's input is non-quantized
weight and returns dequantized weight. If None, will use
quantization op defined by 'weight_quantize_type'. Default is None.
act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer that defines how to quantize activation.
Using this can quickly test if user's quantization method works or not.
In this layer, user should both define quantization method and
dequantization method, that is, the function's input is non-quantized
activation and returns dequantized activation. If None, will use
quantization op defined by 'activation_quantize_type'. Default is None.

Examples:
.. code-block:: python
Expand Down Expand Up @@ -118,6 +143,19 @@ def __init__(self,
self._activation_bits = activation_bits
self._moving_rate = moving_rate

self._weight_pre_layer = weight_preprocess_layer
self._act_pre_layer = act_preprocess_layer
self._weight_quant_layer = weight_quantize_layer
self._act_quant_layer = act_quantize_layer

t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer)
assert t_check(
self._weight_pre_layer), "weight_preprocess should be nn.Layer"
assert t_check(self._act_pre_layer), "act_preprocess should be nn.Layer"
assert t_check(
self._weight_quant_layer), "weight_quantize should be nn.Layer"
assert t_check(self._act_quant_layer), "act_quantize should be nn.Layer"

quant_type = {
'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
}
Expand Down Expand Up @@ -189,7 +227,9 @@ def _get_quantized_counterpart(self, layer):

quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
layer, self._weight_bits, self._activation_bits, self._moving_rate,
self._weight_quantize_type, self._activation_quantize_type)
self._weight_quantize_type, self._activation_quantize_type,
self._weight_pre_layer, self._act_pre_layer,
self._weight_quant_layer, self._act_quant_layer)
return quantized_layer


Expand Down
118 changes: 82 additions & 36 deletions python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,11 @@ def __init__(self,
activation_bits=8,
moving_rate=0.9,
weight_quantize_type='abs_max',
activation_quantize_type='abs_max'):
activation_quantize_type='abs_max',
weight_pre_layer=None,
act_pre_layer=None,
weight_quant_layer=None,
act_quant_layer=None):
super(QuantizedConv2D, self).__init__()
# For Conv2D
self._groups = getattr(layer, '_groups')
Expand All @@ -347,26 +351,44 @@ def __init__(self,
self.bias = getattr(layer, 'bias')
# For FakeQuant
self._conv2d_quant_axis = 0
self._fake_quant_weight = _get_fake_quant_type(
weight_quantize_type,
name=self.weight.name,
moving_rate=moving_rate,
quant_bits=weight_bits,
dtype=self._dtype,
quant_on_weight=True,
channel_num=self.weight.shape[self._conv2d_quant_axis],
quant_axis=self._conv2d_quant_axis)
self._fake_quant_input = _get_fake_quant_type(
activation_quantize_type,
name=layer.full_name(),
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)

if weight_quant_layer is not None:
self._fake_quant_weight = weight_quant_layer()
else:
self._fake_quant_weight = _get_fake_quant_type(
weight_quantize_type,
name=self.weight.name,
moving_rate=moving_rate,
quant_bits=weight_bits,
dtype=self._dtype,
quant_on_weight=True,
channel_num=self.weight.shape[self._conv2d_quant_axis],
quant_axis=self._conv2d_quant_axis)
if act_quant_layer is not None:
self._fake_quant_input = act_quant_layer()
else:
self._fake_quant_input = _get_fake_quant_type(
activation_quantize_type,
name=layer.full_name(),
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)

self._act_preprocess = act_pre_layer(
) if act_pre_layer is not None else None
self._weight_preprocess = weight_pre_layer(
) if weight_pre_layer is not None else None

def forward(self, input):
if self._act_preprocess is not None:
input = self._act_preprocess(input)
quant_input = self._fake_quant_input(input)
quant_weight = self._fake_quant_weight(self.weight)

weight = self.weight
if self._weight_preprocess is not None:
weight = self._weight_preprocess(self.weight)
quant_weight = self._fake_quant_weight(weight)

if in_dygraph_mode() and self._l_type == 'conv2d':
attrs = ('strides', self._stride, 'paddings', self._padding,
Expand Down Expand Up @@ -428,7 +450,11 @@ def __init__(self,
activation_bits=8,
moving_rate=0.9,
weight_quantize_type='abs_max',
activation_quantize_type='abs_max'):
activation_quantize_type='abs_max',
weight_pre_layer=None,
act_pre_layer=None,
weight_quant_layer=None,
act_quant_layer=None):
super(QuantizedLinear, self).__init__()
# For Linear
self._act = getattr(layer, '_act')
Expand All @@ -437,26 +463,46 @@ def __init__(self,
self.bias = getattr(layer, 'bias')
# For FakeQuant
self._linear_quant_axis = 1
self._fake_quant_weight = _get_fake_quant_type(
weight_quantize_type,
name=self.weight.name,
moving_rate=moving_rate,
quant_bits=weight_bits,
dtype=self._dtype,
quant_on_weight=True,
channel_num=self.weight.shape[self._linear_quant_axis],
quant_axis=self._linear_quant_axis)
self._fake_quant_input = _get_fake_quant_type(
activation_quantize_type,
name=layer.full_name(),
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)

if weight_quant_layer is not None:
self._fake_quant_weight = weight_quant_layer()
else:
self._fake_quant_weight = _get_fake_quant_type(
weight_quantize_type,
name=self.weight.name,
moving_rate=moving_rate,
quant_bits=weight_bits,
dtype=self._dtype,
quant_on_weight=True,
channel_num=self.weight.shape[self._linear_quant_axis],
quant_axis=self._linear_quant_axis)

if act_quant_layer is not None:
self._fake_quant_input = act_quant_layer()
else:
self._fake_quant_input = _get_fake_quant_type(
activation_quantize_type,
name=layer.full_name(),
moving_rate=moving_rate,
quant_bits=activation_bits,
dtype=self._dtype,
quant_on_weight=False)

self._act_preprocess = act_pre_layer(
) if act_pre_layer is not None else None
self._weight_preprocess = weight_pre_layer(
) if weight_pre_layer is not None else None

def forward(self, input):
if self._act_preprocess is not None:
input = self._act_preprocess(input)
quant_input = self._fake_quant_input(input)
quant_weight = self._fake_quant_weight(self.weight)

weight = self.weight
if self._weight_preprocess is not None:
weight = self._weight_preprocess(self.weight)
quant_weight = self._fake_quant_weight(weight)

if in_dygraph_mode():
pre_bias = _varbase_creator(dtype=input.dtype)
core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',
Expand Down
Loading