From 10e3cf43d436a1a611bf477e7efd3b8d50486486 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 27 May 2021 09:06:55 +0000 Subject: [PATCH 1/8] Add wrap for functional api --- .../slim/quantization/imperative/quant_nn.py | 28 +++++--- .../slim/quantization/imperative/utils.py | 3 +- python/paddle/nn/quant/__init__.py | 17 +++++ python/paddle/nn/quant/functional_layers.py | 71 +++++++++++++++++++ 4 files changed, 109 insertions(+), 10 deletions(-) create mode 100644 python/paddle/nn/quant/__init__.py create mode 100644 python/paddle/nn/quant/functional_layers.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index f6fef0689d43a..507f3403c7e62 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -22,6 +22,7 @@ from paddle.fluid.initializer import Constant from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.nn import functional as F +from paddle.fluid.log_helper import get_logger __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', @@ -29,6 +30,9 @@ 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' ] +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + class FakeQuantMovingAverage(layers.Layer): r""" @@ -498,12 +502,15 @@ def __init__(self, quant_on_weight=False) def forward(self, input): - quant_input = self._fake_quant_input(input) # TODO (jc): support ops that have several inputs - if isinstance(input, list): - assert len(input) == 1, \ - "The QuantizedNoweightLayer should only have one input." - return self._layer.forward(quant_input) + if (isinstance(input, list) or isinstance(input, tuple)) \ + and len(input) > 1: + _logger.info("%s has several inputs, so skip collecting " + "the input scales" % self._layer.full_name()) + return self._layer.forward(input) + else: + quant_input = self._fake_quant_input(input) + return self._layer.forward(quant_input) class MovingAverageAbsMaxScale(layers.Layer): @@ -601,8 +608,11 @@ def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) def forward(self, input): - if isinstance(input, list): - assert len(input) == 1, \ - "The QuantizedOutputLayer should only have one input." 
out = self._layer(input) - return self._moving_average_abs_max_scale(out) + # TODO (jc): support the ops of several outputs + if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: + _logger.info("%s has several outputs, so skip collecting " + "the output threshold" % self._layer.full_name()) + return out + else: + return self._moving_average_abs_max_scale(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 491f8a7e25cbc..9adc7df69209c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -54,7 +54,8 @@ paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU, paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus, paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh, - paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample) + paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample, + paddle.nn.quant.FloatFunctionalLayer) weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py new file mode 100644 index 0000000000000..f78586e0c6fde --- /dev/null +++ b/python/paddle/nn/quant/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .functional_layers import FloatFunctionalLayer # noqa: F401 + +__all__ = ['FloatFunctionalLayer'] diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py new file mode 100644 index 0000000000000..a1013dbd773ce --- /dev/null +++ b/python/paddle/nn/quant/functional_layers.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ...fluid.dygraph import layers +from ...tensor import math, manipulation + +__all__ = [] + + +class FloatFunctionalLayer(layers.Layer): + def __init__(self): + super(FloatFunctionalLayer, self).__init__() + + def add(x, y, name=None): + """ + Wrap paddle.add + """ + return math.add(x, y, name) + + def subtract(x, y, name=None): + """ + Wrap paddle.subtract + """ + return math.subtract(x, y, name) + + def multiply(x, y, name=None): + """ + Wrap paddle.multiply + """ + return math.multiply(x, y, name) + + def divide(x, y, name=None): + """ + Wrap paddle.divide + """ + return math.divide(x, y, name) + + def reshape(x, shape, name=None): + """ + Wrap paddle.reshape + """ + return manipulation.reshape(x, shape, name) + + def tranpose(x, perm, name=None): + """ + Wrap paddle.tranpose + """ + return manipulation.transpose(x, perm, name) + + def concat(x, axis=0, name=None): + """ + Warp paddle.concat + """ + return manipulation.concat(x, axis, name) + + def flatten(x, start_axis=0, stop_axis=-1, name=None): + """ + Warp paddle.flatten + """ + return manipulation.flatten(x, start_axis, stop_axis, name) From dfce1d393c169ca2738d6da0cfb94b0160b15f26 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Mon, 31 May 2021 03:27:13 +0000 Subject: [PATCH 2/8] Refine the wraped api --- .../slim/quantization/imperative/quant_nn.py | 4 +- python/paddle/nn/quant/functional_layers.py | 40 +++++++------------ 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index a37094f2f52d3..b4449b2751dce 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -608,8 +608,8 @@ def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): self._moving_average_abs_max_scale = \ MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) - def forward(self, input): - out = self._layer(input) + def forward(self, *inputs, **kwargs): + out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: _logger.info("%s has several outputs, so skip collecting " diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index e7c0011777843..7eeaa5c58e78a 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -24,53 +24,43 @@ def __init__(self): class add(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(add, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.add(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.add(x, y, name) class subtract(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(subtract, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.subtract(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.subtract(x, y, name) class multiply(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(multiply, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" 
- return math.multiply(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.multiply(x, y, name) class divide(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(divide, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.divide(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.divide(x, y, name) class reshape(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(reshape, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, shape)" - return manipulation.reshape(inputs[0], inputs[1], self._name) + def forward(self, x, shape, name=None): + return manipulation.reshape(x, shape, name) class tranpose(FloatFunctionalLayer): From d35cc88f1e87e12a96cbe56cbc773683fd4a7c1f Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 03:34:28 +0000 Subject: [PATCH 3/8] Add unit test for quant functional layers --- .../slim/quantization/imperative/qat.py | 86 ++++++++----------- .../slim/quantization/imperative/quant_nn.py | 22 ++--- .../slim/quantization/imperative/utils.py | 65 ++++++++++---- python/paddle/nn/quant/__init__.py | 2 +- python/paddle/nn/quant/functional_layers.py | 29 +++---- 5 files changed, 108 insertions(+), 96 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 66b11d1f17ad4..ac80ff8a660aa 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -255,20 +255,21 @@ def __init__(self, if layer in utils.quant_input_layers_map else layer for layer in quantizable_layer_type) for layer in self._quantizable_layer_type: - assert not isinstance(layer, str), \ + assert not isinstance(layer, str) \ + and layer in utils.quant_input_layers_map.values(), \ "%s is unspported to be quantized." % layer quantize_type = { 'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max' } - assert weight_quantize_type in quantize_type, \ + assert weight_quantize_type != 'moving_average_abs_max' \ + and weight_quantize_type in quantize_type, \ "Unsupported weight_quantize_type: %s. It can only " \ - "be abs_max or moving_average_abs_max or " \ - "channel_wise_abs_max." % weight_quantize_type - assert activation_quantize_type != 'channel_wise_abs_max' \ - and activation_quantize_type in quantize_type, \ + "be abs_max or channel_wise_abs_max." % weight_quantize_type + # TODO (jc): activation_quantize_type supports range_abs_max + assert activation_quantize_type == 'moving_average_abs_max', \ "Unsupported activation_quantize_type: %s. It can " \ - "only be abs_max or moving_average_abs_max now." \ + "only be moving_average_abs_max now." \ % activation_quantize_type bits_check = lambda bits: isinstance(bits, int) \ @@ -305,26 +306,17 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." 
- for name, layer in model.named_sublayers(): - if not isinstance(layer, self._quantizable_layer_type) \ - or (hasattr(layer, "skip_quant") \ - and layer.skip_quant == True): + for name, cur_layer in model.named_sublayers(): + if not isinstance(cur_layer, self._quantizable_layer_type) \ + or (hasattr(cur_layer, "skip_quant") \ + and cur_layer.skip_quant == True): continue - # TODO(jc): optimize this module - last_idx = 0 - idx = 0 - obj = model - while idx < len(name): - if (name[idx] == '.'): - if hasattr(obj, name[last_idx:idx]): - obj = getattr(obj, name[last_idx:idx]) - last_idx = idx + 1 - idx += 1 - target = name[last_idx:idx] - - quant_layer = self._get_input_quantized_layer(layer) - setattr(obj, target, quant_layer) + parent_layer, sub_name = \ + utils.find_parent_layer_and_sub_name(model, name) + + cur_quant_layer = self._get_input_quantized_layer(cur_layer) + setattr(parent_layer, sub_name, cur_quant_layer) def _get_input_quantized_layer(self, layer): quant_layer_name = None @@ -336,8 +328,7 @@ def _get_input_quantized_layer(self, layer): "The layer %s is unsupported to be quantized." \ % layer.full_name() - layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear'] - if quant_layer_name not in layer_with_weight: + if layer not in utils.fake_quant_input_layers: quant_layer_name = 'QuantizedNoweightLayer' return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs) @@ -374,25 +365,16 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - for name, layer in model.named_sublayers(): - if not self._is_target_layer(layer): + for name, cur_layer in model.named_sublayers(): + if not self._is_target_layer(cur_layer): continue - # TODO(jc): optimize this module - last_idx = 0 - idx = 0 - obj = model - while idx < len(name): - if (name[idx] == '.'): - if hasattr(obj, name[last_idx:idx]): - obj = getattr(obj, name[last_idx:idx]) - last_idx = idx + 1 - idx += 1 - target = name[last_idx:idx] - - quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( - layer, self._moving_rate) - setattr(obj, target, quant_layer) + parent_layer, sub_name = \ + utils.find_parent_layer_and_sub_name(model, name) + + cur_quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( + cur_layer, self._moving_rate) + setattr(parent_layer, sub_name, cur_quant_layer) def save_quantized_model(self, layer, path, input_spec=None, **config): """ @@ -468,9 +450,17 @@ def _is_target_layer(self, layer): """ Whether the layer needs to calculate output scales. 
""" - return isinstance(layer, utils.quant_output_layers) \ - or ('quantized' in layer.full_name() and \ - 'quantized_noweight' not in layer.full_name()) + flag = False + if isinstance(layer, dygraph.Layer): + # exclude fake_quant ops in quant_nn file + if utils.is_leaf_layer(layer) and \ + 'fake_quant' not in layer.full_name(): + flag = True + # consider QuantizedConv2D and QuantizedLinear ops + if 'quantized' in layer.full_name() and \ + 'quantized_noweight' not in layer.full_name(): + flag = True + return flag def _save_output_scale(self, program, scope): """ @@ -514,4 +504,4 @@ def _is_skip_quant_op(self, block, in_op): previous_ops = [utils.find_previous_op(block, arg_name) \ for arg_name in in_op.input_arg_names] return any(op is not None and op.type not in \ - utils.fake_quantize_dequantize_types for op in previous_ops) + utils.fake_quantize_dequantize_op_types for op in previous_ops) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index b4449b2751dce..a3b4811f2ff4c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -26,8 +26,8 @@ from paddle.fluid.log_helper import get_logger __all__ = [ - 'FakeQuantMovingAverage', 'FakeQuantAbsMax', - 'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear', + 'FakeQuantMovingAverageAbsMax', 'FakeQuantAbsMax', + 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', 'QuantizedLinear', 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' ] @@ -35,9 +35,9 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -class FakeQuantMovingAverage(layers.Layer): +class FakeQuantMovingAverageAbsMax(layers.Layer): r""" - FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant. + FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant. Its computational formula is described as below: :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)` @@ -50,7 +50,7 @@ def __init__(self, moving_rate=0.9, quant_bits=8, dtype='float32'): - super(FakeQuantMovingAverage, self).__init__() + super(FakeQuantMovingAverageAbsMax, self).__init__() self._moving_rate = moving_rate self._quant_bits = quant_bits @@ -103,7 +103,7 @@ def forward(self, input): return out check_variable_and_dtype(input, 'input', ['float32'], - "FakeQuantMovingAverage") + "FakeQuantMovingAverageAbsMax") attrs = { 'moving_rate': self._moving_rate, 'bit_length': self._quant_bits, @@ -215,7 +215,7 @@ def forward(self, input): return quant_out -class FakeChannelWiseQuantDequantAbsMax(layers.Layer): +class FakeQuantChannelWiseAbsMax(layers.Layer): def __init__(self, name=None, channel_num=None, @@ -224,7 +224,7 @@ def __init__(self, dtype='float32', quant_on_weight=False): assert quant_on_weight == True, "Channel_wise only can be used on weight quantization." 
- super(FakeChannelWiseQuantDequantAbsMax, self).__init__() + super(FakeQuantChannelWiseAbsMax, self).__init__() self._quant_bits = quant_bits self._quant_axis = quant_axis self._dtype = dtype @@ -270,7 +270,7 @@ def forward(self, input): return out check_variable_and_dtype(input, 'input', ['float32'], - "FakeChannelWiseQuantDequantAbsMax") + "FakeQuantChannelWiseAbsMax") attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis} inputs = {"X": [input]} quant_out = self._helper.create_variable( @@ -318,8 +318,8 @@ def _get_fake_quant_type(quant_type, **kwargs): "when you use channel_wise_abs_max strategy.") fake_quant_map = { 'abs_max': FakeQuantAbsMax, - 'moving_average_abs_max': FakeQuantMovingAverage, - 'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax + 'moving_average_abs_max': FakeQuantMovingAverageAbsMax, + 'channel_wise_abs_max': FakeQuantChannelWiseAbsMax } return fake_quant_map[quant_type](**call_args) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 9adc7df69209c..b8cb76b5e191d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle +from paddle.fluid import dygraph import numpy as np quant_input_layers_map = { @@ -37,31 +38,26 @@ 'LayerNorm': paddle.nn.LayerNorm, } -fake_quantize_dequantize_types = [ - "fake_quantize_dequantize_abs_max", - "fake_channel_wise_quantize_dequantize_abs_max", - "fake_quantize_dequantize_moving_average_abs_max" -] +# Apply fake quant for the inputs of these layers +fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] -quant_output_layers = ( - paddle.nn.Conv2D, paddle.nn.Conv2DTranspose, paddle.nn.Linear, - paddle.nn.AdaptiveAvgPool2D, paddle.nn.AdaptiveMaxPool2D, - paddle.nn.AvgPool2D, paddle.nn.MaxPool2D, paddle.nn.BatchNorm, - paddle.nn.BatchNorm2D, paddle.nn.LayerNorm, paddle.nn.SyncBatchNorm, - paddle.nn.ELU, paddle.nn.GELU, paddle.nn.Hardshrink, paddle.nn.Hardsigmoid, - paddle.nn.Hardswish, paddle.nn.Hardtanh, paddle.nn.LeakyReLU, - paddle.nn.LogSigmoid, paddle.nn.LogSoftmax, paddle.nn.Maxout, - paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU, - paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus, - paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh, - paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample, - paddle.nn.quant.FloatFunctionalLayer) +# Apply fake quant for the output of these layers +fake_quant_output_layers = [ + paddle.nn.AdaptiveAvgPool2D, + paddle.nn.AdaptiveMaxPool2D, +] weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", "depthwise_conv2d_transpose" ] +fake_quantize_dequantize_op_types = [ + "fake_quantize_dequantize_abs_max", + "fake_channel_wise_quantize_dequantize_abs_max", + "fake_quantize_dequantize_moving_average_abs_max" +] + def load_variable_data(scope, var_name): ''' @@ -91,3 +87,36 @@ def find_next_ops(block, var_name): if var_name in op.input_arg_names: res_ops.append(op) return res_ops + + +def find_parent_layer_and_sub_name(model, name): + """ + Given the model and the name of a layer, find the parent layer and + the sub_name of the layer. + For example, if name is 'block_1/convbn_1/conv_1', the parent layer is + 'block_1/convbn_1' and the sub_name is `conv_1`. 
+ """ + assert isinstance(model, dygraph.Layer), \ + "The model must be the instance of paddle.nn.Layer." + assert len(name) > 0, "The input (name) should not be empty." + + last_idx = 0 + idx = 0 + parent_layer = model + while idx < len(name): + if name[idx] == '.': + sub_name = name[last_idx:idx] + if hasattr(parent_layer, sub_name): + parent_layer = getattr(parent_layer, sub_name) + last_idx = idx + 1 + idx += 1 + sub_name = name[last_idx:idx] + return parent_layer, sub_name + + +def is_leaf_layer(layer): + """ + Whether the layer is leaf layer. + """ + return isinstance(layer, dygraph.Layer) \ + and len(layer.sublayers()) == 0 diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py index e8a26705c68ff..c7f9a5073def8 100644 --- a/python/paddle/nn/quant/__init__.py +++ b/python/paddle/nn/quant/__init__.py @@ -18,7 +18,7 @@ from .functional_layers import multiply # noqa: F401 from .functional_layers import divide # noqa: F401 from .functional_layers import reshape # noqa: F401 -from .functional_layers import tranpose # noqa: F401 +from .functional_layers import transpose # noqa: F401 from .functional_layers import concat # noqa: F401 from .functional_layers import flatten # noqa: F401 diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 7eeaa5c58e78a..03a311dc708a1 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -63,32 +63,25 @@ def forward(self, x, shape, name=None): return manipulation.reshape(x, shape, name) -class tranpose(FloatFunctionalLayer): - def __init__(self, name=None): - super(tranpose, self).__init__() - self._name = name +class transpose(FloatFunctionalLayer): + def __init__(self): + super(transpose, self).__init__() - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, perm)" - return manipulation.tranpose(inputs[0], inputs[1], self._name) + def forward(self, x, perm, name=None): + return manipulation.transpose(x, perm, name) class concat(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(concat, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, axis)" - return manipulation.concat(inputs[0], inputs[1], self._name) + def forward(self, x, axis, name=None): + return manipulation.concat(x, axis, name) class flatten(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(flatten, self).__init__() - self._name = name - def forward(self, inputs): - assert len( - inputs) == 3, "The inputs should be (x, start_axis, stop_axis)" - return manipulation.flatten(inputs[0], inputs[1], inputs[2], self._name) + def forward(self, x, start_axis, stop_axis, name=None): + return manipulation.flatten(x, start_axis, stop_axis, name) From 679629c3a6a9c746f6fa7e59a3f6f610adbfe8f2 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 09:38:38 +0000 Subject: [PATCH 4/8] Update all unit tests for dygraph qat --- .../slim/quantization/imperative/qat.py | 29 +- .../slim/quantization/imperative/quant_nn.py | 57 +- .../slim/quantization/imperative/utils.py | 16 +- .../slim/tests/test_imperative_out_scale.py | 337 ++---------- .../contrib/slim/tests/test_imperative_qat.py | 336 ++---------- .../test_imperative_qat_addquantdequant.py | 494 ------------------ .../tests/test_imperative_qat_channelwise.py | 399 +------------- .../slim/tests/test_imperative_skip_op.py | 129 +---- 
8 files changed, 178 insertions(+), 1619 deletions(-) delete mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index ac80ff8a660aa..0f2f621fdb328 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -251,12 +251,12 @@ def __init__(self, super(ImperativeQuantizeInputs, self).__init__() self._quantizable_layer_type = tuple( - utils.quant_input_layers_map[layer] - if layer in utils.quant_input_layers_map else layer + utils.layer_name_map[layer] + if layer in utils.layer_name_map else layer for layer in quantizable_layer_type) for layer in self._quantizable_layer_type: assert not isinstance(layer, str) \ - and layer in utils.quant_input_layers_map.values(), \ + and layer in utils.fake_quant_input_layers, \ "%s is unspported to be quantized." % layer quantize_type = { @@ -320,7 +320,8 @@ def apply(self, model): def _get_input_quantized_layer(self, layer): quant_layer_name = None - for key, value in utils.quant_input_layers_map.items(): + + for key, value in utils.layer_name_map.items(): if isinstance(layer, value): quant_layer_name = 'Quantized' + key break @@ -328,9 +329,6 @@ def _get_input_quantized_layer(self, layer): "The layer %s is unsupported to be quantized." \ % layer.full_name() - if layer not in utils.fake_quant_input_layers: - quant_layer_name = 'QuantizedNoweightLayer' - return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs) @@ -372,8 +370,13 @@ def apply(self, model): parent_layer, sub_name = \ utils.find_parent_layer_and_sub_name(model, name) - cur_quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( - cur_layer, self._moving_rate) + if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)): + cur_quant_layer = quant_nn.__dict__[ + "FakeQuantMAOutputScaleLayer"](cur_layer, self._moving_rate) + else: + cur_quant_layer = quant_nn.__dict__["MAOutputScaleLayer"]( + cur_layer, self._moving_rate) + setattr(parent_layer, sub_name, cur_quant_layer) def save_quantized_model(self, layer, path, input_spec=None, **config): @@ -454,12 +457,14 @@ def _is_target_layer(self, layer): if isinstance(layer, dygraph.Layer): # exclude fake_quant ops in quant_nn file if utils.is_leaf_layer(layer) and \ - 'fake_quant' not in layer.full_name(): + not isinstance(layer, tuple(utils.fake_quant_leaf_layers)): flag = True # consider QuantizedConv2D and QuantizedLinear ops - if 'quantized' in layer.full_name() and \ - 'quantized_noweight' not in layer.full_name(): + if isinstance(layer, + (quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear)): flag = True + if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer): + flag = True return flag def _save_output_scale(self, program, scope): diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index a3b4811f2ff4c..fe8e4570c69a0 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -26,9 +26,15 @@ from paddle.fluid.log_helper import get_logger __all__ = [ - 'FakeQuantMovingAverageAbsMax', 'FakeQuantAbsMax', - 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', 'QuantizedLinear', - 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' + 'FakeQuantMovingAverageAbsMax', + 
'FakeQuantAbsMax', + 'FakeQuantChannelWiseAbsMax', + 'QuantizedConv2D', + 'QuantizedLinear', + 'QuantizedNoweightLayer', + 'MovingAverageAbsMaxScale', + 'MAOutputScaleLayer', + 'FakeQuantMAOutputScaleLayer', ] _logger = get_logger( @@ -598,14 +604,19 @@ def forward(self, input): return quant_out -class QuantizedOutputLayer(layers.Layer): +class MAOutputScaleLayer(layers.Layer): + """ + Calculate the scale (moving average abs max) for the output of the input layer. + Add MovingAverageMaxScale layer to the behind of the input layer. + """ + def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): r""" - Add MovingAverageMaxScale layer to the behind of the input layer. + Construct """ - super(QuantizedOutputLayer, self).__init__() + super(MAOutputScaleLayer, self).__init__() self._layer = layer - self._moving_average_abs_max_scale = \ + self._ma_output_scale = \ MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) def forward(self, *inputs, **kwargs): @@ -616,4 +627,34 @@ def forward(self, *inputs, **kwargs): "the output threshold" % self._layer.full_name()) return out else: - return self._moving_average_abs_max_scale(out) + return self._ma_output_scale(out) + + +class FakeQuantMAOutputScaleLayer(layers.Layer): + def __init__(self, + layer, + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + *args, + **kwargs): + + super(FakeQuantMAOutputScaleLayer, self).__init__() + self._layer = layer + self._fake_quant_output = _get_fake_quant_type( + 'moving_average_abs_max', + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + def forward(self, *inputs, **kwargs): + out = self._layer(*inputs, **kwargs) + # TODO (jc): support the ops of several outputs + if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: + _logger.info("%s has several outputs, so skip collecting " + "the output threshold" % self._layer.full_name()) + return out + else: + return self._fake_quant_output(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index b8cb76b5e191d..faec53a87fdba 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -15,8 +15,9 @@ import paddle from paddle.fluid import dygraph import numpy as np +from . 
import quant_nn -quant_input_layers_map = { +layer_name_map = { 'Conv2D': paddle.nn.Conv2D, 'Linear': paddle.nn.Linear, 'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D, @@ -39,12 +40,21 @@ } # Apply fake quant for the inputs of these layers +# TODO (jc): support paddle.nn.Conv2DTranspose fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] # Apply fake quant for the output of these layers fake_quant_output_layers = [ - paddle.nn.AdaptiveAvgPool2D, - paddle.nn.AdaptiveMaxPool2D, + paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, + paddle.nn.LeakyReLU, paddle.nn.quant.add, paddle.nn.quant.subtract, + paddle.nn.quant.multiply, paddle.nn.quant.divide +] + +fake_quant_leaf_layers = [ + quant_nn.FakeQuantAbsMax, + quant_nn.FakeQuantChannelWiseAbsMax, + quant_nn.FakeQuantMovingAverageAbsMax, + quant_nn.MovingAverageAbsMaxScale, ] weight_op_types = [ diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py index 8d6ce76ef0fa5..6cc58a38f227a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -28,7 +28,6 @@ from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass, QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU @@ -36,6 +35,8 @@ from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph import nn +from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenet + paddle.enable_static() os.environ["CPU_NUM"] = "1" @@ -54,59 +55,6 @@ def get_vaild_warning_num(warning, w): return num -def StaticLenet(data, num_classes=10, classifier_activation='softmax'): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=False) - batch_norm1 = layers.batch_norm(conv1) - relu1 = layers.relu(batch_norm1) - pool1 = fluid.layers.pool2d( - relu1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - batch_norm2 = layers.batch_norm(conv2) - prelu1 = layers.prelu(batch_norm2, mode='all') - pool2 = fluid.layers.pool2d( - prelu1, pool_size=2, pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01) - fc2 = fluid.layers.fc(input=leaky_relu1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - sigmoid1 = layers.sigmoid(fc2) - fc3 = fluid.layers.fc(input=sigmoid1, - size=num_classes, - 
param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - softmax1 = layers.softmax(fc3, use_cudnn=True) - return softmax1 - - class ImperativeLenet(fluid.dygraph.Layer): def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() @@ -175,38 +123,11 @@ def forward(self, inputs): class TestImperativeOutSclae(unittest.TestCase): def test_out_scale_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'abs_max' - activation_quantize_type = 'moving_average_abs_max' - param_init_map = {} seed = 1000 lr = 0.001 - dynamic_out_scale_list = [] - static_out_scale_list = [] - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) + weight_quantize_type = 'abs_max' + activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) @@ -215,207 +136,46 @@ def _build_static_lenet(main, startup, is_test=False, seed=1000): np.random.seed(seed) fluid.default_main_program().random_seed = seed fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) + lenet = fix_model_dict(lenet) imperative_out_scale.quantize(lenet) + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - + loss_list = train_lenet(lenet, reader, adam) lenet.eval() param_save_path = "test_save_quantized_model/lenet.pdparams" save_dict = lenet.state_dict() paddle.save(save_dict, param_save_path) - path = "./dynamic_outscale_infer_model/lenet" - dynamic_save_dir = "./dynamic_outscale_infer_model" - + save_path = "./dynamic_outscale_infer_model/lenet" imperative_out_scale.save_quantized_model( layer=lenet, - path=path, + path=save_path, input_spec=[ 
paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - if "batch_norm" in param.name: - param_name = param.name.replace("norm", "norm2d") - elif 'prelu' in param.name: - param_name = param.name.replace("prelu", 'p_re_lu') - else: - param_name = param.name - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param_name], place) - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quantize_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - outscale_pass = OutScaleForTrainingPass(scope=scope, place=place) - outscale_pass.apply(main_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - scale_inference_pass = OutScaleForInferencePass(scope=scope) - scale_inference_pass.apply(infer_graph) - - save_program = infer_graph.to_program() - static_save_dir = "./static_outscale_infer_model" - with fluid.scope_guard(scope): - fluid.io.save_inference_model( - dirname=static_save_dir, - feeded_var_names=[infer_img.name], - target_vars=[infer_pre], - executor=exe, - main_program=save_program, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX) - - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') - - # load dynamic model - [dynamic_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=dynamic_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - # load static model - [static_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=static_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - - dynamic_ops = dynamic_inference_program.global_block().ops - static_ops = static_inference_program.global_block().ops - - for op in dynamic_ops[:]: - if op.type == "flatten2" or 'fake' in op.type: - dynamic_ops.remove(op) - - for op in static_ops[:]: - if 'fake' in op.type: - static_ops.remove(op) - - op_count = 0 - for i in range(len(dynamic_ops)): - if dynamic_ops[i].has_attr("out_threshold"): - op_count += 1 - self.assertTrue(dynamic_ops[i].type == static_ops[i].type) - if dynamic_ops[i].attr("out_threshold") != static_ops[i].attr( - "out_threshold"): - _logger.info(dynamic_ops[i].attr("out_threshold")) - _logger.info(static_ops[i].attr("out_threshold")) - self.assertTrue(dynamic_ops[i].attr("out_threshold") == - static_ops[i].attr("out_threshold")) - - _logger.info("op_cout: {}".format(op_count)) - self.assertTrue(op_count == 14) + for i in range(len(loss_list) - 1): + self.assertTrue( + loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): def test_save_quantized_model(self): - weight_quantize_type = 'abs_max' - activation_quantize_type = 'moving_average_abs_max' + lr = 0.001 + load_param_path = "test_save_quantized_model/lenet.pdparams" - path = "./dynamic_outscale_infer_model_from_checkpoint/lenet" - dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint" - static_model_save_dir = "./static_outscale_infer_model" + save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet" + weight_quantize_type = 'abs_max' + activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) @@ -426,56 +186,25 @@ def test_save_quantized_model(self): imperative_out_scale.quantize(lenet) lenet.set_dict(load_dict) + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + loss_list = train_lenet(lenet, reader, adam) + lenet.eval() + imperative_out_scale.save_quantized_model( layer=lenet, - path=path, + path=save_path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - # load dynamic model - [dynamic_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=dynamic_model_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - # load static model - [static_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - 
dirname=static_model_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - - dynamic_ops = dynamic_inference_program.global_block().ops - static_ops = static_inference_program.global_block().ops - - for op in dynamic_ops[:]: - if op.type == "flatten2" or 'fake' in op.type: - dynamic_ops.remove(op) - - for op in static_ops[:]: - if 'fake' in op.type: - static_ops.remove(op) - - op_count = 0 - for i in range(len(dynamic_ops)): - if dynamic_ops[i].has_attr("out_threshold"): - op_count += 1 - self.assertTrue(dynamic_ops[i].type == static_ops[i].type) - self.assertTrue(dynamic_ops[i].attr("out_threshold") == - static_ops[i].attr("out_threshold")) - - _logger.info("op_cout: {}".format(op_count)) - self.assertTrue(op_count == 14) + for i in range(len(loss_list) - 1): + self.assertTrue( + loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 99a23525409f3..a188a1fdfa9f3 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -21,20 +21,20 @@ import time import unittest import logging + import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential from paddle.nn import Linear, Conv2D, Softmax -from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D +from imperative_test_utils import fix_model_dict, ImperativeLenet + paddle.enable_static() os.environ["CPU_NUM"] = "1" @@ -45,115 +45,6 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = fluid.layers.pool2d( - conv2, pool_size=2, pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc4 = 
fluid.layers.softmax(fc3, use_cudnn=True) - - return fc4 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2)) - - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - class TestImperativeQat(unittest.TestCase): """ QAT = quantization-aware training @@ -164,19 +55,26 @@ def setUpClass(cls): timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) cls.root_path = os.path.join(os.getcwd(), "imperative_qat_" + timestamp) cls.save_path = os.path.join(cls.root_path, "lenet") - cls.dynamic_root_path = os.path.join(os.getcwd(), - "dynamic_mnist_" + timestamp) - cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model") @classmethod def tearDownClass(cls): - shutil.rmtree(cls.root_path) - shutil.rmtree(cls.dynamic_root_path) + try: + shutil.rmtree(cls.root_path) + except Exception as e: + print("Failed to delete {} due to {}".format(cls.root_path, str(e))) + + def set_quant_type(self): + self.weight_quantize_type = None + self.activation_quantize_type = None + print('weight_quantize_type', self.weight_quantize_type) + + def run_qat_save(self): + self.set_quant_type() - def test_qat_save(self): imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') + weight_quantize_type=self.weight_quantize_type, + activation_quantize_type=self.activation_quantize_type) + with fluid.dygraph.guard(): # For CI coverage conv1 = Conv2D( @@ -190,10 +88,17 @@ def test_qat_save(self): data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) + seed = 1 + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + lenet = fix_model_dict(lenet) imperative_qat.quantize(lenet) adam = AdamOptimizer( learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=32, drop_last=True) test_reader = paddle.batch( @@ -226,6 +131,7 @@ def test_qat_save(self): break lenet.eval() + eval_acc_top1_list = [] for batch_id, data in 
enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') @@ -242,14 +148,19 @@ def test_qat_save(self): input=out, label=label, k=5) if batch_id % 100 == 0: + eval_acc_top1_list.append(float(acc_top1.numpy())) _logger.info( "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". format(epoch, batch_id, acc_top1.numpy(), acc_top5.numpy())) - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") + # check eval acc + eval_acc_top1 = sum(eval_acc_top1_list) / len( + eval_acc_top1_list) + print('eval_acc_top1', eval_acc_top1) + self.assertTrue( + eval_acc_top1 > 0.9, + msg="The test acc {%f} is less than 0.9." % eval_acc_top1) # test the correctness of `paddle.jit.save` data = next(test_reader()) @@ -260,13 +171,14 @@ def test_qat_save(self): before_save = lenet(test_img) # save inference quantized model - paddle.jit.save( + imperative_qat.save_quantized_model( layer=lenet, - path=TestImperativeQat.save_path, + path=self.save_path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) + print('Quantized model saved in {%s}' % self.save_path) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) @@ -275,183 +187,27 @@ def test_qat_save(self): exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model( - dirname=TestImperativeQat.root_path, + dirname=self.root_path, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX) after_save, = exe.run(inference_program, feed={feed_target_names[0]: test_data}, fetch_list=fetch_targets) - + # check self.assertTrue( np.allclose(after_save, before_save.numpy()), msg='Failed to save the inference quantized model.') - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.01 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type) - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) +class TestImperativeQatAbsMax(TestImperativeQat): + def set_quant_type(self): + 
self.weight_quantize_type = 'abs_max' + self.activation_quantize_type = 'moving_average_abs_max' + print('weight_quantize_type', self.weight_quantize_type) - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - - paddle.jit.save( - layer=lenet, - path=TestImperativeQat.dynamic_save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break - - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') + def test_qat(self): + self.run_qat_save() if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py deleted file mode 100644 index f5b3e89ef415c..0000000000000 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py +++ /dev/null @@ -1,494 +0,0 @@ -# copyright (c) 2018 paddlepaddle authors. all rights reserved. -# -# licensed under the apache license, version 2.0 (the "license"); -# you may not use this file except in compliance with the license. -# you may obtain a copy of the license at -# -# http://www.apache.org/licenses/license-2.0 -# -# unless required by applicable law or agreed to in writing, software -# distributed under the license is distributed on an "as is" basis, -# without warranties or conditions of any kind, either express or implied. -# see the license for the specific language governing permissions and -# limitations under the license. - -from __future__ import print_function - -import os -import numpy as np -import random -import shutil -import time -import unittest -import logging -import paddle -import six -import paddle.fluid as fluid -from paddle.nn import functional -from paddle.nn import Linear, Conv2D, Softmax, BatchNorm -from paddle.fluid.layers import nn -from paddle.fluid import core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware, QuantizationTransformPass, AddQuantDequantPass -from paddle.fluid.dygraph.container import Sequential -from paddle.fluid.dygraph.nn import Pool2D -from paddle.nn.layer.activation import ReLU, LeakyReLU, ReLU6, Tanh, Swish -from paddle.fluid.log_helper import get_logger -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -paddle.enable_static() - -os.environ["CPU_NUM"] = "1" -if core.is_compiled_with_cuda(): - fluid.set_flags({"FLAGS_cudnn_deterministic": True}) - -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') - - -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - conv1 = fluid.layers.leaky_relu(conv1, alpha=0.02) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = 
fluid.layers.pool2d( - conv2, pool_size=2, pool_type='max', pool_stride=2) - pool2 = fluid.layers.relu(pool2) - pool2 = fluid.layers.swish(pool2) - conv3 = fluid.layers.conv2d( - pool2, - num_filters=16, - filter_size=1, - stride=1, - padding=0, - param_attr=conv2d_w3_attr, - bias_attr=conv2d_b3_attr) - conv3 = fluid.layers.relu6(conv3) - conv3 = paddle.tensor.math.tanh(conv3) - fc1 = fluid.layers.fc(input=conv3, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc3 = fluid.layers.softmax(fc3, use_cudnn=True) - - return fc3 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - LeakyReLU(negative_slope=0.02), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - ReLU(), - Swish(), - Conv2D( - in_channels=16, - out_channels=16, - kernel_size=1, - stride=1, - padding=0, - weight_attr=conv2d_w3_attr, - bias_attr=conv2d_b3_attr), - ReLU6(), - Tanh()) - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - -class TestImperativeAddQuantDequant(unittest.TestCase): - @classmethod - def setUpClass(cls): - timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) - cls.root_path = os.path.join(os.getcwd(), - "imperative_qat_aqd_" + timestamp) - cls.save_path = os.path.join(cls.root_path, "lenet") - cls.dynamic_root_path = os.path.join(os.getcwd(), - "dynamic_mnist_aqd_" + timestamp) - cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.root_path) - shutil.rmtree(cls.dynamic_root_path) - - def test_qat_save(self): - - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - quantizable_layer_type=[ - 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', - 'Swish' - ]) - - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - 
learning_rate=0.001, parameter_list=lenet.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) - - epoch_num = 1 - for epoch in range(epoch_num): - lenet.train() - for batch_id, data in enumerate(train_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - out = lenet(img) - acc = fluid.layers.accuracy(out, label) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - if batch_id % 100 == 0: - _logger.info( - "Train | At epoch {} step {}: loss = {:}, acc= {:}". - format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) - if batch_id == 500: # For shortening CI time - break - - lenet.eval() - for batch_id, data in enumerate(test_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) - - if batch_id % 100 == 0: - _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". - format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) - - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") - - # test the correctness of `paddle.jit.save` - data = next(test_reader()) - test_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - test_img = fluid.dygraph.to_variable(test_data) - lenet.eval() - before_save = lenet(test_img) - - # save inference quantized model - paddle.jit.save( - layer=lenet, - path=TestImperativeAddQuantDequant.save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=TestImperativeAddQuantDequant.root_path, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX) - after_save, = exe.run(inference_program, - feed={feed_target_names[0]: test_data}, - fetch_list=fetch_targets) - - self.assertTrue( - np.allclose(after_save, before_save.numpy()), - msg='Failed to save the inference quantized model.') - - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 
'abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.001 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type, - quantizable_layer_type=[ - 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', - 'Swish' - ]) - - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) - - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - if batch_id > 500: - break - lenet.eval() - paddle.jit.save( - layer=lenet, - path=TestImperativeAddQuantDequant.dynamic_save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - add_quant_dequant_pass = AddQuantDequantPass( - scope=scope, - place=place, - quantizable_op_type=[ - 'relu', 'leaky_relu', 'relu6', 'tanh', 'swish' - ]) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - 
add_quant_dequant_pass.apply(main_graph) - add_quant_dequant_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-08 - atol = 1e-10 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". - format(diff, i, loss_d, loss_s)) - break - - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index f888edfcc977a..da4e285633680 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -19,18 +19,13 @@ import random import unittest import logging + import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass -from paddle.fluid.dygraph.container import Sequential -from paddle.nn import Linear, Conv2D, Softmax -from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX + +from test_imperative_qat import TestImperativeQat paddle.enable_static() @@ -42,388 +37,14 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = fluid.layers.pool2d( - conv2, pool_size=2, 
pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc3 = fluid.layers.softmax(fc3, use_cudnn=True) - - return fc3 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2)) - - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - -class TestImperativeQatChannelWise(unittest.TestCase): - """ - QAT = quantization-aware training - """ - - def test_qat_save(self): - imperative_qat = ImperativeQuantAware( - weight_quantize_type='channel_wise_abs_max', - activation_quantize_type='moving_average_abs_max') - - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=lenet.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) - - epoch_num = 1 - for epoch in range(epoch_num): - lenet.train() - for batch_id, data in enumerate(train_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - out = lenet(img) - acc = fluid.layers.accuracy(out, label) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - if batch_id % 100 == 0: - _logger.info( - "Train | At epoch {} step {}: loss = {:}, acc= {:}". 
- format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) - - lenet.eval() - for batch_id, data in enumerate(test_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) - - if batch_id % 100 == 0: - _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". - format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) - - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") - - # test the correctness of `paddle.jit.save` - data = next(test_reader()) - test_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - test_img = fluid.dygraph.to_variable(test_data) - lenet.eval() - before_save = lenet(test_img) - - # save inference quantized model - path = "./qat_infer_model/mnist" - save_dir = "./qat_infer_model" - paddle.jit.save( - layer=lenet, - path=path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=save_dir, - executor=exe, - model_filename="mnist" + INFER_MODEL_SUFFIX, - params_filename="mnist" + INFER_PARAMS_SUFFIX) - after_save, = exe.run(inference_program, - feed={feed_target_names[0]: test_data}, - fetch_list=fetch_targets) - - self.assertTrue( - np.allclose(after_save, before_save.numpy()), - msg='Failed to save the inference quantized model.') - - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'channel_wise_abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.001 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type) - - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - 
lenet.set_dict(fixed_state) - - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - - paddle.jit.save( - layer=lenet, - path="./dynamic_mnist/model", - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break +class TestImperativeQatChannelWise(TestImperativeQat): + def set_quant_type(self): + self.weight_quantize_type = 'channel_wise_abs_max' + self.activation_quantize_type = 'moving_average_abs_max' + print('weight_quantize_type', self.weight_quantize_type) - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') + def test_qat(self): + self.run_qat_save() if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index bda02769cea86..bb24f941c625e 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -31,6 +31,8 @@ from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger +from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant + os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) @@ -39,144 +41,33 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.conv2d_0 = Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - self.conv2d_0.skip_quant = True - - self.batch_norm_0 = BatchNorm(6) - self.relu_0 = ReLU() - self.pool2d_0 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) - self.conv2d_1 = Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - self.conv2d_1.skip_quant = False - - self.batch_norm_1 = BatchNorm(16) - self.relu6_0 = ReLU6() - self.pool2d_1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) - self.linear_0 = Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - self.linear_0.skip_quant = True - - self.leaky_relu_0 = LeakyReLU() - self.linear_1 = Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - self.linear_1.skip_quant = False - - self.sigmoid_0 = Sigmoid() - self.linear_2 = Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - self.linear_2.skip_quant = False - self.softmax_0 = Softmax() - - def forward(self, inputs): - x = self.conv2d_0(inputs) - x = self.batch_norm_0(x) - x = self.relu_0(x) - x = self.pool2d_0(x) - x = self.conv2d_1(x) - x = self.batch_norm_1(x) - x = self.relu6_0(x) - x = self.pool2d_1(x) - - x = fluid.layers.flatten(x, 1) - - x = self.linear_0(x) - x = self.leaky_relu_0(x) - x = self.linear_1(x) - x = self.sigmoid_0(x) - x = self.linear_2(x) - x = self.softmax_0(x) - - return x - - class 
TestImperativeOutSclae(unittest.TestCase): def test_out_scale_acc(self): seed = 1000 lr = 0.1 - imperative_out_scale = ImperativeQuantAware() + qat = ImperativeQuantAware() np.random.seed(seed) reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=512, drop_last=True) - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, - size=np.product(p_shape)).reshape(p_shape).astype('float32') - fixed_state[name] = value - lenet.set_dict(fixed_state) - imperative_out_scale.quantize(lenet) + + lenet = ImperativeLenetWithSkipQuant() + lenet = fix_model_dict(lenet) + qat.quantize(lenet) + adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + loss_list = train_lenet(lenet, reader, adam) lenet.eval() path = "./save_dynamic_quant_infer_model/lenet" save_dir = "./save_dynamic_quant_infer_model" - imperative_out_scale.save_quantized_model( + qat.save_quantized_model( layer=lenet, path=path, input_spec=[ From 4a344c67a709b066bdf1d7995a1aef3bbf9e343f Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 09:56:16 +0000 Subject: [PATCH 5/8] add default input params for wrapped functional layers --- python/paddle/nn/quant/functional_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 03a311dc708a1..ce5fb3e616eb5 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -75,7 +75,7 @@ class concat(FloatFunctionalLayer): def __init__(self): super(concat, self).__init__() - def forward(self, x, axis, name=None): + def forward(self, x, axis=0, name=None): return manipulation.concat(x, axis, name) @@ -83,5 +83,5 @@ class flatten(FloatFunctionalLayer): def __init__(self): super(flatten, self).__init__() - def forward(self, x, start_axis, stop_axis, name=None): + def forward(self, x, start_axis=0, stop_axis=-1, name=None): return manipulation.flatten(x, start_axis, stop_axis, name) From 4fdfe37d3dd134492753d7bb0bca06608b68e25b Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 3 Jun 2021 02:18:17 +0000 Subject: [PATCH 6/8] up, test=develop --- .../slim/quantization/imperative/qat.py | 15 +- .../slim/quantization/imperative/quant_nn.py | 9 +- .../slim/quantization/imperative/utils.py | 2 + .../slim/tests/imperative_test_utils.py | 222 ++++++++++++++++++ 4 files changed, 237 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py 
b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 0f2f621fdb328..600ce6397e1af 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -363,19 +363,19 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - for name, cur_layer in model.named_sublayers(): + for cur_name, cur_layer in model.named_sublayers(): if not self._is_target_layer(cur_layer): continue parent_layer, sub_name = \ - utils.find_parent_layer_and_sub_name(model, name) + utils.find_parent_layer_and_sub_name(model, cur_name) if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)): - cur_quant_layer = quant_nn.__dict__[ - "FakeQuantMAOutputScaleLayer"](cur_layer, self._moving_rate) - else: - cur_quant_layer = quant_nn.__dict__["MAOutputScaleLayer"]( + cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer( cur_layer, self._moving_rate) + else: + cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer, + self._moving_rate) setattr(parent_layer, sub_name, cur_quant_layer) @@ -460,8 +460,7 @@ def _is_target_layer(self, layer): not isinstance(layer, tuple(utils.fake_quant_leaf_layers)): flag = True # consider QuantizedConv2D and QuantizedLinear ops - if isinstance(layer, - (quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear)): + if isinstance(layer, tuple(utils.fake_quant_wrap_layers)): flag = True if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer): flag = True diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index fe8e4570c69a0..9d5c4ca241704 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -610,14 +610,16 @@ class MAOutputScaleLayer(layers.Layer): Add MovingAverageMaxScale layer to the behind of the input layer. 
""" - def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): + def __init__(self, layer=None, moving_rate=0.9, name=None, dtype='float32'): r""" Construct """ super(MAOutputScaleLayer, self).__init__() self._layer = layer + if name is None: + name = layer.full_name() self._ma_output_scale = \ - MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) + MovingAverageAbsMaxScale(name, moving_rate, dtype) def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) @@ -636,6 +638,7 @@ def __init__(self, weight_bits=8, activation_bits=8, moving_rate=0.9, + name=None, *args, **kwargs): @@ -643,7 +646,7 @@ def __init__(self, self._layer = layer self._fake_quant_output = _get_fake_quant_type( 'moving_average_abs_max', - name=layer.full_name(), + name=layer.full_name() if name is None else name, moving_rate=moving_rate, quant_bits=activation_bits, dtype=self._dtype, diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index faec53a87fdba..dada8e6e7a17d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -57,6 +57,8 @@ quant_nn.MovingAverageAbsMaxScale, ] +fake_quant_wrap_layers = [quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear] + weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", "depthwise_conv2d_transpose" diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py new file mode 100644 index 0000000000000..e491b513c118f --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -0,0 +1,222 @@ +# copyright (c) 2021 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. 
+import numpy as np +import logging + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.dygraph.container import Sequential +from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D + +from paddle.fluid.log_helper import get_logger + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def fix_model_dict(model): + fixed_state = {} + for name, param in model.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, + size=np.product(p_shape)).reshape(p_shape).astype('float32') + fixed_state[name] = value + model.set_dict(fixed_state) + return model + + +def train_lenet(lenet, reader, optimizer): + loss_list = [] + lenet.train() + + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = paddle.to_tensor(x_data) + label = paddle.to_tensor(y_data) + + out = lenet(img) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + + optimizer.minimize(avg_loss) + lenet.clear_gradients() + + if batch_id % 100 == 0: + loss_list.append(avg_loss.numpy()[0]) + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + return loss_list + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=False), + BatchNorm2D(6), + ReLU(), + MaxPool2D( + kernel_size=2, stride=2), + Conv2D( + in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + BatchNorm2D(16), + PReLU(), + MaxPool2D( + kernel_size=2, stride=2)) + + self.fc = Sequential( + Linear( + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + LeakyReLU(), + Linear( + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Sigmoid(), + Linear( + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), + Softmax()) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(ImperativeLenetWithSkipQuant, self).__init__() + + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + 
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.conv2d_0 = Conv2D( + in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + self.conv2d_0.skip_quant = True + + self.batch_norm_0 = BatchNorm2D(6) + self.relu_0 = ReLU() + self.pool2d_0 = MaxPool2D(kernel_size=2, stride=2) + self.conv2d_1 = Conv2D( + in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + self.conv2d_1.skip_quant = False + + self.batch_norm_1 = BatchNorm2D(16) + self.relu6_0 = ReLU6() + self.pool2d_1 = MaxPool2D(kernel_size=2, stride=2) + self.linear_0 = Linear( + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + self.linear_0.skip_quant = True + + self.leaky_relu_0 = LeakyReLU() + self.linear_1 = Linear( + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + self.linear_1.skip_quant = False + + self.sigmoid_0 = Sigmoid() + self.linear_2 = Linear( + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + self.linear_2.skip_quant = False + self.softmax_0 = Softmax() + + def forward(self, inputs): + x = self.conv2d_0(inputs) + x = self.batch_norm_0(x) + x = self.relu_0(x) + x = self.pool2d_0(x) + x = self.conv2d_1(x) + x = self.batch_norm_1(x) + x = self.relu6_0(x) + x = self.pool2d_1(x) + + x = fluid.layers.flatten(x, 1) + + x = self.linear_0(x) + x = self.leaky_relu_0(x) + x = self.linear_1(x) + x = self.sigmoid_0(x) + x = self.linear_2(x) + x = self.softmax_0(x) + + return x From 44ed2d90bcdd77515afdfa8c2de354c786de0ecd Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 3 Jun 2021 02:54:28 +0000 Subject: [PATCH 7/8] up, test=develop --- python/paddle/fluid/contrib/slim/tests/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index 758e01b8245a2..7ae404b6d0dec 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -270,12 +270,6 @@ list(REMOVE_ITEM TEST_OPS #TODO(wanghaoshuang): Fix this unitest failed on GCC8. 
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning) LIST(REMOVE_ITEM TEST_OPS test_filter_pruning) - -# only tests on singal GPU environment -LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant) - -py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS - CUDA_VISIBLE_DEVICES=0) # fix if(WIN32) @@ -313,7 +307,6 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120) set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120) if(LINUX AND WITH_MKLDNN) set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120) From 006ed3202f1cae24f3903d42e21bf9ed8293a8cc Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Mon, 7 Jun 2021 03:04:52 +0000 Subject: [PATCH 8/8] up, test=develop --- .../slim/quantization/imperative/quant_nn.py | 14 +-- .../slim/quantization/imperative/utils.py | 7 +- .../slim/tests/imperative_test_utils.py | 2 + .../contrib/slim/tests/test_imperative_qat.py | 6 +- .../tests/test_imperative_qat_channelwise.py | 2 +- .../test_nn_quant_functional_layers.py | 87 +++++++++++++++++++ 6 files changed, 98 insertions(+), 20 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 9d5c4ca241704..fd1f7f423ff8f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -509,15 +509,7 @@ def __init__(self, quant_on_weight=False) def forward(self, input): - # TODO (jc): support ops that have several inputs - if (isinstance(input, list) or isinstance(input, tuple)) \ - and len(input) > 1: - _logger.info("%s has several inputs, so skip collecting " - "the input scales" % self._layer.full_name()) - return self._layer.forward(input) - else: - quant_input = self._fake_quant_input(input) - return self._layer.forward(quant_input) + return self._layer.forward(self._fake_quant_input(input)) class MovingAverageAbsMaxScale(layers.Layer): @@ -625,8 +617,6 @@ def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: - _logger.info("%s has several outputs, so skip collecting " - "the output threshold" % self._layer.full_name()) return out else: return self._ma_output_scale(out) @@ -656,8 +646,6 @@ def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: - _logger.info("%s has several outputs, so skip collecting " - "the output threshold" % self._layer.full_name()) return out else: return self._fake_quant_output(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index dada8e6e7a17d..94639b9cc68f9 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ 
b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -44,10 +44,11 @@ fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] # Apply fake quant for the output of these layers +# TODO(jc): fix the problem of adding duplicate fake_quant ops +# paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU,paddle.nn.LeakyReLU fake_quant_output_layers = [ - paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, - paddle.nn.LeakyReLU, paddle.nn.quant.add, paddle.nn.quant.subtract, - paddle.nn.quant.multiply, paddle.nn.quant.divide + paddle.nn.quant.add, paddle.nn.quant.subtract, paddle.nn.quant.multiply, + paddle.nn.quant.divide ] fake_quant_leaf_layers = [ diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index e491b513c118f..cc26f6a88f2e0 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -127,11 +127,13 @@ def __init__(self, num_classes=10): weight_attr=fc_w3_attr, bias_attr=fc_b3_attr), Softmax()) + self.add = paddle.nn.quant.add() def forward(self, inputs): x = self.features(inputs) x = fluid.layers.flatten(x, 1) + x = self.add(x, paddle.to_tensor(0.0)) # For CI x = self.fc(x) return x diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index a188a1fdfa9f3..bf411e5b38efa 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -63,13 +63,13 @@ def tearDownClass(cls): except Exception as e: print("Failed to delete {} due to {}".format(cls.root_path, str(e))) - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = None self.activation_quantize_type = None print('weight_quantize_type', self.weight_quantize_type) def run_qat_save(self): - self.set_quant_type() + self.set_vars() imperative_qat = ImperativeQuantAware( weight_quantize_type=self.weight_quantize_type, @@ -201,7 +201,7 @@ def run_qat_save(self): class TestImperativeQatAbsMax(TestImperativeQat): - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = 'abs_max' self.activation_quantize_type = 'moving_average_abs_max' print('weight_quantize_type', self.weight_quantize_type) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index da4e285633680..3d2cad388d172 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -38,7 +38,7 @@ class TestImperativeQatChannelWise(TestImperativeQat): - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = 'channel_wise_abs_max' self.activation_quantize_type = 'moving_average_abs_max' print('weight_quantize_type', self.weight_quantize_type) diff --git a/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py new file mode 100644 index 0000000000000..86dc43bacf86b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py @@ -0,0 +1,87 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle + + +class TestFunctionalLayers(unittest.TestCase): + """ + """ + + def setUp(self): + paddle.disable_static() + np.random.seed(1) + + shape = [3, 100, 120] + self.x = paddle.to_tensor(np.random.random(shape)) + self.y = paddle.to_tensor(np.random.random(shape)) + + def check(self, x, y): + self.assertTrue(np.allclose(x.numpy(), y.numpy())) + + def test_quant_add(self): + out_1 = paddle.add(self.x, self.y) + out_2 = paddle.nn.quant.add()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_subtract(self): + out_1 = paddle.subtract(self.x, self.y) + out_2 = paddle.nn.quant.subtract()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_multiply(self): + out_1 = paddle.multiply(self.x, self.y) + out_2 = paddle.nn.quant.multiply()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_divide(self): + out_1 = paddle.divide(self.x, self.y) + out_2 = paddle.nn.quant.divide()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_reshape(self): + reshape = [120, 300] + out_1 = paddle.reshape(self.x, reshape) + out_2 = paddle.nn.quant.reshape()(self.x.clone(), reshape) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_transpose(self): + perm = [1, 2, 0] + out_1 = paddle.transpose(self.x, perm) + out_2 = paddle.nn.quant.transpose()(self.x.clone(), perm) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_concat(self): + out_1 = paddle.concat([self.x, self.y], axis=0) + out_2 = paddle.nn.quant.concat()([self.x, self.y], 0) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_flatten(self): + start_axis = 1 + end_axis = 2 + out_1 = paddle.flatten(self.x, start_axis, end_axis) + out_2 = paddle.nn.quant.flatten()(self.x.clone(), start_axis, end_axis) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + +if __name__ == '__main__': + unittest.main()
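
A minimal end-to-end sketch of how the wrapped functional layers are meant to be used together with ImperativeQuantAware, assuming the patches above are applied. TinyNet, its layer sizes, and the save path are illustrative assumptions, not code taken from the patches; only the paddle.nn.quant wrappers and the ImperativeQuantAware API shown in the diffs are relied on.

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware


class TinyNet(paddle.nn.Layer):
    def __init__(self, num_classes=10):
        super(TinyNet, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 6, 3, stride=1, padding=1)
        self.relu = paddle.nn.ReLU()
        # Functional ops go through the paddle.nn.quant wrapper layers so that
        # ImperativeQuantAware can treat them as quantizable sublayers.
        self.flatten = paddle.nn.quant.flatten()
        self.add = paddle.nn.quant.add()
        self.fc = paddle.nn.Linear(6 * 28 * 28, num_classes)

    def forward(self, x):
        x = self.relu(self.conv(x))
        x = self.flatten(x, 1, -1)
        # Mirrors the ImperativeLenet trick above: a broadcast add through the
        # wrapper, so an output scale is collected for the elementwise op.
        x = self.add(self.fc(x), paddle.to_tensor(0.0))
        return x


qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
model = TinyNet()
qat.quantize(model)  # replaces sublayers with their fake-quant counterparts in place
# ... run the usual dygraph training loop here ...
qat.save_quantized_model(
    layer=model,
    path='./tiny_qat/model',
    input_spec=[
        paddle.static.InputSpec(
            shape=[None, 1, 28, 28], dtype='float32')
    ])

save_quantized_model exports the inference program together with the collected scales under the given path, which is what the reworked tests load back via fluid.io.load_inference_model.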