From 10e3cf43d436a1a611bf477e7efd3b8d50486486 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 27 May 2021 09:06:55 +0000 Subject: [PATCH 1/8] Add wrap for functional api --- .../slim/quantization/imperative/quant_nn.py | 28 +++++--- .../slim/quantization/imperative/utils.py | 3 +- python/paddle/nn/quant/__init__.py | 17 +++++ python/paddle/nn/quant/functional_layers.py | 71 +++++++++++++++++++ 4 files changed, 109 insertions(+), 10 deletions(-) create mode 100644 python/paddle/nn/quant/__init__.py create mode 100644 python/paddle/nn/quant/functional_layers.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index f6fef0689d43a..507f3403c7e62 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -22,6 +22,7 @@ from paddle.fluid.initializer import Constant from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.nn import functional as F +from paddle.fluid.log_helper import get_logger __all__ = [ 'FakeQuantMovingAverage', 'FakeQuantAbsMax', @@ -29,6 +30,9 @@ 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' ] +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + class FakeQuantMovingAverage(layers.Layer): r""" @@ -498,12 +502,15 @@ def __init__(self, quant_on_weight=False) def forward(self, input): - quant_input = self._fake_quant_input(input) # TODO (jc): support ops that have several inputs - if isinstance(input, list): - assert len(input) == 1, \ - "The QuantizedNoweightLayer should only have one input." - return self._layer.forward(quant_input) + if (isinstance(input, list) or isinstance(input, tuple)) \ + and len(input) > 1: + _logger.info("%s has several inputs, so skip collecting " + "the input scales" % self._layer.full_name()) + return self._layer.forward(input) + else: + quant_input = self._fake_quant_input(input) + return self._layer.forward(quant_input) class MovingAverageAbsMaxScale(layers.Layer): @@ -601,8 +608,11 @@ def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) def forward(self, input): - if isinstance(input, list): - assert len(input) == 1, \ - "The QuantizedOutputLayer should only have one input." 
out = self._layer(input) - return self._moving_average_abs_max_scale(out) + # TODO (jc): support the ops of several outputs + if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: + _logger.info("%s has several outputs, so skip collecting " + "the output threshold" % self._layer.full_name()) + return out + else: + return self._moving_average_abs_max_scale(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 491f8a7e25cbc..9adc7df69209c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -54,7 +54,8 @@ paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU, paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus, paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh, - paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample) + paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample, + paddle.nn.quant.FloatFunctionalLayer) weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py new file mode 100644 index 0000000000000..f78586e0c6fde --- /dev/null +++ b/python/paddle/nn/quant/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .functional_layers import FloatFunctionalLayer # noqa: F401 + +__all__ = ['FloatFunctionalLayer'] diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py new file mode 100644 index 0000000000000..a1013dbd773ce --- /dev/null +++ b/python/paddle/nn/quant/functional_layers.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from ...fluid.dygraph import layers +from ...tensor import math, manipulation + +__all__ = [] + + +class FloatFunctionalLayer(layers.Layer): + def __init__(self): + super(FloatFunctionalLayer, self).__init__() + + def add(x, y, name=None): + """ + Wrap paddle.add + """ + return math.add(x, y, name) + + def subtract(x, y, name=None): + """ + Wrap paddle.subtract + """ + return math.subtract(x, y, name) + + def multiply(x, y, name=None): + """ + Wrap paddle.multiply + """ + return math.multiply(x, y, name) + + def divide(x, y, name=None): + """ + Wrap paddle.divide + """ + return math.divide(x, y, name) + + def reshape(x, shape, name=None): + """ + Wrap paddle.reshape + """ + return manipulation.reshape(x, shape, name) + + def tranpose(x, perm, name=None): + """ + Wrap paddle.tranpose + """ + return manipulation.transpose(x, perm, name) + + def concat(x, axis=0, name=None): + """ + Warp paddle.concat + """ + return manipulation.concat(x, axis, name) + + def flatten(x, start_axis=0, stop_axis=-1, name=None): + """ + Warp paddle.flatten + """ + return manipulation.flatten(x, start_axis, stop_axis, name) From dfce1d393c169ca2738d6da0cfb94b0160b15f26 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Mon, 31 May 2021 03:27:13 +0000 Subject: [PATCH 2/8] Refine the wraped api --- .../slim/quantization/imperative/quant_nn.py | 4 +- python/paddle/nn/quant/functional_layers.py | 40 +++++++------------ 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index a37094f2f52d3..b4449b2751dce 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -608,8 +608,8 @@ def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): self._moving_average_abs_max_scale = \ MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) - def forward(self, input): - out = self._layer(input) + def forward(self, *inputs, **kwargs): + out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: _logger.info("%s has several outputs, so skip collecting " diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index e7c0011777843..7eeaa5c58e78a 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -24,53 +24,43 @@ def __init__(self): class add(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(add, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.add(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.add(x, y, name) class subtract(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(subtract, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.subtract(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.subtract(x, y, name) class multiply(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(multiply, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" 
- return math.multiply(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.multiply(x, y, name) class divide(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(divide, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, y)" - return math.divide(inputs[0], inputs[1], self._name) + def forward(self, x, y, name=None): + return math.divide(x, y, name) class reshape(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(reshape, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, shape)" - return manipulation.reshape(inputs[0], inputs[1], self._name) + def forward(self, x, shape, name=None): + return manipulation.reshape(x, shape, name) class tranpose(FloatFunctionalLayer): From d35cc88f1e87e12a96cbe56cbc773683fd4a7c1f Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 03:34:28 +0000 Subject: [PATCH 3/8] Add unit test for quant functional layers --- .../slim/quantization/imperative/qat.py | 86 ++++++++----------- .../slim/quantization/imperative/quant_nn.py | 22 ++--- .../slim/quantization/imperative/utils.py | 65 ++++++++++---- python/paddle/nn/quant/__init__.py | 2 +- python/paddle/nn/quant/functional_layers.py | 29 +++---- 5 files changed, 108 insertions(+), 96 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 66b11d1f17ad4..ac80ff8a660aa 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -255,20 +255,21 @@ def __init__(self, if layer in utils.quant_input_layers_map else layer for layer in quantizable_layer_type) for layer in self._quantizable_layer_type: - assert not isinstance(layer, str), \ + assert not isinstance(layer, str) \ + and layer in utils.quant_input_layers_map.values(), \ "%s is unspported to be quantized." % layer quantize_type = { 'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max' } - assert weight_quantize_type in quantize_type, \ + assert weight_quantize_type != 'moving_average_abs_max' \ + and weight_quantize_type in quantize_type, \ "Unsupported weight_quantize_type: %s. It can only " \ - "be abs_max or moving_average_abs_max or " \ - "channel_wise_abs_max." % weight_quantize_type - assert activation_quantize_type != 'channel_wise_abs_max' \ - and activation_quantize_type in quantize_type, \ + "be abs_max or channel_wise_abs_max." % weight_quantize_type + # TODO (jc): activation_quantize_type supports range_abs_max + assert activation_quantize_type == 'moving_average_abs_max', \ "Unsupported activation_quantize_type: %s. It can " \ - "only be abs_max or moving_average_abs_max now." \ + "only be moving_average_abs_max now." \ % activation_quantize_type bits_check = lambda bits: isinstance(bits, int) \ @@ -305,26 +306,17 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." 
- for name, layer in model.named_sublayers(): - if not isinstance(layer, self._quantizable_layer_type) \ - or (hasattr(layer, "skip_quant") \ - and layer.skip_quant == True): + for name, cur_layer in model.named_sublayers(): + if not isinstance(cur_layer, self._quantizable_layer_type) \ + or (hasattr(cur_layer, "skip_quant") \ + and cur_layer.skip_quant == True): continue - # TODO(jc): optimize this module - last_idx = 0 - idx = 0 - obj = model - while idx < len(name): - if (name[idx] == '.'): - if hasattr(obj, name[last_idx:idx]): - obj = getattr(obj, name[last_idx:idx]) - last_idx = idx + 1 - idx += 1 - target = name[last_idx:idx] - - quant_layer = self._get_input_quantized_layer(layer) - setattr(obj, target, quant_layer) + parent_layer, sub_name = \ + utils.find_parent_layer_and_sub_name(model, name) + + cur_quant_layer = self._get_input_quantized_layer(cur_layer) + setattr(parent_layer, sub_name, cur_quant_layer) def _get_input_quantized_layer(self, layer): quant_layer_name = None @@ -336,8 +328,7 @@ def _get_input_quantized_layer(self, layer): "The layer %s is unsupported to be quantized." \ % layer.full_name() - layer_with_weight = ['QuantizedConv2D', 'QuantizedLinear'] - if quant_layer_name not in layer_with_weight: + if layer not in utils.fake_quant_input_layers: quant_layer_name = 'QuantizedNoweightLayer' return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs) @@ -374,25 +365,16 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - for name, layer in model.named_sublayers(): - if not self._is_target_layer(layer): + for name, cur_layer in model.named_sublayers(): + if not self._is_target_layer(cur_layer): continue - # TODO(jc): optimize this module - last_idx = 0 - idx = 0 - obj = model - while idx < len(name): - if (name[idx] == '.'): - if hasattr(obj, name[last_idx:idx]): - obj = getattr(obj, name[last_idx:idx]) - last_idx = idx + 1 - idx += 1 - target = name[last_idx:idx] - - quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( - layer, self._moving_rate) - setattr(obj, target, quant_layer) + parent_layer, sub_name = \ + utils.find_parent_layer_and_sub_name(model, name) + + cur_quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( + cur_layer, self._moving_rate) + setattr(parent_layer, sub_name, cur_quant_layer) def save_quantized_model(self, layer, path, input_spec=None, **config): """ @@ -468,9 +450,17 @@ def _is_target_layer(self, layer): """ Whether the layer needs to calculate output scales. 
""" - return isinstance(layer, utils.quant_output_layers) \ - or ('quantized' in layer.full_name() and \ - 'quantized_noweight' not in layer.full_name()) + flag = False + if isinstance(layer, dygraph.Layer): + # exclude fake_quant ops in quant_nn file + if utils.is_leaf_layer(layer) and \ + 'fake_quant' not in layer.full_name(): + flag = True + # consider QuantizedConv2D and QuantizedLinear ops + if 'quantized' in layer.full_name() and \ + 'quantized_noweight' not in layer.full_name(): + flag = True + return flag def _save_output_scale(self, program, scope): """ @@ -514,4 +504,4 @@ def _is_skip_quant_op(self, block, in_op): previous_ops = [utils.find_previous_op(block, arg_name) \ for arg_name in in_op.input_arg_names] return any(op is not None and op.type not in \ - utils.fake_quantize_dequantize_types for op in previous_ops) + utils.fake_quantize_dequantize_op_types for op in previous_ops) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index b4449b2751dce..a3b4811f2ff4c 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -26,8 +26,8 @@ from paddle.fluid.log_helper import get_logger __all__ = [ - 'FakeQuantMovingAverage', 'FakeQuantAbsMax', - 'FakeChannelWiseQuantDequantAbsMax', 'QuantizedConv2D', 'QuantizedLinear', + 'FakeQuantMovingAverageAbsMax', 'FakeQuantAbsMax', + 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', 'QuantizedLinear', 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' ] @@ -35,9 +35,9 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -class FakeQuantMovingAverage(layers.Layer): +class FakeQuantMovingAverageAbsMax(layers.Layer): r""" - FakeQuantMovingAverage layer does the moving_average_abs_max quant and then dequant. + FakeQuantMovingAverageAbsMax layer does the moving_average_abs_max quant and then dequant. Its computational formula is described as below: :math:`scale = (moving\_rate*accum+max(abs(x)))/(moving\_rate*state+1)` @@ -50,7 +50,7 @@ def __init__(self, moving_rate=0.9, quant_bits=8, dtype='float32'): - super(FakeQuantMovingAverage, self).__init__() + super(FakeQuantMovingAverageAbsMax, self).__init__() self._moving_rate = moving_rate self._quant_bits = quant_bits @@ -103,7 +103,7 @@ def forward(self, input): return out check_variable_and_dtype(input, 'input', ['float32'], - "FakeQuantMovingAverage") + "FakeQuantMovingAverageAbsMax") attrs = { 'moving_rate': self._moving_rate, 'bit_length': self._quant_bits, @@ -215,7 +215,7 @@ def forward(self, input): return quant_out -class FakeChannelWiseQuantDequantAbsMax(layers.Layer): +class FakeQuantChannelWiseAbsMax(layers.Layer): def __init__(self, name=None, channel_num=None, @@ -224,7 +224,7 @@ def __init__(self, dtype='float32', quant_on_weight=False): assert quant_on_weight == True, "Channel_wise only can be used on weight quantization." 
- super(FakeChannelWiseQuantDequantAbsMax, self).__init__() + super(FakeQuantChannelWiseAbsMax, self).__init__() self._quant_bits = quant_bits self._quant_axis = quant_axis self._dtype = dtype @@ -270,7 +270,7 @@ def forward(self, input): return out check_variable_and_dtype(input, 'input', ['float32'], - "FakeChannelWiseQuantDequantAbsMax") + "FakeQuantChannelWiseAbsMax") attrs = {'bit_length': self._quant_bits, 'quant_axis': self._quant_axis} inputs = {"X": [input]} quant_out = self._helper.create_variable( @@ -318,8 +318,8 @@ def _get_fake_quant_type(quant_type, **kwargs): "when you use channel_wise_abs_max strategy.") fake_quant_map = { 'abs_max': FakeQuantAbsMax, - 'moving_average_abs_max': FakeQuantMovingAverage, - 'channel_wise_abs_max': FakeChannelWiseQuantDequantAbsMax + 'moving_average_abs_max': FakeQuantMovingAverageAbsMax, + 'channel_wise_abs_max': FakeQuantChannelWiseAbsMax } return fake_quant_map[quant_type](**call_args) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index 9adc7df69209c..b8cb76b5e191d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -13,6 +13,7 @@ # limitations under the License. import paddle +from paddle.fluid import dygraph import numpy as np quant_input_layers_map = { @@ -37,31 +38,26 @@ 'LayerNorm': paddle.nn.LayerNorm, } -fake_quantize_dequantize_types = [ - "fake_quantize_dequantize_abs_max", - "fake_channel_wise_quantize_dequantize_abs_max", - "fake_quantize_dequantize_moving_average_abs_max" -] +# Apply fake quant for the inputs of these layers +fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] -quant_output_layers = ( - paddle.nn.Conv2D, paddle.nn.Conv2DTranspose, paddle.nn.Linear, - paddle.nn.AdaptiveAvgPool2D, paddle.nn.AdaptiveMaxPool2D, - paddle.nn.AvgPool2D, paddle.nn.MaxPool2D, paddle.nn.BatchNorm, - paddle.nn.BatchNorm2D, paddle.nn.LayerNorm, paddle.nn.SyncBatchNorm, - paddle.nn.ELU, paddle.nn.GELU, paddle.nn.Hardshrink, paddle.nn.Hardsigmoid, - paddle.nn.Hardswish, paddle.nn.Hardtanh, paddle.nn.LeakyReLU, - paddle.nn.LogSigmoid, paddle.nn.LogSoftmax, paddle.nn.Maxout, - paddle.nn.PReLU, paddle.nn.ReLU, paddle.nn.ReLU6, paddle.nn.SELU, - paddle.nn.Sigmoid, paddle.nn.Softmax, paddle.nn.Softplus, - paddle.nn.Softshrink, paddle.nn.Softsign, paddle.nn.Swish, paddle.nn.Tanh, - paddle.nn.Tanhshrink, paddle.nn.ThresholdedReLU, paddle.nn.Upsample, - paddle.nn.quant.FloatFunctionalLayer) +# Apply fake quant for the output of these layers +fake_quant_output_layers = [ + paddle.nn.AdaptiveAvgPool2D, + paddle.nn.AdaptiveMaxPool2D, +] weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", "depthwise_conv2d_transpose" ] +fake_quantize_dequantize_op_types = [ + "fake_quantize_dequantize_abs_max", + "fake_channel_wise_quantize_dequantize_abs_max", + "fake_quantize_dequantize_moving_average_abs_max" +] + def load_variable_data(scope, var_name): ''' @@ -91,3 +87,36 @@ def find_next_ops(block, var_name): if var_name in op.input_arg_names: res_ops.append(op) return res_ops + + +def find_parent_layer_and_sub_name(model, name): + """ + Given the model and the name of a layer, find the parent layer and + the sub_name of the layer. + For example, if name is 'block_1/convbn_1/conv_1', the parent layer is + 'block_1/convbn_1' and the sub_name is `conv_1`. 
+ """ + assert isinstance(model, dygraph.Layer), \ + "The model must be the instance of paddle.nn.Layer." + assert len(name) > 0, "The input (name) should not be empty." + + last_idx = 0 + idx = 0 + parent_layer = model + while idx < len(name): + if name[idx] == '.': + sub_name = name[last_idx:idx] + if hasattr(parent_layer, sub_name): + parent_layer = getattr(parent_layer, sub_name) + last_idx = idx + 1 + idx += 1 + sub_name = name[last_idx:idx] + return parent_layer, sub_name + + +def is_leaf_layer(layer): + """ + Whether the layer is leaf layer. + """ + return isinstance(layer, dygraph.Layer) \ + and len(layer.sublayers()) == 0 diff --git a/python/paddle/nn/quant/__init__.py b/python/paddle/nn/quant/__init__.py index e8a26705c68ff..c7f9a5073def8 100644 --- a/python/paddle/nn/quant/__init__.py +++ b/python/paddle/nn/quant/__init__.py @@ -18,7 +18,7 @@ from .functional_layers import multiply # noqa: F401 from .functional_layers import divide # noqa: F401 from .functional_layers import reshape # noqa: F401 -from .functional_layers import tranpose # noqa: F401 +from .functional_layers import transpose # noqa: F401 from .functional_layers import concat # noqa: F401 from .functional_layers import flatten # noqa: F401 diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 7eeaa5c58e78a..03a311dc708a1 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -63,32 +63,25 @@ def forward(self, x, shape, name=None): return manipulation.reshape(x, shape, name) -class tranpose(FloatFunctionalLayer): - def __init__(self, name=None): - super(tranpose, self).__init__() - self._name = name +class transpose(FloatFunctionalLayer): + def __init__(self): + super(transpose, self).__init__() - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, perm)" - return manipulation.tranpose(inputs[0], inputs[1], self._name) + def forward(self, x, perm, name=None): + return manipulation.transpose(x, perm, name) class concat(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(concat, self).__init__() - self._name = name - def forward(self, inputs): - assert len(inputs) == 2, "The inputs should be (x, axis)" - return manipulation.concat(inputs[0], inputs[1], self._name) + def forward(self, x, axis, name=None): + return manipulation.concat(x, axis, name) class flatten(FloatFunctionalLayer): - def __init__(self, name=None): + def __init__(self): super(flatten, self).__init__() - self._name = name - def forward(self, inputs): - assert len( - inputs) == 3, "The inputs should be (x, start_axis, stop_axis)" - return manipulation.flatten(inputs[0], inputs[1], inputs[2], self._name) + def forward(self, x, start_axis, stop_axis, name=None): + return manipulation.flatten(x, start_axis, stop_axis, name) From 679629c3a6a9c746f6fa7e59a3f6f610adbfe8f2 Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 09:38:38 +0000 Subject: [PATCH 4/8] Update all unit tests for dygraph qat --- .../slim/quantization/imperative/qat.py | 29 +- .../slim/quantization/imperative/quant_nn.py | 57 +- .../slim/quantization/imperative/utils.py | 16 +- .../slim/tests/test_imperative_out_scale.py | 337 ++---------- .../contrib/slim/tests/test_imperative_qat.py | 336 ++---------- .../test_imperative_qat_addquantdequant.py | 494 ------------------ .../tests/test_imperative_qat_channelwise.py | 399 +------------- .../slim/tests/test_imperative_skip_op.py | 129 +---- 
8 files changed, 178 insertions(+), 1619 deletions(-) delete mode 100644 python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index ac80ff8a660aa..0f2f621fdb328 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -251,12 +251,12 @@ def __init__(self, super(ImperativeQuantizeInputs, self).__init__() self._quantizable_layer_type = tuple( - utils.quant_input_layers_map[layer] - if layer in utils.quant_input_layers_map else layer + utils.layer_name_map[layer] + if layer in utils.layer_name_map else layer for layer in quantizable_layer_type) for layer in self._quantizable_layer_type: assert not isinstance(layer, str) \ - and layer in utils.quant_input_layers_map.values(), \ + and layer in utils.fake_quant_input_layers, \ "%s is unspported to be quantized." % layer quantize_type = { @@ -320,7 +320,8 @@ def apply(self, model): def _get_input_quantized_layer(self, layer): quant_layer_name = None - for key, value in utils.quant_input_layers_map.items(): + + for key, value in utils.layer_name_map.items(): if isinstance(layer, value): quant_layer_name = 'Quantized' + key break @@ -328,9 +329,6 @@ def _get_input_quantized_layer(self, layer): "The layer %s is unsupported to be quantized." \ % layer.full_name() - if layer not in utils.fake_quant_input_layers: - quant_layer_name = 'QuantizedNoweightLayer' - return quant_nn.__dict__[quant_layer_name](layer, **self._kwargs) @@ -372,8 +370,13 @@ def apply(self, model): parent_layer, sub_name = \ utils.find_parent_layer_and_sub_name(model, name) - cur_quant_layer = quant_nn.__dict__["QuantizedOutputLayer"]( - cur_layer, self._moving_rate) + if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)): + cur_quant_layer = quant_nn.__dict__[ + "FakeQuantMAOutputScaleLayer"](cur_layer, self._moving_rate) + else: + cur_quant_layer = quant_nn.__dict__["MAOutputScaleLayer"]( + cur_layer, self._moving_rate) + setattr(parent_layer, sub_name, cur_quant_layer) def save_quantized_model(self, layer, path, input_spec=None, **config): @@ -454,12 +457,14 @@ def _is_target_layer(self, layer): if isinstance(layer, dygraph.Layer): # exclude fake_quant ops in quant_nn file if utils.is_leaf_layer(layer) and \ - 'fake_quant' not in layer.full_name(): + not isinstance(layer, tuple(utils.fake_quant_leaf_layers)): flag = True # consider QuantizedConv2D and QuantizedLinear ops - if 'quantized' in layer.full_name() and \ - 'quantized_noweight' not in layer.full_name(): + if isinstance(layer, + (quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear)): flag = True + if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer): + flag = True return flag def _save_output_scale(self, program, scope): diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index a3b4811f2ff4c..fe8e4570c69a0 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -26,9 +26,15 @@ from paddle.fluid.log_helper import get_logger __all__ = [ - 'FakeQuantMovingAverageAbsMax', 'FakeQuantAbsMax', - 'FakeQuantChannelWiseAbsMax', 'QuantizedConv2D', 'QuantizedLinear', - 'QuantizedNoweightLayer', 'MovingAverageAbsMaxScale' + 'FakeQuantMovingAverageAbsMax', + 
'FakeQuantAbsMax', + 'FakeQuantChannelWiseAbsMax', + 'QuantizedConv2D', + 'QuantizedLinear', + 'QuantizedNoweightLayer', + 'MovingAverageAbsMaxScale', + 'MAOutputScaleLayer', + 'FakeQuantMAOutputScaleLayer', ] _logger = get_logger( @@ -598,14 +604,19 @@ def forward(self, input): return quant_out -class QuantizedOutputLayer(layers.Layer): +class MAOutputScaleLayer(layers.Layer): + """ + Calculate the scale (moving average abs max) for the output of the input layer. + Add MovingAverageMaxScale layer to the behind of the input layer. + """ + def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): r""" - Add MovingAverageMaxScale layer to the behind of the input layer. + Construct """ - super(QuantizedOutputLayer, self).__init__() + super(MAOutputScaleLayer, self).__init__() self._layer = layer - self._moving_average_abs_max_scale = \ + self._ma_output_scale = \ MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) def forward(self, *inputs, **kwargs): @@ -616,4 +627,34 @@ def forward(self, *inputs, **kwargs): "the output threshold" % self._layer.full_name()) return out else: - return self._moving_average_abs_max_scale(out) + return self._ma_output_scale(out) + + +class FakeQuantMAOutputScaleLayer(layers.Layer): + def __init__(self, + layer, + weight_bits=8, + activation_bits=8, + moving_rate=0.9, + *args, + **kwargs): + + super(FakeQuantMAOutputScaleLayer, self).__init__() + self._layer = layer + self._fake_quant_output = _get_fake_quant_type( + 'moving_average_abs_max', + name=layer.full_name(), + moving_rate=moving_rate, + quant_bits=activation_bits, + dtype=self._dtype, + quant_on_weight=False) + + def forward(self, *inputs, **kwargs): + out = self._layer(*inputs, **kwargs) + # TODO (jc): support the ops of several outputs + if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: + _logger.info("%s has several outputs, so skip collecting " + "the output threshold" % self._layer.full_name()) + return out + else: + return self._fake_quant_output(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index b8cb76b5e191d..faec53a87fdba 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -15,8 +15,9 @@ import paddle from paddle.fluid import dygraph import numpy as np +from . 
import quant_nn -quant_input_layers_map = { +layer_name_map = { 'Conv2D': paddle.nn.Conv2D, 'Linear': paddle.nn.Linear, 'AdaptiveAvgPool2D': paddle.nn.AdaptiveAvgPool2D, @@ -39,12 +40,21 @@ } # Apply fake quant for the inputs of these layers +# TODO (jc): support paddle.nn.Conv2DTranspose fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] # Apply fake quant for the output of these layers fake_quant_output_layers = [ - paddle.nn.AdaptiveAvgPool2D, - paddle.nn.AdaptiveMaxPool2D, + paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, + paddle.nn.LeakyReLU, paddle.nn.quant.add, paddle.nn.quant.subtract, + paddle.nn.quant.multiply, paddle.nn.quant.divide +] + +fake_quant_leaf_layers = [ + quant_nn.FakeQuantAbsMax, + quant_nn.FakeQuantChannelWiseAbsMax, + quant_nn.FakeQuantMovingAverageAbsMax, + quant_nn.MovingAverageAbsMaxScale, ] weight_op_types = [ diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py index 8d6ce76ef0fa5..6cc58a38f227a 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_out_scale.py @@ -28,7 +28,6 @@ from paddle.fluid.optimizer import AdamOptimizer from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import OutScaleForTrainingPass, OutScaleForInferencePass, QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.nn.layer import ReLU, LeakyReLU, Sigmoid, Softmax, PReLU @@ -36,6 +35,8 @@ from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph import nn +from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenet + paddle.enable_static() os.environ["CPU_NUM"] = "1" @@ -54,59 +55,6 @@ def get_vaild_warning_num(warning, w): return num -def StaticLenet(data, num_classes=10, classifier_activation='softmax'): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=False) - batch_norm1 = layers.batch_norm(conv1) - relu1 = layers.relu(batch_norm1) - pool1 = fluid.layers.pool2d( - relu1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - batch_norm2 = layers.batch_norm(conv2) - prelu1 = layers.prelu(batch_norm2, mode='all') - pool2 = fluid.layers.pool2d( - prelu1, pool_size=2, pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - leaky_relu1 = layers.leaky_relu(fc1, alpha=0.01) - fc2 = fluid.layers.fc(input=leaky_relu1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - sigmoid1 = layers.sigmoid(fc2) - fc3 = fluid.layers.fc(input=sigmoid1, - size=num_classes, - 
param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - softmax1 = layers.softmax(fc3, use_cudnn=True) - return softmax1 - - class ImperativeLenet(fluid.dygraph.Layer): def __init__(self, num_classes=10): super(ImperativeLenet, self).__init__() @@ -175,38 +123,11 @@ def forward(self, inputs): class TestImperativeOutSclae(unittest.TestCase): def test_out_scale_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'abs_max' - activation_quantize_type = 'moving_average_abs_max' - param_init_map = {} seed = 1000 lr = 0.001 - dynamic_out_scale_list = [] - static_out_scale_list = [] - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) + weight_quantize_type = 'abs_max' + activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) @@ -215,207 +136,46 @@ def _build_static_lenet(main, startup, is_test=False, seed=1000): np.random.seed(seed) fluid.default_main_program().random_seed = seed fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) + lenet = fix_model_dict(lenet) imperative_out_scale.quantize(lenet) + + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - + loss_list = train_lenet(lenet, reader, adam) lenet.eval() param_save_path = "test_save_quantized_model/lenet.pdparams" save_dict = lenet.state_dict() paddle.save(save_dict, param_save_path) - path = "./dynamic_outscale_infer_model/lenet" - dynamic_save_dir = "./dynamic_outscale_infer_model" - + save_path = "./dynamic_outscale_infer_model/lenet" imperative_out_scale.save_quantized_model( layer=lenet, - path=path, + path=save_path, input_spec=[ 
paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - if "batch_norm" in param.name: - param_name = param.name.replace("norm", "norm2d") - elif 'prelu' in param.name: - param_name = param.name.replace("prelu", 'p_re_lu') - else: - param_name = param.name - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param_name], place) - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quantize_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - outscale_pass = OutScaleForTrainingPass(scope=scope, place=place) - outscale_pass.apply(main_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - scale_inference_pass = OutScaleForInferencePass(scope=scope) - scale_inference_pass.apply(infer_graph) - - save_program = infer_graph.to_program() - static_save_dir = "./static_outscale_infer_model" - with fluid.scope_guard(scope): - fluid.io.save_inference_model( - dirname=static_save_dir, - feeded_var_names=[infer_img.name], - target_vars=[infer_pre], - executor=exe, - main_program=save_program, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX) - - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') - - # load dynamic model - [dynamic_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=dynamic_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - # load static model - [static_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=static_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - - dynamic_ops = dynamic_inference_program.global_block().ops - static_ops = static_inference_program.global_block().ops - - for op in dynamic_ops[:]: - if op.type == "flatten2" or 'fake' in op.type: - dynamic_ops.remove(op) - - for op in static_ops[:]: - if 'fake' in op.type: - static_ops.remove(op) - - op_count = 0 - for i in range(len(dynamic_ops)): - if dynamic_ops[i].has_attr("out_threshold"): - op_count += 1 - self.assertTrue(dynamic_ops[i].type == static_ops[i].type) - if dynamic_ops[i].attr("out_threshold") != static_ops[i].attr( - "out_threshold"): - _logger.info(dynamic_ops[i].attr("out_threshold")) - _logger.info(static_ops[i].attr("out_threshold")) - self.assertTrue(dynamic_ops[i].attr("out_threshold") == - static_ops[i].attr("out_threshold")) - - _logger.info("op_cout: {}".format(op_count)) - self.assertTrue(op_count == 14) + for i in range(len(loss_list) - 1): + self.assertTrue( + loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') class TestSaveQuanztizedModelFromCheckPoint(unittest.TestCase): def test_save_quantized_model(self): - weight_quantize_type = 'abs_max' - activation_quantize_type = 'moving_average_abs_max' + lr = 0.001 + load_param_path = "test_save_quantized_model/lenet.pdparams" - path = "./dynamic_outscale_infer_model_from_checkpoint/lenet" - dynamic_model_save_dir = "./dynamic_outscale_infer_model_from_checkpoint" - static_model_save_dir = "./static_outscale_infer_model" + save_path = "./dynamic_outscale_infer_model_from_checkpoint/lenet" + weight_quantize_type = 'abs_max' + activation_quantize_type = 'moving_average_abs_max' imperative_out_scale = ImperativeQuantAware( weight_quantize_type=weight_quantize_type, activation_quantize_type=activation_quantize_type) @@ -426,56 +186,25 @@ def test_save_quantized_model(self): imperative_out_scale.quantize(lenet) lenet.set_dict(load_dict) + reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=32, drop_last=True) + adam = AdamOptimizer( + learning_rate=lr, parameter_list=lenet.parameters()) + loss_list = train_lenet(lenet, reader, adam) + lenet.eval() + imperative_out_scale.save_quantized_model( layer=lenet, - path=path, + path=save_path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - # load dynamic model - [dynamic_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - dirname=dynamic_model_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - # load static model - [static_inference_program, feed_target_names, fetch_targets] = ( - fluid.io.load_inference_model( - 
dirname=static_model_save_dir, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX)) - - dynamic_ops = dynamic_inference_program.global_block().ops - static_ops = static_inference_program.global_block().ops - - for op in dynamic_ops[:]: - if op.type == "flatten2" or 'fake' in op.type: - dynamic_ops.remove(op) - - for op in static_ops[:]: - if 'fake' in op.type: - static_ops.remove(op) - - op_count = 0 - for i in range(len(dynamic_ops)): - if dynamic_ops[i].has_attr("out_threshold"): - op_count += 1 - self.assertTrue(dynamic_ops[i].type == static_ops[i].type) - self.assertTrue(dynamic_ops[i].attr("out_threshold") == - static_ops[i].attr("out_threshold")) - - _logger.info("op_cout: {}".format(op_count)) - self.assertTrue(op_count == 14) + for i in range(len(loss_list) - 1): + self.assertTrue( + loss_list[i] > loss_list[i + 1], + msg='Failed to do the imperative qat.') if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index 99a23525409f3..a188a1fdfa9f3 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -21,20 +21,20 @@ import time import unittest import logging + import paddle import paddle.fluid as fluid from paddle.fluid import core from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.dygraph.container import Sequential from paddle.nn import Linear, Conv2D, Softmax -from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX from paddle.fluid.contrib.slim.quantization.imperative.quant_nn import QuantizedConv2D +from imperative_test_utils import fix_model_dict, ImperativeLenet + paddle.enable_static() os.environ["CPU_NUM"] = "1" @@ -45,115 +45,6 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = fluid.layers.pool2d( - conv2, pool_size=2, pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc4 = 
fluid.layers.softmax(fc3, use_cudnn=True) - - return fc4 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2)) - - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - class TestImperativeQat(unittest.TestCase): """ QAT = quantization-aware training @@ -164,19 +55,26 @@ def setUpClass(cls): timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) cls.root_path = os.path.join(os.getcwd(), "imperative_qat_" + timestamp) cls.save_path = os.path.join(cls.root_path, "lenet") - cls.dynamic_root_path = os.path.join(os.getcwd(), - "dynamic_mnist_" + timestamp) - cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model") @classmethod def tearDownClass(cls): - shutil.rmtree(cls.root_path) - shutil.rmtree(cls.dynamic_root_path) + try: + shutil.rmtree(cls.root_path) + except Exception as e: + print("Failed to delete {} due to {}".format(cls.root_path, str(e))) + + def set_quant_type(self): + self.weight_quantize_type = None + self.activation_quantize_type = None + print('weight_quantize_type', self.weight_quantize_type) + + def run_qat_save(self): + self.set_quant_type() - def test_qat_save(self): imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max') + weight_quantize_type=self.weight_quantize_type, + activation_quantize_type=self.activation_quantize_type) + with fluid.dygraph.guard(): # For CI coverage conv1 = Conv2D( @@ -190,10 +88,17 @@ def test_qat_save(self): data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32') quant_conv1(fluid.dygraph.to_variable(data)) + seed = 1 + np.random.seed(seed) + fluid.default_main_program().random_seed = seed + fluid.default_startup_program().random_seed = seed + lenet = ImperativeLenet() + lenet = fix_model_dict(lenet) imperative_qat.quantize(lenet) adam = AdamOptimizer( learning_rate=0.001, parameter_list=lenet.parameters()) + train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=32, drop_last=True) test_reader = paddle.batch( @@ -226,6 +131,7 @@ def test_qat_save(self): break lenet.eval() + eval_acc_top1_list = [] for batch_id, data in 
enumerate(test_reader()): x_data = np.array([x[0].reshape(1, 28, 28) for x in data]).astype('float32') @@ -242,14 +148,19 @@ def test_qat_save(self): input=out, label=label, k=5) if batch_id % 100 == 0: + eval_acc_top1_list.append(float(acc_top1.numpy())) _logger.info( "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". format(epoch, batch_id, acc_top1.numpy(), acc_top5.numpy())) - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") + # check eval acc + eval_acc_top1 = sum(eval_acc_top1_list) / len( + eval_acc_top1_list) + print('eval_acc_top1', eval_acc_top1) + self.assertTrue( + eval_acc_top1 > 0.9, + msg="The test acc {%f} is less than 0.9." % eval_acc_top1) # test the correctness of `paddle.jit.save` data = next(test_reader()) @@ -260,13 +171,14 @@ def test_qat_save(self): before_save = lenet(test_img) # save inference quantized model - paddle.jit.save( + imperative_qat.save_quantized_model( layer=lenet, - path=TestImperativeQat.save_path, + path=self.save_path, input_spec=[ paddle.static.InputSpec( shape=[None, 1, 28, 28], dtype='float32') ]) + print('Quantized model saved in {%s}' % self.save_path) if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) @@ -275,183 +187,27 @@ def test_qat_save(self): exe = fluid.Executor(place) [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model( - dirname=TestImperativeQat.root_path, + dirname=self.root_path, executor=exe, model_filename="lenet" + INFER_MODEL_SUFFIX, params_filename="lenet" + INFER_PARAMS_SUFFIX) after_save, = exe.run(inference_program, feed={feed_target_names[0]: test_data}, fetch_list=fetch_targets) - + # check self.assertTrue( np.allclose(after_save, before_save.numpy()), msg='Failed to save the inference quantized model.') - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.01 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type) - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) +class TestImperativeQatAbsMax(TestImperativeQat): + def set_quant_type(self): + 
self.weight_quantize_type = 'abs_max' + self.activation_quantize_type = 'moving_average_abs_max' + print('weight_quantize_type', self.weight_quantize_type) - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - - paddle.jit.save( - layer=lenet, - path=TestImperativeQat.dynamic_save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break - - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') + def test_qat(self): + self.run_qat_save() if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py deleted file mode 100644 index f5b3e89ef415c..0000000000000 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_addquantdequant.py +++ /dev/null @@ -1,494 +0,0 @@ -# copyright (c) 2018 paddlepaddle authors. all rights reserved. -# -# licensed under the apache license, version 2.0 (the "license"); -# you may not use this file except in compliance with the license. -# you may obtain a copy of the license at -# -# http://www.apache.org/licenses/license-2.0 -# -# unless required by applicable law or agreed to in writing, software -# distributed under the license is distributed on an "as is" basis, -# without warranties or conditions of any kind, either express or implied. -# see the license for the specific language governing permissions and -# limitations under the license. - -from __future__ import print_function - -import os -import numpy as np -import random -import shutil -import time -import unittest -import logging -import paddle -import six -import paddle.fluid as fluid -from paddle.nn import functional -from paddle.nn import Linear, Conv2D, Softmax, BatchNorm -from paddle.fluid.layers import nn -from paddle.fluid import core -from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware, QuantizationTransformPass, AddQuantDequantPass -from paddle.fluid.dygraph.container import Sequential -from paddle.fluid.dygraph.nn import Pool2D -from paddle.nn.layer.activation import ReLU, LeakyReLU, ReLU6, Tanh, Swish -from paddle.fluid.log_helper import get_logger -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX - -paddle.enable_static() - -os.environ["CPU_NUM"] = "1" -if core.is_compiled_with_cuda(): - fluid.set_flags({"FLAGS_cudnn_deterministic": True}) - -_logger = get_logger( - __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') - - -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - conv1 = fluid.layers.leaky_relu(conv1, alpha=0.02) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = 
fluid.layers.pool2d( - conv2, pool_size=2, pool_type='max', pool_stride=2) - pool2 = fluid.layers.relu(pool2) - pool2 = fluid.layers.swish(pool2) - conv3 = fluid.layers.conv2d( - pool2, - num_filters=16, - filter_size=1, - stride=1, - padding=0, - param_attr=conv2d_w3_attr, - bias_attr=conv2d_b3_attr) - conv3 = fluid.layers.relu6(conv3) - conv3 = paddle.tensor.math.tanh(conv3) - fc1 = fluid.layers.fc(input=conv3, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc3 = fluid.layers.softmax(fc3, use_cudnn=True) - - return fc3 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - conv2d_w3_attr = fluid.ParamAttr(name="conv2d_w_3") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - conv2d_b3_attr = fluid.ParamAttr(name="conv2d_b_3") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - LeakyReLU(negative_slope=0.02), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - ReLU(), - Swish(), - Conv2D( - in_channels=16, - out_channels=16, - kernel_size=1, - stride=1, - padding=0, - weight_attr=conv2d_w3_attr, - bias_attr=conv2d_b3_attr), - ReLU6(), - Tanh()) - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - -class TestImperativeAddQuantDequant(unittest.TestCase): - @classmethod - def setUpClass(cls): - timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime()) - cls.root_path = os.path.join(os.getcwd(), - "imperative_qat_aqd_" + timestamp) - cls.save_path = os.path.join(cls.root_path, "lenet") - cls.dynamic_root_path = os.path.join(os.getcwd(), - "dynamic_mnist_aqd_" + timestamp) - cls.dynamic_save_path = os.path.join(cls.dynamic_root_path, "model") - - @classmethod - def tearDownClass(cls): - shutil.rmtree(cls.root_path) - shutil.rmtree(cls.dynamic_root_path) - - def test_qat_save(self): - - imperative_qat = ImperativeQuantAware( - weight_quantize_type='abs_max', - activation_quantize_type='moving_average_abs_max', - quantizable_layer_type=[ - 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', - 'Swish' - ]) - - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - 
learning_rate=0.001, parameter_list=lenet.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) - - epoch_num = 1 - for epoch in range(epoch_num): - lenet.train() - for batch_id, data in enumerate(train_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - out = lenet(img) - acc = fluid.layers.accuracy(out, label) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - if batch_id % 100 == 0: - _logger.info( - "Train | At epoch {} step {}: loss = {:}, acc= {:}". - format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) - if batch_id == 500: # For shortening CI time - break - - lenet.eval() - for batch_id, data in enumerate(test_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) - - if batch_id % 100 == 0: - _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". - format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) - - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") - - # test the correctness of `paddle.jit.save` - data = next(test_reader()) - test_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - test_img = fluid.dygraph.to_variable(test_data) - lenet.eval() - before_save = lenet(test_img) - - # save inference quantized model - paddle.jit.save( - layer=lenet, - path=TestImperativeAddQuantDequant.save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=TestImperativeAddQuantDequant.root_path, - executor=exe, - model_filename="lenet" + INFER_MODEL_SUFFIX, - params_filename="lenet" + INFER_PARAMS_SUFFIX) - after_save, = exe.run(inference_program, - feed={feed_target_names[0]: test_data}, - fetch_list=fetch_targets) - - self.assertTrue( - np.allclose(after_save, before_save.numpy()), - msg='Failed to save the inference quantized model.') - - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 
'abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.001 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type, - quantizable_layer_type=[ - 'Conv2D', 'Linear', 'ReLU', 'LeakyReLU', 'ReLU6', 'Tanh', - 'Swish' - ]) - - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - lenet.set_dict(fixed_state) - - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - if batch_id > 500: - break - lenet.eval() - paddle.jit.save( - layer=lenet, - path=TestImperativeAddQuantDequant.dynamic_save_path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - add_quant_dequant_pass = AddQuantDequantPass( - scope=scope, - place=place, - quantizable_op_type=[ - 'relu', 'leaky_relu', 'relu6', 'tanh', 'swish' - ]) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - 
add_quant_dequant_pass.apply(main_graph) - add_quant_dequant_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-08 - atol = 1e-10 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". - format(diff, i, loss_d, loss_s)) - break - - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index f888edfcc977a..da4e285633680 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -19,18 +19,13 @@ import random import unittest import logging + import paddle import paddle.fluid as fluid from paddle.fluid import core -from paddle.fluid.optimizer import AdamOptimizer -from paddle.fluid.framework import IrGraph -from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware -from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass -from paddle.fluid.dygraph.container import Sequential -from paddle.nn import Linear, Conv2D, Softmax -from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger -from paddle.fluid.dygraph.io import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX + +from test_imperative_qat import TestImperativeQat paddle.enable_static() @@ -42,388 +37,14 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -def StaticLenet(data, num_classes=10): - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - conv1 = fluid.layers.conv2d( - data, - num_filters=6, - filter_size=3, - stride=1, - padding=1, - param_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - pool1 = fluid.layers.pool2d( - conv1, pool_size=2, pool_type='max', pool_stride=2) - conv2 = fluid.layers.conv2d( - pool1, - num_filters=16, - filter_size=5, - stride=1, - padding=0, - param_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - pool2 = fluid.layers.pool2d( - conv2, pool_size=2, 
pool_type='max', pool_stride=2) - - fc1 = fluid.layers.fc(input=pool2, - size=120, - param_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - fc2 = fluid.layers.fc(input=fc1, - size=84, - param_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - fc3 = fluid.layers.fc(input=fc2, - size=num_classes, - param_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - fc3 = fluid.layers.softmax(fc3, use_cudnn=True) - - return fc3 - - -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.features = Sequential( - Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2), - Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr), - Pool2D( - pool_size=2, pool_type='max', pool_stride=2)) - - self.fc = Sequential( - Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr), - Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr), - Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr), - Softmax()) - - def forward(self, inputs): - x = self.features(inputs) - x = fluid.layers.flatten(x, 1) - x = self.fc(x) - return x - - -class TestImperativeQatChannelWise(unittest.TestCase): - """ - QAT = quantization-aware training - """ - - def test_qat_save(self): - imperative_qat = ImperativeQuantAware( - weight_quantize_type='channel_wise_abs_max', - activation_quantize_type='moving_average_abs_max') - - with fluid.dygraph.guard(): - lenet = ImperativeLenet() - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=0.001, parameter_list=lenet.parameters()) - train_reader = paddle.batch( - paddle.dataset.mnist.train(), batch_size=32, drop_last=True) - test_reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32) - - epoch_num = 1 - for epoch in range(epoch_num): - lenet.train() - for batch_id, data in enumerate(train_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - out = lenet(img) - acc = fluid.layers.accuracy(out, label) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - if batch_id % 100 == 0: - _logger.info( - "Train | At epoch {} step {}: loss = {:}, acc= {:}". 
- format(epoch, batch_id, - avg_loss.numpy(), acc.numpy())) - - lenet.eval() - for batch_id, data in enumerate(test_reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - acc_top1 = fluid.layers.accuracy( - input=out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( - input=out, label=label, k=5) - - if batch_id % 100 == 0: - _logger.info( - "Test | At epoch {} step {}: acc1 = {:}, acc5 = {:}". - format(epoch, batch_id, - acc_top1.numpy(), acc_top5.numpy())) - - # save weights - model_dict = lenet.state_dict() - fluid.save_dygraph(model_dict, "save_temp") - - # test the correctness of `paddle.jit.save` - data = next(test_reader()) - test_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - test_img = fluid.dygraph.to_variable(test_data) - lenet.eval() - before_save = lenet(test_img) - - # save inference quantized model - path = "./qat_infer_model/mnist" - save_dir = "./qat_infer_model" - paddle.jit.save( - layer=lenet, - path=path, - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model( - dirname=save_dir, - executor=exe, - model_filename="mnist" + INFER_MODEL_SUFFIX, - params_filename="mnist" + INFER_PARAMS_SUFFIX) - after_save, = exe.run(inference_program, - feed={feed_target_names[0]: test_data}, - fetch_list=fetch_targets) - - self.assertTrue( - np.allclose(after_save, before_save.numpy()), - msg='Failed to save the inference quantized model.') - - def test_qat_acc(self): - def _build_static_lenet(main, startup, is_test=False, seed=1000): - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - main.random_seed = seed - startup.random_seed = seed - img = fluid.layers.data( - name='image', shape=[1, 28, 28], dtype='float32') - label = fluid.layers.data( - name='label', shape=[1], dtype='int64') - prediction = StaticLenet(img) - if not is_test: - loss = fluid.layers.cross_entropy( - input=prediction, label=label) - avg_loss = fluid.layers.mean(loss) - else: - avg_loss = prediction - return img, label, avg_loss - - reader = paddle.batch( - paddle.dataset.mnist.test(), batch_size=32, drop_last=True) - weight_quantize_type = 'channel_wise_abs_max' - activation_quant_type = 'moving_average_abs_max' - param_init_map = {} - seed = 1000 - lr = 0.001 - - # imperative train - _logger.info( - "--------------------------dynamic graph qat--------------------------" - ) - imperative_qat = ImperativeQuantAware( - weight_quantize_type=weight_quantize_type, - activation_quantize_type=activation_quant_type) - - with fluid.dygraph.guard(): - np.random.seed(seed) - fluid.default_main_program().random_seed = seed - fluid.default_startup_program().random_seed = seed - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, size=np.product(p_shape)).reshape( - p_shape).astype('float32') - fixed_state[name] = value - param_init_map[param.name] = value - 
lenet.set_dict(fixed_state) - - imperative_qat.quantize(lenet) - adam = AdamOptimizer( - learning_rate=lr, parameter_list=lenet.parameters()) - dynamic_loss_rec = [] - lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) - - paddle.jit.save( - layer=lenet, - path="./dynamic_mnist/model", - input_spec=[ - paddle.static.InputSpec( - shape=[None, 1, 28, 28], dtype='float32') - ]) - - # static graph train - _logger.info( - "--------------------------static graph qat--------------------------" - ) - static_loss_rec = [] - if core.is_compiled_with_cuda(): - place = core.CUDAPlace(0) - else: - place = core.CPUPlace() - exe = fluid.Executor(place) - - main = fluid.Program() - infer = fluid.Program() - startup = fluid.Program() - static_img, static_label, static_loss = _build_static_lenet( - main, startup, False, seed) - infer_img, _, infer_pre = _build_static_lenet(infer, startup, True, - seed) - with fluid.unique_name.guard(): - with fluid.program_guard(main, startup): - opt = AdamOptimizer(learning_rate=lr) - opt.minimize(static_loss) - - scope = core.Scope() - with fluid.scope_guard(scope): - exe.run(startup) - for param in main.all_parameters(): - param_tensor = scope.var(param.name).get_tensor() - param_tensor.set(param_init_map[param.name], place) - - main_graph = IrGraph(core.Graph(main.desc), for_test=False) - infer_graph = IrGraph(core.Graph(infer.desc), for_test=True) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - activation_quantize_type=activation_quant_type, - weight_quantize_type=weight_quantize_type, - quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']) - transform_pass.apply(main_graph) - transform_pass.apply(infer_graph) - build_strategy = fluid.BuildStrategy() - build_strategy.fuse_all_reduce_ops = False - binary = fluid.CompiledProgram(main_graph.graph).with_data_parallel( - loss_name=static_loss.name, build_strategy=build_strategy) - - feeder = fluid.DataFeeder( - feed_list=[static_img, static_label], place=place) - with fluid.scope_guard(scope): - for batch_id, data in enumerate(reader()): - loss_v, = exe.run(binary, - feed=feeder.feed(data), - fetch_list=[static_loss]) - static_loss_rec.append(loss_v[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', loss_v)) - - save_program = infer_graph.to_program() - with fluid.scope_guard(scope): - fluid.io.save_inference_model("./static_mnist", [infer_img.name], - [infer_pre], exe, save_program) - rtol = 1e-05 - atol = 1e-08 - for i, (loss_d, - loss_s) in enumerate(zip(dynamic_loss_rec, static_loss_rec)): - diff = np.abs(loss_d - loss_s) - if diff > (atol + rtol * np.abs(loss_s)): - _logger.info( - "diff({}) at {}, dynamic loss = {}, static loss = {}". 
- format(diff, i, loss_d, loss_s)) - break +class TestImperativeQatChannelWise(TestImperativeQat): + def set_quant_type(self): + self.weight_quantize_type = 'channel_wise_abs_max' + self.activation_quantize_type = 'moving_average_abs_max' + print('weight_quantize_type', self.weight_quantize_type) - self.assertTrue( - np.allclose( - np.array(dynamic_loss_rec), - np.array(static_loss_rec), - rtol=rtol, - atol=atol, - equal_nan=True), - msg='Failed to do the imperative qat.') + def test_qat(self): + self.run_qat_save() if __name__ == '__main__': diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py index bda02769cea86..bb24f941c625e 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_skip_op.py @@ -31,6 +31,8 @@ from paddle.fluid.dygraph.nn import Pool2D from paddle.fluid.log_helper import get_logger +from imperative_test_utils import fix_model_dict, train_lenet, ImperativeLenetWithSkipQuant + os.environ["CPU_NUM"] = "1" if core.is_compiled_with_cuda(): fluid.set_flags({"FLAGS_cudnn_deterministic": True}) @@ -39,144 +41,33 @@ __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') -class ImperativeLenet(fluid.dygraph.Layer): - def __init__(self, num_classes=10): - super(ImperativeLenet, self).__init__() - conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") - conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") - fc_w1_attr = fluid.ParamAttr(name="fc_w_1") - fc_w2_attr = fluid.ParamAttr(name="fc_w_2") - fc_w3_attr = fluid.ParamAttr(name="fc_w_3") - conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") - conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") - fc_b1_attr = fluid.ParamAttr(name="fc_b_1") - fc_b2_attr = fluid.ParamAttr(name="fc_b_2") - fc_b3_attr = fluid.ParamAttr(name="fc_b_3") - self.conv2d_0 = Conv2D( - in_channels=1, - out_channels=6, - kernel_size=3, - stride=1, - padding=1, - weight_attr=conv2d_w1_attr, - bias_attr=conv2d_b1_attr) - self.conv2d_0.skip_quant = True - - self.batch_norm_0 = BatchNorm(6) - self.relu_0 = ReLU() - self.pool2d_0 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) - self.conv2d_1 = Conv2D( - in_channels=6, - out_channels=16, - kernel_size=5, - stride=1, - padding=0, - weight_attr=conv2d_w2_attr, - bias_attr=conv2d_b2_attr) - self.conv2d_1.skip_quant = False - - self.batch_norm_1 = BatchNorm(16) - self.relu6_0 = ReLU6() - self.pool2d_1 = Pool2D(pool_size=2, pool_type='max', pool_stride=2) - self.linear_0 = Linear( - in_features=400, - out_features=120, - weight_attr=fc_w1_attr, - bias_attr=fc_b1_attr) - self.linear_0.skip_quant = True - - self.leaky_relu_0 = LeakyReLU() - self.linear_1 = Linear( - in_features=120, - out_features=84, - weight_attr=fc_w2_attr, - bias_attr=fc_b2_attr) - self.linear_1.skip_quant = False - - self.sigmoid_0 = Sigmoid() - self.linear_2 = Linear( - in_features=84, - out_features=num_classes, - weight_attr=fc_w3_attr, - bias_attr=fc_b3_attr) - self.linear_2.skip_quant = False - self.softmax_0 = Softmax() - - def forward(self, inputs): - x = self.conv2d_0(inputs) - x = self.batch_norm_0(x) - x = self.relu_0(x) - x = self.pool2d_0(x) - x = self.conv2d_1(x) - x = self.batch_norm_1(x) - x = self.relu6_0(x) - x = self.pool2d_1(x) - - x = fluid.layers.flatten(x, 1) - - x = self.linear_0(x) - x = self.leaky_relu_0(x) - x = self.linear_1(x) - x = self.sigmoid_0(x) - x = self.linear_2(x) - x = self.softmax_0(x) - - return x - - class 
TestImperativeOutSclae(unittest.TestCase): def test_out_scale_acc(self): seed = 1000 lr = 0.1 - imperative_out_scale = ImperativeQuantAware() + qat = ImperativeQuantAware() np.random.seed(seed) reader = paddle.batch( paddle.dataset.mnist.test(), batch_size=512, drop_last=True) - lenet = ImperativeLenet() - fixed_state = {} - for name, param in lenet.named_parameters(): - p_shape = param.numpy().shape - p_value = param.numpy() - if name.endswith("bias"): - value = np.zeros_like(p_value).astype('float32') - else: - value = np.random.normal( - loc=0.0, scale=0.01, - size=np.product(p_shape)).reshape(p_shape).astype('float32') - fixed_state[name] = value - lenet.set_dict(fixed_state) - imperative_out_scale.quantize(lenet) + + lenet = ImperativeLenetWithSkipQuant() + lenet = fix_model_dict(lenet) + qat.quantize(lenet) + adam = AdamOptimizer( learning_rate=lr, parameter_list=lenet.parameters()) dynamic_loss_rec = [] lenet.train() - for batch_id, data in enumerate(reader()): - x_data = np.array([x[0].reshape(1, 28, 28) - for x in data]).astype('float32') - y_data = np.array( - [x[1] for x in data]).astype('int64').reshape(-1, 1) - - img = fluid.dygraph.to_variable(x_data) - label = fluid.dygraph.to_variable(y_data) - - out = lenet(img) - loss = fluid.layers.cross_entropy(out, label) - avg_loss = fluid.layers.mean(loss) - avg_loss.backward() - adam.minimize(avg_loss) - lenet.clear_gradients() - dynamic_loss_rec.append(avg_loss.numpy()[0]) - if batch_id % 100 == 0: - _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + loss_list = train_lenet(lenet, reader, adam) lenet.eval() path = "./save_dynamic_quant_infer_model/lenet" save_dir = "./save_dynamic_quant_infer_model" - imperative_out_scale.save_quantized_model( + qat.save_quantized_model( layer=lenet, path=path, input_spec=[ From 4a344c67a709b066bdf1d7995a1aef3bbf9e343f Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Wed, 2 Jun 2021 09:56:16 +0000 Subject: [PATCH 5/8] add default input params for wrapped functional layers --- python/paddle/nn/quant/functional_layers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/nn/quant/functional_layers.py b/python/paddle/nn/quant/functional_layers.py index 03a311dc708a1..ce5fb3e616eb5 100644 --- a/python/paddle/nn/quant/functional_layers.py +++ b/python/paddle/nn/quant/functional_layers.py @@ -75,7 +75,7 @@ class concat(FloatFunctionalLayer): def __init__(self): super(concat, self).__init__() - def forward(self, x, axis, name=None): + def forward(self, x, axis=0, name=None): return manipulation.concat(x, axis, name) @@ -83,5 +83,5 @@ class flatten(FloatFunctionalLayer): def __init__(self): super(flatten, self).__init__() - def forward(self, x, start_axis, stop_axis, name=None): + def forward(self, x, start_axis=0, stop_axis=-1, name=None): return manipulation.flatten(x, start_axis, stop_axis, name) From 4fdfe37d3dd134492753d7bb0bca06608b68e25b Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 3 Jun 2021 02:18:17 +0000 Subject: [PATCH 6/8] up, test=develop --- .../slim/quantization/imperative/qat.py | 15 +- .../slim/quantization/imperative/quant_nn.py | 9 +- .../slim/quantization/imperative/utils.py | 2 + .../slim/tests/imperative_test_utils.py | 222 ++++++++++++++++++ 4 files changed, 237 insertions(+), 11 deletions(-) create mode 100644 python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py 
b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py index 0f2f621fdb328..600ce6397e1af 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py @@ -363,19 +363,19 @@ def apply(self, model): assert isinstance(model, dygraph.Layer), \ "The model must be the instance of dygraph.Layer." - for name, cur_layer in model.named_sublayers(): + for cur_name, cur_layer in model.named_sublayers(): if not self._is_target_layer(cur_layer): continue parent_layer, sub_name = \ - utils.find_parent_layer_and_sub_name(model, name) + utils.find_parent_layer_and_sub_name(model, cur_name) if isinstance(cur_layer, tuple(utils.fake_quant_output_layers)): - cur_quant_layer = quant_nn.__dict__[ - "FakeQuantMAOutputScaleLayer"](cur_layer, self._moving_rate) - else: - cur_quant_layer = quant_nn.__dict__["MAOutputScaleLayer"]( + cur_quant_layer = quant_nn.FakeQuantMAOutputScaleLayer( cur_layer, self._moving_rate) + else: + cur_quant_layer = quant_nn.MAOutputScaleLayer(cur_layer, + self._moving_rate) setattr(parent_layer, sub_name, cur_quant_layer) @@ -460,8 +460,7 @@ def _is_target_layer(self, layer): not isinstance(layer, tuple(utils.fake_quant_leaf_layers)): flag = True # consider QuantizedConv2D and QuantizedLinear ops - if isinstance(layer, - (quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear)): + if isinstance(layer, tuple(utils.fake_quant_wrap_layers)): flag = True if isinstance(layer, paddle.nn.quant.FloatFunctionalLayer): flag = True diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index fe8e4570c69a0..9d5c4ca241704 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -610,14 +610,16 @@ class MAOutputScaleLayer(layers.Layer): Add MovingAverageMaxScale layer to the behind of the input layer. 
""" - def __init__(self, layer=None, moving_rate=0.9, dtype='float32'): + def __init__(self, layer=None, moving_rate=0.9, name=None, dtype='float32'): r""" Construct """ super(MAOutputScaleLayer, self).__init__() self._layer = layer + if name is None: + name = layer.full_name() self._ma_output_scale = \ - MovingAverageAbsMaxScale(layer.full_name(), moving_rate, dtype) + MovingAverageAbsMaxScale(name, moving_rate, dtype) def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) @@ -636,6 +638,7 @@ def __init__(self, weight_bits=8, activation_bits=8, moving_rate=0.9, + name=None, *args, **kwargs): @@ -643,7 +646,7 @@ def __init__(self, self._layer = layer self._fake_quant_output = _get_fake_quant_type( 'moving_average_abs_max', - name=layer.full_name(), + name=layer.full_name() if name is None else name, moving_rate=moving_rate, quant_bits=activation_bits, dtype=self._dtype, diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index faec53a87fdba..dada8e6e7a17d 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -57,6 +57,8 @@ quant_nn.MovingAverageAbsMaxScale, ] +fake_quant_wrap_layers = [quant_nn.QuantizedConv2D, quant_nn.QuantizedLinear] + weight_op_types = [ "conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose", "depthwise_conv2d_transpose" diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py new file mode 100644 index 0000000000000..e491b513c118f --- /dev/null +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -0,0 +1,222 @@ +# copyright (c) 2021 paddlepaddle authors. all rights reserved. +# +# licensed under the apache license, version 2.0 (the "license"); +# you may not use this file except in compliance with the license. +# you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. 
+import numpy as np +import logging + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from paddle.fluid.dygraph.container import Sequential +from paddle.nn import ReLU, ReLU6, LeakyReLU, Sigmoid, Softmax, PReLU +from paddle.nn import Linear, Conv2D, Softmax, BatchNorm2D, MaxPool2D + +from paddle.fluid.log_helper import get_logger + +_logger = get_logger( + __name__, logging.INFO, fmt='%(asctime)s-%(levelname)s: %(message)s') + + +def fix_model_dict(model): + fixed_state = {} + for name, param in model.named_parameters(): + p_shape = param.numpy().shape + p_value = param.numpy() + if name.endswith("bias"): + value = np.zeros_like(p_value).astype('float32') + else: + value = np.random.normal( + loc=0.0, scale=0.01, + size=np.product(p_shape)).reshape(p_shape).astype('float32') + fixed_state[name] = value + model.set_dict(fixed_state) + return model + + +def train_lenet(lenet, reader, optimizer): + loss_list = [] + lenet.train() + + for batch_id, data in enumerate(reader()): + x_data = np.array([x[0].reshape(1, 28, 28) + for x in data]).astype('float32') + y_data = np.array([x[1] for x in data]).astype('int64').reshape(-1, 1) + + img = paddle.to_tensor(x_data) + label = paddle.to_tensor(y_data) + + out = lenet(img) + loss = fluid.layers.cross_entropy(out, label) + avg_loss = fluid.layers.mean(loss) + avg_loss.backward() + + optimizer.minimize(avg_loss) + lenet.clear_gradients() + + if batch_id % 100 == 0: + loss_list.append(avg_loss.numpy()[0]) + _logger.info('{}: {}'.format('loss', avg_loss.numpy())) + + return loss_list + + +class ImperativeLenet(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(ImperativeLenet, self).__init__() + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.features = Sequential( + Conv2D( + in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=False), + BatchNorm2D(6), + ReLU(), + MaxPool2D( + kernel_size=2, stride=2), + Conv2D( + in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr), + BatchNorm2D(16), + PReLU(), + MaxPool2D( + kernel_size=2, stride=2)) + + self.fc = Sequential( + Linear( + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr), + LeakyReLU(), + Linear( + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr), + Sigmoid(), + Linear( + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr), + Softmax()) + + def forward(self, inputs): + x = self.features(inputs) + + x = fluid.layers.flatten(x, 1) + x = self.fc(x) + return x + + +class ImperativeLenetWithSkipQuant(fluid.dygraph.Layer): + def __init__(self, num_classes=10): + super(ImperativeLenetWithSkipQuant, self).__init__() + + conv2d_w1_attr = fluid.ParamAttr(name="conv2d_w_1") + conv2d_w2_attr = fluid.ParamAttr(name="conv2d_w_2") + fc_w1_attr = fluid.ParamAttr(name="fc_w_1") + fc_w2_attr = fluid.ParamAttr(name="fc_w_2") + fc_w3_attr = fluid.ParamAttr(name="fc_w_3") + conv2d_b1_attr = fluid.ParamAttr(name="conv2d_b_1") + 
conv2d_b2_attr = fluid.ParamAttr(name="conv2d_b_2") + fc_b1_attr = fluid.ParamAttr(name="fc_b_1") + fc_b2_attr = fluid.ParamAttr(name="fc_b_2") + fc_b3_attr = fluid.ParamAttr(name="fc_b_3") + self.conv2d_0 = Conv2D( + in_channels=1, + out_channels=6, + kernel_size=3, + stride=1, + padding=1, + weight_attr=conv2d_w1_attr, + bias_attr=conv2d_b1_attr) + self.conv2d_0.skip_quant = True + + self.batch_norm_0 = BatchNorm2D(6) + self.relu_0 = ReLU() + self.pool2d_0 = MaxPool2D(kernel_size=2, stride=2) + self.conv2d_1 = Conv2D( + in_channels=6, + out_channels=16, + kernel_size=5, + stride=1, + padding=0, + weight_attr=conv2d_w2_attr, + bias_attr=conv2d_b2_attr) + self.conv2d_1.skip_quant = False + + self.batch_norm_1 = BatchNorm2D(16) + self.relu6_0 = ReLU6() + self.pool2d_1 = MaxPool2D(kernel_size=2, stride=2) + self.linear_0 = Linear( + in_features=400, + out_features=120, + weight_attr=fc_w1_attr, + bias_attr=fc_b1_attr) + self.linear_0.skip_quant = True + + self.leaky_relu_0 = LeakyReLU() + self.linear_1 = Linear( + in_features=120, + out_features=84, + weight_attr=fc_w2_attr, + bias_attr=fc_b2_attr) + self.linear_1.skip_quant = False + + self.sigmoid_0 = Sigmoid() + self.linear_2 = Linear( + in_features=84, + out_features=num_classes, + weight_attr=fc_w3_attr, + bias_attr=fc_b3_attr) + self.linear_2.skip_quant = False + self.softmax_0 = Softmax() + + def forward(self, inputs): + x = self.conv2d_0(inputs) + x = self.batch_norm_0(x) + x = self.relu_0(x) + x = self.pool2d_0(x) + x = self.conv2d_1(x) + x = self.batch_norm_1(x) + x = self.relu6_0(x) + x = self.pool2d_1(x) + + x = fluid.layers.flatten(x, 1) + + x = self.linear_0(x) + x = self.leaky_relu_0(x) + x = self.linear_1(x) + x = self.sigmoid_0(x) + x = self.linear_2(x) + x = self.softmax_0(x) + + return x From 44ed2d90bcdd77515afdfa8c2de354c786de0ecd Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Thu, 3 Jun 2021 02:54:28 +0000 Subject: [PATCH 7/8] up, test=develop --- python/paddle/fluid/contrib/slim/tests/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index 758e01b8245a2..7ae404b6d0dec 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -270,12 +270,6 @@ list(REMOVE_ITEM TEST_OPS #TODO(wanghaoshuang): Fix this unitest failed on GCC8. 
LIST(REMOVE_ITEM TEST_OPS test_auto_pruning) LIST(REMOVE_ITEM TEST_OPS test_filter_pruning) - -# only tests on singal GPU environment -LIST(REMOVE_ITEM TEST_OPS test_imperative_qat_addquantdequant) - -py_test_modules(test_imperative_qat_addquantdequant MODULES test_imperative_qat_addquantdequant ENVS - CUDA_VISIBLE_DEVICES=0) # fix if(WIN32) @@ -313,7 +307,6 @@ set_tests_properties(test_quantization_pass PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_qat_channelwise PROPERTIES TIMEOUT 120) set_tests_properties(test_user_defined_quantization PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_qat PROPERTIES TIMEOUT 120) -set_tests_properties(test_imperative_qat_addquantdequant PROPERTIES TIMEOUT 120) set_tests_properties(test_imperative_out_scale PROPERTIES TIMEOUT 120) if(LINUX AND WITH_MKLDNN) set_tests_properties(test_quant2_int8_mobilenetv1_mkldnn PROPERTIES TIMEOUT 120) From 006ed3202f1cae24f3903d42e21bf9ed8293a8cc Mon Sep 17 00:00:00 2001 From: pengjuncai <13006307475@163.com> Date: Mon, 7 Jun 2021 03:04:52 +0000 Subject: [PATCH 8/8] up, test=develop --- .../slim/quantization/imperative/quant_nn.py | 14 +-- .../slim/quantization/imperative/utils.py | 7 +- .../slim/tests/imperative_test_utils.py | 2 + .../contrib/slim/tests/test_imperative_qat.py | 6 +- .../tests/test_imperative_qat_channelwise.py | 2 +- .../test_nn_quant_functional_layers.py | 87 +++++++++++++++++++ 6 files changed, 98 insertions(+), 20 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py index 9d5c4ca241704..fd1f7f423ff8f 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py +++ b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py @@ -509,15 +509,7 @@ def __init__(self, quant_on_weight=False) def forward(self, input): - # TODO (jc): support ops that have several inputs - if (isinstance(input, list) or isinstance(input, tuple)) \ - and len(input) > 1: - _logger.info("%s has several inputs, so skip collecting " - "the input scales" % self._layer.full_name()) - return self._layer.forward(input) - else: - quant_input = self._fake_quant_input(input) - return self._layer.forward(quant_input) + return self._layer.forward(self._fake_quant_input(input)) class MovingAverageAbsMaxScale(layers.Layer): @@ -625,8 +617,6 @@ def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: - _logger.info("%s has several outputs, so skip collecting " - "the output threshold" % self._layer.full_name()) return out else: return self._ma_output_scale(out) @@ -656,8 +646,6 @@ def forward(self, *inputs, **kwargs): out = self._layer(*inputs, **kwargs) # TODO (jc): support the ops of several outputs if (isinstance(out, list) or isinstance(out, tuple)) and len(out) > 1: - _logger.info("%s has several outputs, so skip collecting " - "the output threshold" % self._layer.full_name()) return out else: return self._fake_quant_output(out) diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py index dada8e6e7a17d..94639b9cc68f9 100644 --- a/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py +++ 
b/python/paddle/fluid/contrib/slim/quantization/imperative/utils.py @@ -44,10 +44,11 @@ fake_quant_input_layers = [paddle.nn.Conv2D, paddle.nn.Linear] # Apply fake quant for the output of these layers +# TODO(jc): fix the problem of adding duplicate fake_quant ops +# paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU,paddle.nn.LeakyReLU fake_quant_output_layers = [ - paddle.nn.AdaptiveAvgPool2D, paddle.nn.AvgPool2D, paddle.nn.ReLU, - paddle.nn.LeakyReLU, paddle.nn.quant.add, paddle.nn.quant.subtract, - paddle.nn.quant.multiply, paddle.nn.quant.divide + paddle.nn.quant.add, paddle.nn.quant.subtract, paddle.nn.quant.multiply, + paddle.nn.quant.divide ] fake_quant_leaf_layers = [ diff --git a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py index e491b513c118f..cc26f6a88f2e0 100644 --- a/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py +++ b/python/paddle/fluid/contrib/slim/tests/imperative_test_utils.py @@ -127,11 +127,13 @@ def __init__(self, num_classes=10): weight_attr=fc_w3_attr, bias_attr=fc_b3_attr), Softmax()) + self.add = paddle.nn.quant.add() def forward(self, inputs): x = self.features(inputs) x = fluid.layers.flatten(x, 1) + x = self.add(x, paddle.to_tensor(0.0)) # For CI x = self.fc(x) return x diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py index a188a1fdfa9f3..bf411e5b38efa 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat.py @@ -63,13 +63,13 @@ def tearDownClass(cls): except Exception as e: print("Failed to delete {} due to {}".format(cls.root_path, str(e))) - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = None self.activation_quantize_type = None print('weight_quantize_type', self.weight_quantize_type) def run_qat_save(self): - self.set_quant_type() + self.set_vars() imperative_qat = ImperativeQuantAware( weight_quantize_type=self.weight_quantize_type, @@ -201,7 +201,7 @@ def run_qat_save(self): class TestImperativeQatAbsMax(TestImperativeQat): - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = 'abs_max' self.activation_quantize_type = 'moving_average_abs_max' print('weight_quantize_type', self.weight_quantize_type) diff --git a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py index da4e285633680..3d2cad388d172 100644 --- a/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py +++ b/python/paddle/fluid/contrib/slim/tests/test_imperative_qat_channelwise.py @@ -38,7 +38,7 @@ class TestImperativeQatChannelWise(TestImperativeQat): - def set_quant_type(self): + def set_vars(self): self.weight_quantize_type = 'channel_wise_abs_max' self.activation_quantize_type = 'moving_average_abs_max' print('weight_quantize_type', self.weight_quantize_type) diff --git a/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py new file mode 100644 index 0000000000000..86dc43bacf86b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_quant_functional_layers.py @@ -0,0 +1,87 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +import paddle + + +class TestFunctionalLayers(unittest.TestCase): + """ + """ + + def setUp(self): + paddle.disable_static() + np.random.seed(1) + + shape = [3, 100, 120] + self.x = paddle.to_tensor(np.random.random(shape)) + self.y = paddle.to_tensor(np.random.random(shape)) + + def check(self, x, y): + self.assertTrue(np.allclose(x.numpy(), y.numpy())) + + def test_quant_add(self): + out_1 = paddle.add(self.x, self.y) + out_2 = paddle.nn.quant.add()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_subtract(self): + out_1 = paddle.subtract(self.x, self.y) + out_2 = paddle.nn.quant.subtract()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_multiply(self): + out_1 = paddle.multiply(self.x, self.y) + out_2 = paddle.nn.quant.multiply()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_divide(self): + out_1 = paddle.divide(self.x, self.y) + out_2 = paddle.nn.quant.divide()(self.x, self.y) + self.check(out_1, out_2) + + def test_quant_reshape(self): + reshape = [120, 300] + out_1 = paddle.reshape(self.x, reshape) + out_2 = paddle.nn.quant.reshape()(self.x.clone(), reshape) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_transpose(self): + perm = [1, 2, 0] + out_1 = paddle.transpose(self.x, perm) + out_2 = paddle.nn.quant.transpose()(self.x.clone(), perm) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_concat(self): + out_1 = paddle.concat([self.x, self.y], axis=0) + out_2 = paddle.nn.quant.concat()([self.x, self.y], 0) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + def test_quant_flatten(self): + start_axis = 1 + end_axis = 2 + out_1 = paddle.flatten(self.x, start_axis, end_axis) + out_2 = paddle.nn.quant.flatten()(self.x.clone(), start_axis, end_axis) + self.check(out_1, out_2) + self.assertTrue(out_1.shape == out_2.shape) + + +if __name__ == '__main__': + unittest.main()
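
A minimal end-to-end sketch of how the wrapped functional layers are meant to be used together with ImperativeQuantAware, assuming the patches above are applied. TinyNet, its layer sizes, and the save path are illustrative assumptions, not code taken from the patches; only the paddle.nn.quant wrappers and the ImperativeQuantAware API shown in the diffs are relied on.

import paddle
from paddle.fluid.contrib.slim.quantization import ImperativeQuantAware


class TinyNet(paddle.nn.Layer):
    def __init__(self, num_classes=10):
        super(TinyNet, self).__init__()
        self.conv = paddle.nn.Conv2D(1, 6, 3, stride=1, padding=1)
        self.relu = paddle.nn.ReLU()
        # Functional ops go through the paddle.nn.quant wrapper layers so that
        # ImperativeQuantAware can treat them as quantizable sublayers.
        self.flatten = paddle.nn.quant.flatten()
        self.add = paddle.nn.quant.add()
        self.fc = paddle.nn.Linear(6 * 28 * 28, num_classes)

    def forward(self, x):
        x = self.relu(self.conv(x))
        x = self.flatten(x, 1, -1)
        # Mirrors the ImperativeLenet trick above: a broadcast add through the
        # wrapper, so an output scale is collected for the elementwise op.
        x = self.add(self.fc(x), paddle.to_tensor(0.0))
        return x


qat = ImperativeQuantAware(
    weight_quantize_type='abs_max',
    activation_quantize_type='moving_average_abs_max')
model = TinyNet()
qat.quantize(model)  # replaces sublayers with their fake-quant counterparts in place
# ... run the usual dygraph training loop here ...
qat.save_quantized_model(
    layer=model,
    path='./tiny_qat/model',
    input_spec=[
        paddle.static.InputSpec(
            shape=[None, 1, 28, 28], dtype='float32')
    ])

save_quantized_model exports the inference program together with the collected scales under the given path, which is what the reworked tests load back via fluid.io.load_inference_model.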