PaddlePaddle · baiyfbupt · Nov 18, 2020 · Nov 12, 2020 · Nov 12, 2020 · Nov 12, 2020
diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py b/python/paddle/fluid/contrib/slim/quantization/imperative/qat.py
@@ -59,7 +59,11 @@ def __init__(self,
                  weight_quantize_type='abs_max',
                  activation_quantize_type='moving_average_abs_max',
                  moving_rate=0.9,
-                 quantizable_layer_type=['Conv2D', 'Linear']):
+                 quantizable_layer_type=['Conv2D', 'Linear'],
+                 weight_preprocess_layer=None,
+                 act_preprocess_layer=None,
+                 weight_quantize_layer=None,
+                 act_quantize_layer=None):
         """
         The constructor for ImperativeQuantAware.
 
@@ -81,7 +85,28 @@ def __init__(self,
             quantizable_op_type(list[str]): List the type of layers that will be quantized. 
                 Default is ['Conv2D', 'Linear']. The quantizable_op_type in
                 QuantizationFreezePass and ConvertToInt8Pass must be the same as this.
-
+            weight_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer class (not an instance) that
+                defines how to preprocess the weight before quantization. It lets users quickly
+                test whether their preprocessing method works. The layer is constructed without arguments;
+                its input is the non-quantized weight and it returns the processed weight to be quantized.
+                If None, the weight will be quantized directly. Default is None.
+            act_preprocess_layer(paddle.nn.Layer, optional): A paddle Layer class (not an instance) that
+                defines how to preprocess the activation before quantization. It lets users quickly
+                test whether their preprocessing method works. The layer is constructed without arguments;
+                its input is the non-quantized activation and it returns the processed activation to be quantized.
+                If None, the activation will be quantized directly. Default is None.
+            weight_quantize_layer(paddle.nn.Layer, optional): A paddle Layer class (not an instance) that defines
+                how to quantize the weight. It lets users quickly test whether their quantization method works.
+                The layer is constructed without arguments and must implement both quantization and
+                dequantization: its input is the non-quantized weight and it returns the
+                dequantized weight. If None, the quantization op selected by
+                'weight_quantize_type' is used. Default is None.
+            act_quantize_layer(paddle.nn.Layer, optional): A paddle Layer class (not an instance) that defines
+                how to quantize the activation. It lets users quickly test whether their quantization method works.
+                The layer is constructed without arguments and must implement both quantization and
+                dequantization: its input is the non-quantized activation and it returns the
+                dequantized activation. If None, the quantization op selected by
+                'activation_quantize_type' is used. Default is None.
 
         Examples:
         .. code-block:: python
@@ -118,6 +143,19 @@ def __init__(self,
         self._activation_bits = activation_bits
         self._moving_rate = moving_rate
 
+        self._weight_pre_layer = weight_preprocess_layer
+        self._act_pre_layer = act_preprocess_layer
+        self._weight_quant_layer = weight_quantize_layer
+        self._act_quant_layer = act_quantize_layer
+
+        t_check = lambda method: method is None or issubclass(method, dygraph.layers.Layer)
+        assert t_check(
+            self._weight_pre_layer), "weight_preprocess should be nn.Layer"
+        assert t_check(self._act_pre_layer), "act_preprocess should be nn.Layer"
+        assert t_check(
+            self._weight_quant_layer), "weight_quantize should be nn.Layer"
+        assert t_check(self._act_quant_layer), "act_quantize should be nn.Layer"
+
         quant_type = {
             'abs_max', 'moving_average_abs_max', 'channel_wise_abs_max'
         }
@@ -189,7 +227,9 @@ def _get_quantized_counterpart(self, layer):
 
         quantized_layer = quant_nn.__dict__[quantized_counterpart[index]](
             layer, self._weight_bits, self._activation_bits, self._moving_rate,
-            self._weight_quantize_type, self._activation_quantize_type)
+            self._weight_quantize_type, self._activation_quantize_type,
+            self._weight_pre_layer, self._act_pre_layer,
+            self._weight_quant_layer, self._act_quant_layer)
         return quantized_layer
 
 

diff --git a/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py b/python/paddle/fluid/contrib/slim/quantization/imperative/quant_nn.py
@@ -332,7 +332,11 @@ def __init__(self,
                  activation_bits=8,
                  moving_rate=0.9,
                  weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
+                 weight_pre_layer=None,
+                 act_pre_layer=None,
+                 weight_quant_layer=None,
+                 act_quant_layer=None):
         super(QuantizedConv2D, self).__init__()
         # For Conv2D
         self._groups = getattr(layer, '_groups')
@@ -347,26 +351,44 @@ def __init__(self,
         self.bias = getattr(layer, 'bias')
         # For FakeQuant
         self._conv2d_quant_axis = 0
-        self._fake_quant_weight = _get_fake_quant_type(
-            weight_quantize_type,
-            name=self.weight.name,
-            moving_rate=moving_rate,
-            quant_bits=weight_bits,
-            dtype=self._dtype,
-            quant_on_weight=True,
-            channel_num=self.weight.shape[self._conv2d_quant_axis],
-            quant_axis=self._conv2d_quant_axis)
-        self._fake_quant_input = _get_fake_quant_type(
-            activation_quantize_type,
-            name=layer.full_name(),
-            moving_rate=moving_rate,
-            quant_bits=activation_bits,
-            dtype=self._dtype,
-            quant_on_weight=False)
+
+        if weight_quant_layer is not None:
+            self._fake_quant_weight = weight_quant_layer()
+        else:
+            self._fake_quant_weight = _get_fake_quant_type(
+                weight_quantize_type,
+                name=self.weight.name,
+                moving_rate=moving_rate,
+                quant_bits=weight_bits,
+                dtype=self._dtype,
+                quant_on_weight=True,
+                channel_num=self.weight.shape[self._conv2d_quant_axis],
+                quant_axis=self._conv2d_quant_axis)
+        if act_quant_layer is not None:
+            self._fake_quant_input = act_quant_layer()
+        else:
+            self._fake_quant_input = _get_fake_quant_type(
+                activation_quantize_type,
+                name=layer.full_name(),
+                moving_rate=moving_rate,
+                quant_bits=activation_bits,
+                dtype=self._dtype,
+                quant_on_weight=False)
+
+        self._act_preprocess = act_pre_layer(
+        ) if act_pre_layer is not None else None
+        self._weight_preprocess = weight_pre_layer(
+        ) if weight_pre_layer is not None else None
 
     def forward(self, input):
+        if self._act_preprocess is not None:
+            input = self._act_preprocess(input)
         quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
+        weight = self.weight
+        if self._weight_preprocess is not None:
+            weight = self._weight_preprocess(self.weight)
+        quant_weight = self._fake_quant_weight(weight)
 
         if in_dygraph_mode() and self._l_type == 'conv2d':
             attrs = ('strides', self._stride, 'paddings', self._padding,
@@ -428,7 +450,11 @@ def __init__(self,
                  activation_bits=8,
                  moving_rate=0.9,
                  weight_quantize_type='abs_max',
-                 activation_quantize_type='abs_max'):
+                 activation_quantize_type='abs_max',
+                 weight_pre_layer=None,
+                 act_pre_layer=None,
+                 weight_quant_layer=None,
+                 act_quant_layer=None):
         super(QuantizedLinear, self).__init__()
         # For Linear
         self._act = getattr(layer, '_act')
@@ -437,26 +463,46 @@ def __init__(self,
         self.bias = getattr(layer, 'bias')
         # For FakeQuant
         self._linear_quant_axis = 1
-        self._fake_quant_weight = _get_fake_quant_type(
-            weight_quantize_type,
-            name=self.weight.name,
-            moving_rate=moving_rate,
-            quant_bits=weight_bits,
-            dtype=self._dtype,
-            quant_on_weight=True,
-            channel_num=self.weight.shape[self._linear_quant_axis],
-            quant_axis=self._linear_quant_axis)
-        self._fake_quant_input = _get_fake_quant_type(
-            activation_quantize_type,
-            name=layer.full_name(),
-            moving_rate=moving_rate,
-            quant_bits=activation_bits,
-            dtype=self._dtype,
-            quant_on_weight=False)
+
+        if weight_quant_layer is not None:
+            self._fake_quant_weight = weight_quant_layer()
+        else:
+            self._fake_quant_weight = _get_fake_quant_type(
+                weight_quantize_type,
+                name=self.weight.name,
+                moving_rate=moving_rate,
+                quant_bits=weight_bits,
+                dtype=self._dtype,
+                quant_on_weight=True,
+                channel_num=self.weight.shape[self._linear_quant_axis],
+                quant_axis=self._linear_quant_axis)
+
+        if act_quant_layer is not None:
+            self._fake_quant_input = act_quant_layer()
+        else:
+            self._fake_quant_input = _get_fake_quant_type(
+                activation_quantize_type,
+                name=layer.full_name(),
+                moving_rate=moving_rate,
+                quant_bits=activation_bits,
+                dtype=self._dtype,
+                quant_on_weight=False)
+
+        self._act_preprocess = act_pre_layer(
+        ) if act_pre_layer is not None else None
+        self._weight_preprocess = weight_pre_layer(
+        ) if weight_pre_layer is not None else None
 
     def forward(self, input):
+        if self._act_preprocess is not None:
+            input = self._act_preprocess(input)
         quant_input = self._fake_quant_input(input)
-        quant_weight = self._fake_quant_weight(self.weight)
+
+        weight = self.weight
+        if self._weight_preprocess is not None:
+            weight = self._weight_preprocess(self.weight)
+        quant_weight = self._fake_quant_weight(weight)
+
         if in_dygraph_mode():
             pre_bias = _varbase_creator(dtype=input.dtype)
             core.ops.matmul(quant_input, quant_weight, pre_bias, 'transpose_X',