[PTQ] wrap simulated layers and save the quantized model #33962

Merged · 3 commits · Jul 9, 2021
339 changes: 322 additions & 17 deletions python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py

Large diffs are not rendered by default.

@@ -39,15 +39,18 @@ def __init__(self, activation_quantizer, weight_quantizer):
It should be the instance of BaseQuantizer.
"""
super(PTQConfig, self).__init__()

assert isinstance(activation_quantizer, BaseQuantizer)
assert isinstance(weight_quantizer, BaseQuantizer)
assert isinstance(activation_quantizer, tuple(SUPPORT_ACT_QUANTIZERS))
assert isinstance(weight_quantizer, tuple(SUPPORT_WT_QUANTIZERS))

self.in_act_quantizer = copy.deepcopy(activation_quantizer)
self.out_act_quantizer = copy.deepcopy(activation_quantizer)
self.wt_quantizer = copy.deepcopy(weight_quantizer)

self.quant_hook_handle = None

# In order to wrap simulated layers, use in_act_quantizer
# to calculate the input thresholds for conv2d, linear, etc.
self.enable_in_act_quantizer = False


default_ptq_config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer())
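For orientation, a minimal usage sketch of the config introduced above: it pairs an activation quantizer from SUPPORT_ACT_QUANTIZERS with a weight quantizer from SUPPORT_WT_QUANTIZERS. The import path and the ImperativePTQ constructor argument are assumptions based on this module, not part of the diff.

# Assumed import path for the imperative PTQ classes.
from paddle.fluid.contrib.slim.quantization import (
    PTQConfig, ImperativePTQ, HistQuantizer, PerChannelAbsmaxQuantizer)

# Histogram-based quantizer for activations, per-channel abs-max for weights.
config = PTQConfig(
    activation_quantizer=HistQuantizer(quant_bits=8),
    weight_quantizer=PerChannelAbsmaxQuantizer(quant_bits=8))
ptq = ImperativePTQ(quant_config=config)  # constructor kwarg assumed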
@@ -16,6 +16,7 @@
import math
import numpy as np
from . import ptq_config
from .ptq_registry import PTQRegistry


def quant_forward_post_hook(layer, inputs, outputs):
@@ -24,5 +25,8 @@ def quant_forward_post_hook(layer, inputs, outputs):
"""
assert hasattr(layer, '_quant_config'), \
"The layer should have _quant_config attr"
layer._quant_config.in_act_quantizer.sample_data(layer, inputs)
layer._quant_config.out_act_quantizer.sample_data(layer, (outputs, ))

qc = layer._quant_config
if qc.enable_in_act_quantizer:
qc.in_act_quantizer.sample_data(layer, inputs)
qc.out_act_quantizer.sample_data(layer, (outputs, ))
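The hook above now samples input activations only when enable_in_act_quantizer is turned on, which the PTQ pass does for simulated layers such as Conv2D and Linear. As context, a hedged sketch of how such a hook is typically attached in dygraph mode; register_forward_post_hook is a real paddle.nn.Layer API, while attach_ptq_hook and the exact wiring are illustrative assumptions, not code from this PR.

from paddle.fluid.contrib.slim.quantization.imperative import ptq_hooks

def attach_ptq_hook(layer, quant_config):
    # Illustrative helper: give the layer its quant config and register the
    # sampling hook; the handle is kept so the hook can be removed after
    # calibration.
    layer._quant_config = quant_config
    quant_config.quant_hook_handle = layer.register_forward_post_hook(
        ptq_hooks.quant_forward_post_hook)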
@@ -24,11 +24,9 @@
from ..cal_kl_threshold import cal_kl_threshold

__all__ = [
'BaseQuantizer',
'AbsmaxQuantizer',
'PerChannelAbsmaxQuantizer',
'KLQuantizer',
'HistQuantizer',
'BaseQuantizer', 'AbsmaxQuantizer', 'PerChannelAbsmaxQuantizer',
'KLQuantizer', 'HistQuantizer', 'SUPPORT_ACT_QUANTIZERS',
'SUPPORT_WT_QUANTIZERS'
]


@@ -110,6 +108,7 @@ def __init__(self, quant_bits=8):

self.quant_bits = quant_bits

self.abs_max_vals = []
self.thresholds = []

@abc.abstractmethod
@@ -133,10 +132,10 @@ def sample_data(self, layer, tensors):
assert isinstance(tensors, tuple)

abs_max_vals = [abs_max_value(t) for t in tensors]
self.thresholds = merge_max_value(self.thresholds, abs_max_vals)
self.abs_max_vals = merge_max_value(self.abs_max_vals, abs_max_vals)

def cal_thresholds(self):
pass
self.thresholds = self.abs_max_vals


class PerChannelAbsmaxQuantizer(BaseQuantizer):
@@ -164,10 +163,11 @@ def sample_data(self, layer, tensors):
]
abs_max_vals_list.append(abs_max_vals)

self.thresholds = merge_max_value(self.thresholds, abs_max_vals_list)
self.abs_max_vals = merge_max_value(self.abs_max_vals,
abs_max_vals_list)

def cal_thresholds(self):
pass
self.thresholds = self.abs_max_vals


@six.add_metaclass(abc.ABCMeta)
@@ -180,7 +180,6 @@ def __init__(self, quant_bits=8, bins=1024, upsample_bins=64):
self.bins = bins
self.upsample_bins = upsample_bins

self.abs_max_vals = []
self.hists = []

def sample_data(self, layer, tensors):
@@ -262,3 +261,7 @@ def cal_thresholds(self):
bin_width = abs_max_val / hist.shape[0]
threshold = cal_kl_threshold(hist, bin_width, self.quant_bits)
self.thresholds.append(threshold)


SUPPORT_ACT_QUANTIZERS = [AbsmaxQuantizer, HistQuantizer, KLQuantizer]
SUPPORT_WT_QUANTIZERS = [AbsmaxQuantizer, PerChannelAbsmaxQuantizer]
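The refactor above separates statistics collection (abs_max_vals) from threshold computation (cal_thresholds), so thresholds can be recomputed once after calibration. A small hedged sketch of that two-step flow; the import path is an assumption and the calibration tensors are synthetic.

import paddle
from paddle.fluid.contrib.slim.quantization import AbsmaxQuantizer  # path assumed

quantizer = AbsmaxQuantizer(quant_bits=8)

# Step 1: sample abs-max statistics over a few calibration tensors.
for _ in range(4):
    fake_activation = paddle.rand([8, 16])
    quantizer.sample_data(None, (fake_activation, ))  # layer arg unused here

# Step 2: turn the collected statistics into thresholds.
quantizer.cal_thresholds()
print(quantizer.thresholds)  # one abs-max value, merged over all sampled batches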
@@ -47,12 +47,22 @@ def __init__(self, layer, input_names, weight_names, output_names):
LayerInfo(paddle.nn.quant.add, ['X', 'Y'], [], ['Out']),
]

QUANT_LAYERS_INFO = [
LayerInfo(paddle.nn.quant.quant_layers.QuantizedConv2D, ['Input'],
['Filter'], ['Output']),
LayerInfo(paddle.nn.quant.quant_layers.QuantizedLinear, ['X'], ['Y'],
['Out']),
]

SIMULATED_LAYERS = [paddle.nn.Conv2D, paddle.nn.Linear]


class PTQRegistry(object):
"""
Register the supported layers for PTQ and provide layers info.
"""
supported_layers_map = {}
registered_layers_map = {}
is_inited = False

def __init__(self):
@@ -63,24 +73,62 @@ def _init(cls):
if not cls.is_inited:
for layer_info in PTQ_LAYERS_INFO:
cls.supported_layers_map[layer_info.layer] = layer_info

all_layers_info = PTQ_LAYERS_INFO + QUANT_LAYERS_INFO
for layer_info in all_layers_info:
cls.registered_layers_map[layer_info.layer] = layer_info
cls.is_inited = True

@classmethod
def is_supported_layer(cls, layer):
"""
Analyze whether the layer supports quantization.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer is supported.
"""
cls._init()
return layer in cls.supported_layers_map or \
isinstance(layer, tuple(cls.supported_layers_map.keys()))

@classmethod
def is_registered_layer(cls, layer):
"""
Analyze whether the layer has registered layer_info.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer has registered layer_info.
"""
cls._init()
return layer in cls.registered_layers_map or \
isinstance(layer, tuple(cls.registered_layers_map.keys()))

@classmethod
def is_simulated_quant_layer(cls, layer):
"""
Analyze whether the layer is a simulated quant layer.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer is a simulated quant layer.
"""
return layer in SIMULATED_LAYERS or \
isinstance(layer, tuple(SIMULATED_LAYERS))

@classmethod
def layer_info(cls, layer):
"""
Get the information for the supported layer.
Get the information for the layer.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
layer_info(LayerInfo): The layer info of the input layer.
"""
assert cls.is_supported_layer(
layer), "The input layer is not supported."
assert cls.is_registered_layer(layer), \
"The input layer is not register."

for layer_key, layer_info in cls.supported_layers_map.items():
for layer_key, layer_info in cls.registered_layers_map.items():
if layer == layer_key or isinstance(layer, layer_key):
return layer_info
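A hedged sketch of how the new registry queries fit together: simulated layers (Conv2D/Linear) get wrapped, and the wrapped quant layers are looked up through registered_layers_map when thresholds are written back. The import path and the LayerInfo attribute names are assumptions based on the file layout and constructor shown in this diff.

import paddle
from paddle.fluid.contrib.slim.quantization.imperative.ptq_registry import PTQRegistry

conv = paddle.nn.Conv2D(3, 8, 3)

# Conv2D and Linear are SIMULATED_LAYERS: they are replaced by wrapped quant
# layers, so their input thresholds must be collected as well.
print(PTQRegistry.is_simulated_quant_layer(conv))             # True (instance)
print(PTQRegistry.is_simulated_quant_layer(paddle.nn.Linear)) # True (class)

# The wrapped QuantizedConv2D is in QUANT_LAYERS_INFO, so its tensor names can
# be resolved when filling thresholds into the saved program (attribute names
# assumed from the LayerInfo constructor).
info = PTQRegistry.layer_info(paddle.nn.quant.quant_layers.QuantizedConv2D)
print(info.input_names, info.weight_names, info.output_names)
# ['Input'] ['Filter'] ['Output']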
@@ -379,12 +379,12 @@ def apply(self, model):

setattr(parent_layer, sub_name, cur_quant_layer)

def save_quantized_model(self, layer, path, input_spec=None, **config):
def save_quantized_model(self, model, path, input_spec=None, **config):
"""
Save the quantized model for the inference.

Args:
layer (Layer): The Layer to be saved.
model (Layer): The model to be saved.
path (str): The path prefix to save model. The format is
``dirname/file_prefix`` or ``file_prefix``.
input_spec (list[InputSpec|Tensor], optional): Describes the input
@@ -407,10 +407,10 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
Returns:
None
"""
assert isinstance(layer, dygraph.Layer), \
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)
paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)

is_dynamic_mode = False
if paddle.in_dynamic_mode():
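For orientation, a hedged end-to-end sketch of the flow this PR completes: quantize a dygraph model, run a few calibration batches so the hooks sample activation ranges, then wrap the simulated layers and export. The ImperativePTQ.quantize() name and the import paths are assumptions based on this module; only save_quantized_model's signature comes from the diff above.

import paddle
from paddle.static import InputSpec
from paddle.fluid.contrib.slim.quantization import ImperativePTQ, default_ptq_config

# A toy float model; any dygraph model with Conv2D/Linear layers would do.
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(1, 6, 3), paddle.nn.ReLU(),
    paddle.nn.Flatten(), paddle.nn.Linear(6 * 26 * 26, 10))

ptq = ImperativePTQ(default_ptq_config)
quant_model = ptq.quantize(model)  # method name assumed

# Calibration: forward passes let the post-hooks sample activation ranges.
quant_model.eval()
for _ in range(4):
    quant_model(paddle.rand([8, 1, 28, 28]))

# Compute thresholds, wrap simulated layers, and export via paddle.jit.save.
ptq.save_quantized_model(
    model=quant_model,
    path="./ptq_out/toy",
    input_spec=[InputSpec(shape=[None, 1, 28, 28], dtype="float32")])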
@@ -69,7 +69,7 @@
]

# The weight format of these layers is Cin * Cout * H * W
spec_channel_axis_layers = [paddle.nn.Conv2D, paddle.nn.Conv2DTranspose]
spec_channel_axis_layers = [paddle.nn.Conv2DTranspose, paddle.nn.Linear]

weight_op_types = [
"conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose",
@@ -139,6 +139,17 @@ def find_parent_layer_and_sub_name(model, name):
return parent_layer, sub_name


def program_all_ops(program):
"""
Return all ops for the input program.
"""
all_ops = []
for block in program.blocks:
for op in block.ops:
all_ops.append(op)
return all_ops


def is_leaf_layer(layer):
"""
Whether the layer is leaf layer.
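A short hedged usage sketch for the program_all_ops helper added above: it flattens the ops of every block so the save path can scan for ops that need thresholds. Building a bare static Program here and the import path are illustrative assumptions.

import paddle
from paddle.fluid.contrib.slim.quantization.imperative.utils import program_all_ops

paddle.enable_static()
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog):
    x = paddle.static.data(name="x", shape=[None, 4], dtype="float32")
    y = paddle.static.nn.fc(x, size=2)

# Iterate over every op in every block of the program.
for op in program_all_ops(main_prog):
    print(op.type)  # e.g. 'mul', 'elementwise_add'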
@@ -128,9 +128,11 @@ def __init__(self, num_classes=10):
bias_attr=fc_b3_attr),
Softmax())
self.add = paddle.nn.quant.add()
self.quant_stub = paddle.nn.quant.QuantStub()

def forward(self, inputs):
x = self.features(inputs)
x = self.quant_stub(inputs)
x = self.features(x)

x = fluid.layers.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI