[PTQ] wrap simulated layers and save the quantized model #33962

Merged · 3 commits · Jul 9, 2021
339 changes: 322 additions & 17 deletions python/paddle/fluid/contrib/slim/quantization/imperative/ptq.py

Large diffs are not rendered by default.

@@ -39,15 +39,18 @@ def __init__(self, activation_quantizer, weight_quantizer):
It should be the instance of BaseQuantizer.
"""
super(PTQConfig, self).__init__()

assert isinstance(activation_quantizer, BaseQuantizer)
assert isinstance(weight_quantizer, BaseQuantizer)
assert isinstance(activation_quantizer, tuple(SUPPORT_ACT_QUANTIZERS))
assert isinstance(weight_quantizer, tuple(SUPPORT_WT_QUANTIZERS))

self.in_act_quantizer = copy.deepcopy(activation_quantizer)
self.out_act_quantizer = copy.deepcopy(activation_quantizer)
self.wt_quantizer = copy.deepcopy(weight_quantizer)

self.quant_hook_handle = None

# In order to wrap simulated layers, use in_act_quantizer
# to calculate the input thresholds for conv2d, linear, etc.
self.enable_in_act_quantizer = False


default_ptq_config = PTQConfig(AbsmaxQuantizer(), AbsmaxQuantizer())
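For orientation, a minimal usage sketch of the config introduced above: it pairs an activation quantizer from SUPPORT_ACT_QUANTIZERS with a weight quantizer from SUPPORT_WT_QUANTIZERS. The import path and the ImperativePTQ constructor argument are assumptions based on this module, not part of the diff.

# Assumed import path for the imperative PTQ classes.
from paddle.fluid.contrib.slim.quantization import (
    PTQConfig, ImperativePTQ, HistQuantizer, PerChannelAbsmaxQuantizer)

# Histogram-based quantizer for activations, per-channel abs-max for weights.
config = PTQConfig(
    activation_quantizer=HistQuantizer(quant_bits=8),
    weight_quantizer=PerChannelAbsmaxQuantizer(quant_bits=8))
ptq = ImperativePTQ(quant_config=config)  # constructor kwarg assumed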
@@ -16,6 +16,7 @@
import math
import numpy as np
from . import ptq_config
from .ptq_registry import PTQRegistry


def quant_forward_post_hook(layer, inputs, outputs):
@@ -24,5 +25,8 @@ def quant_forward_post_hook(layer, inputs, outputs):
"""
assert hasattr(layer, '_quant_config'), \
"The layer should have _quant_config attr"
layer._quant_config.in_act_quantizer.sample_data(layer, inputs)
layer._quant_config.out_act_quantizer.sample_data(layer, (outputs, ))

qc = layer._quant_config
if qc.enable_in_act_quantizer:
qc.in_act_quantizer.sample_data(layer, inputs)
qc.out_act_quantizer.sample_data(layer, (outputs, ))
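The hook above now samples input activations only when enable_in_act_quantizer is turned on, which the PTQ pass does for simulated layers such as Conv2D and Linear. As context, a hedged sketch of how such a hook is typically attached in dygraph mode; register_forward_post_hook is a real paddle.nn.Layer API, while attach_ptq_hook and the exact wiring are illustrative assumptions, not code from this PR.

from paddle.fluid.contrib.slim.quantization.imperative import ptq_hooks

def attach_ptq_hook(layer, quant_config):
    # Illustrative helper: give the layer its quant config and register the
    # sampling hook; the handle is kept so the hook can be removed after
    # calibration.
    layer._quant_config = quant_config
    quant_config.quant_hook_handle = layer.register_forward_post_hook(
        ptq_hooks.quant_forward_post_hook)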
@@ -24,11 +24,9 @@
from ..cal_kl_threshold import cal_kl_threshold

__all__ = [
'BaseQuantizer',
'AbsmaxQuantizer',
'PerChannelAbsmaxQuantizer',
'KLQuantizer',
'HistQuantizer',
'BaseQuantizer', 'AbsmaxQuantizer', 'PerChannelAbsmaxQuantizer',
'KLQuantizer', 'HistQuantizer', 'SUPPORT_ACT_QUANTIZERS',
'SUPPORT_WT_QUANTIZERS'
]


@@ -110,6 +108,7 @@ def __init__(self, quant_bits=8):

self.quant_bits = quant_bits

self.abs_max_vals = []
self.thresholds = []

@abc.abstractmethod
@@ -133,10 +132,10 @@ def sample_data(self, layer, tensors):
assert isinstance(tensors, tuple)

abs_max_vals = [abs_max_value(t) for t in tensors]
self.thresholds = merge_max_value(self.thresholds, abs_max_vals)
self.abs_max_vals = merge_max_value(self.abs_max_vals, abs_max_vals)

def cal_thresholds(self):
pass
self.thresholds = self.abs_max_vals


class PerChannelAbsmaxQuantizer(BaseQuantizer):
@@ -164,10 +163,11 @@ def sample_data(self, layer, tensors):
]
abs_max_vals_list.append(abs_max_vals)

self.thresholds = merge_max_value(self.thresholds, abs_max_vals_list)
self.abs_max_vals = merge_max_value(self.abs_max_vals,
abs_max_vals_list)

def cal_thresholds(self):
pass
self.thresholds = self.abs_max_vals


@six.add_metaclass(abc.ABCMeta)
@@ -180,7 +180,6 @@ def __init__(self, quant_bits=8, bins=1024, upsample_bins=64):
self.bins = bins
self.upsample_bins = upsample_bins

self.abs_max_vals = []
self.hists = []

def sample_data(self, layer, tensors):
@@ -262,3 +261,7 @@ def cal_thresholds(self):
bin_width = abs_max_val / hist.shape[0]
threshold = cal_kl_threshold(hist, bin_width, self.quant_bits)
self.thresholds.append(threshold)


SUPPORT_ACT_QUANTIZERS = [AbsmaxQuantizer, HistQuantizer, KLQuantizer]
SUPPORT_WT_QUANTIZERS = [AbsmaxQuantizer, PerChannelAbsmaxQuantizer]
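The refactor above separates statistics collection (abs_max_vals) from threshold computation (cal_thresholds), so thresholds can be recomputed once after calibration. A small hedged sketch of that two-step flow; the import path is an assumption and the calibration tensors are synthetic.

import paddle
from paddle.fluid.contrib.slim.quantization import AbsmaxQuantizer  # path assumed

quantizer = AbsmaxQuantizer(quant_bits=8)

# Step 1: sample abs-max statistics over a few calibration tensors.
for _ in range(4):
    fake_activation = paddle.rand([8, 16])
    quantizer.sample_data(None, (fake_activation, ))  # layer arg unused here

# Step 2: turn the collected statistics into thresholds.
quantizer.cal_thresholds()
print(quantizer.thresholds)  # one abs-max value, merged over all sampled batches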
@@ -47,12 +47,22 @@ def __init__(self, layer, input_names, weight_names, output_names):
LayerInfo(paddle.nn.quant.add, ['X', 'Y'], [], ['Out']),
]

QUANT_LAYERS_INFO = [
LayerInfo(paddle.nn.quant.quant_layers.QuantizedConv2D, ['Input'],
['Filter'], ['Output']),
LayerInfo(paddle.nn.quant.quant_layers.QuantizedLinear, ['X'], ['Y'],
['Out']),
]

SIMULATED_LAYERS = [paddle.nn.Conv2D, paddle.nn.Linear]


class PTQRegistry(object):
"""
Register the supported layers for PTQ and provide layers info.
"""
supported_layers_map = {}
registered_layers_map = {}
is_inited = False

def __init__(self):
@@ -63,24 +73,62 @@ def _init(cls):
if not cls.is_inited:
for layer_info in PTQ_LAYERS_INFO:
cls.supported_layers_map[layer_info.layer] = layer_info

all_layers_info = PTQ_LAYERS_INFO + QUANT_LAYERS_INFO
for layer_info in all_layers_info:
cls.registered_layers_map[layer_info.layer] = layer_info
cls.is_inited = True

@classmethod
def is_supported_layer(cls, layer):
"""
Analyze whether the layer supports quantization.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer is supported.
"""
cls._init()
return layer in cls.supported_layers_map or \
isinstance(layer, tuple(cls.supported_layers_map.keys()))

@classmethod
def is_registered_layer(cls, layer):
"""
Analyze whether the layer has registered layer_info.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer has registered layer_info.
"""
cls._init()
return layer in cls.registered_layers_map or \
isinstance(layer, tuple(cls.registered_layers_map.keys()))

@classmethod
def is_simulated_quant_layer(cls, layer):
"""
Analyze whether the layer is a simulated quant layer.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
flag(bool): Whether the layer is a simulated quant layer.
"""
return layer in SIMULATED_LAYERS or \
isinstance(layer, tuple(SIMULATED_LAYERS))

@classmethod
def layer_info(cls, layer):
"""
Get the information for the supported layer.
Get the information for the layer.
Args:
layer(Layer): The input layer can be a python class or an instance.
Returns:
layer_info(LayerInfo): The layer info of the input layer.
"""
assert cls.is_supported_layer(
layer), "The input layer is not supported."
assert cls.is_registered_layer(layer), \
"The input layer is not register."

for layer_key, layer_info in cls.supported_layers_map.items():
for layer_key, layer_info in cls.registered_layers_map.items():
if layer == layer_key or isinstance(layer, layer_key):
return layer_info
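A hedged sketch of how the new registry queries fit together: simulated layers (Conv2D/Linear) get wrapped, and the wrapped quant layers are looked up through registered_layers_map when thresholds are written back. The import path and the LayerInfo attribute names are assumptions based on the file layout and constructor shown in this diff.

import paddle
from paddle.fluid.contrib.slim.quantization.imperative.ptq_registry import PTQRegistry

conv = paddle.nn.Conv2D(3, 8, 3)

# Conv2D and Linear are SIMULATED_LAYERS: they are replaced by wrapped quant
# layers, so their input thresholds must be collected as well.
print(PTQRegistry.is_simulated_quant_layer(conv))             # True (instance)
print(PTQRegistry.is_simulated_quant_layer(paddle.nn.Linear)) # True (class)

# The wrapped QuantizedConv2D is in QUANT_LAYERS_INFO, so its tensor names can
# be resolved when filling thresholds into the saved program (attribute names
# assumed from the LayerInfo constructor).
info = PTQRegistry.layer_info(paddle.nn.quant.quant_layers.QuantizedConv2D)
print(info.input_names, info.weight_names, info.output_names)
# ['Input'] ['Filter'] ['Output']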
@@ -379,12 +379,12 @@ def apply(self, model):

setattr(parent_layer, sub_name, cur_quant_layer)

def save_quantized_model(self, layer, path, input_spec=None, **config):
def save_quantized_model(self, model, path, input_spec=None, **config):
"""
Save the quantized model for the inference.

Args:
layer (Layer): The Layer to be saved.
model (Layer): The model to be saved.
path (str): The path prefix to save model. The format is
``dirname/file_prefix`` or ``file_prefix``.
input_spec (list[InputSpec|Tensor], optional): Describes the input
@@ -407,10 +407,10 @@ def save_quantized_model(self, layer, path, input_spec=None, **config):
Returns:
None
"""
assert isinstance(layer, dygraph.Layer), \
assert isinstance(model, dygraph.Layer), \
"The model must be the instance of dygraph.Layer."

paddle.jit.save(layer=layer, path=path, input_spec=input_spec, **config)
paddle.jit.save(layer=model, path=path, input_spec=input_spec, **config)

is_dynamic_mode = False
if paddle.in_dynamic_mode():
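For orientation, a hedged end-to-end sketch of the flow this PR completes: quantize a dygraph model, run a few calibration batches so the hooks sample activation ranges, then wrap the simulated layers and export. The ImperativePTQ.quantize() name and the import paths are assumptions based on this module; only save_quantized_model's signature comes from the diff above.

import paddle
from paddle.static import InputSpec
from paddle.fluid.contrib.slim.quantization import ImperativePTQ, default_ptq_config

# A toy float model; any dygraph model with Conv2D/Linear layers would do.
model = paddle.nn.Sequential(
    paddle.nn.Conv2D(1, 6, 3), paddle.nn.ReLU(),
    paddle.nn.Flatten(), paddle.nn.Linear(6 * 26 * 26, 10))

ptq = ImperativePTQ(default_ptq_config)
quant_model = ptq.quantize(model)  # method name assumed

# Calibration: forward passes let the post-hooks sample activation ranges.
quant_model.eval()
for _ in range(4):
    quant_model(paddle.rand([8, 1, 28, 28]))

# Compute thresholds, wrap simulated layers, and export via paddle.jit.save.
ptq.save_quantized_model(
    model=quant_model,
    path="./ptq_out/toy",
    input_spec=[InputSpec(shape=[None, 1, 28, 28], dtype="float32")])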
@@ -69,7 +69,7 @@
]

# The weight format of these layers is Cin * Cout * H * W
spec_channel_axis_layers = [paddle.nn.Conv2D, paddle.nn.Conv2DTranspose]
spec_channel_axis_layers = [paddle.nn.Conv2DTranspose, paddle.nn.Linear]

weight_op_types = [
"conv2d", "depthwise_conv2d", "matmul", "conv2d_transpose",
@@ -139,6 +139,17 @@ def find_parent_layer_and_sub_name(model, name):
return parent_layer, sub_name


def program_all_ops(program):
"""
Return all ops for the input program.
"""
all_ops = []
for block in program.blocks:
for op in block.ops:
all_ops.append(op)
return all_ops


def is_leaf_layer(layer):
"""
Whether the layer is leaf layer.
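A short hedged usage sketch for the program_all_ops helper added above: it flattens the ops of every block so the save path can scan for ops that need thresholds. Building a bare static Program here and the import path are illustrative assumptions.

import paddle
from paddle.fluid.contrib.slim.quantization.imperative.utils import program_all_ops

paddle.enable_static()
main_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog):
    x = paddle.static.data(name="x", shape=[None, 4], dtype="float32")
    y = paddle.static.nn.fc(x, size=2)

# Iterate over every op in every block of the program.
for op in program_all_ops(main_prog):
    print(op.type)  # e.g. 'mul', 'elementwise_add'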
@@ -128,9 +128,11 @@ def __init__(self, num_classes=10):
bias_attr=fc_b3_attr),
Softmax())
self.add = paddle.nn.quant.add()
self.quant_stub = paddle.nn.quant.QuantStub()

def forward(self, inputs):
x = self.features(inputs)
x = self.quant_stub(inputs)
x = self.features(x)

x = fluid.layers.flatten(x, 1)
x = self.add(x, paddle.to_tensor(0.0)) # For CI