Skip to content

Commit f21afbb

Browse files
authored
Refactor Keras PTQ Implementation (#1698)
Signed-off-by: zehao-intel <zehao.huang@intel.com>
1 parent 24419c9 commit f21afbb

File tree

14 files changed

+1049
-751
lines changed

14 files changed

+1049
-751
lines changed

neural_compressor/tensorflow/algorithms/static_quant/keras.py

Lines changed: 407 additions & 328 deletions
Large diffs are not rendered by default.

neural_compressor/tensorflow/keras/layers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#
4-
# Copyright (c) 2022 Intel Corporation
4+
# Copyright (c) 2024 Intel Corporation
55
#
66
# Licensed under the Apache License, Version 2.0 (the "License");
77
# you may not use this file except in compliance with the License.
@@ -21,3 +21,4 @@
2121
from neural_compressor.tensorflow.keras.layers.pool2d import QAvgPool2D, QMaxPool2D
2222
from neural_compressor.tensorflow.keras.layers.quantizer import DeQuantize, FakeQuant, Quantize
2323
from neural_compressor.tensorflow.keras.layers.separable_conv2d import QSeparableConv2D
24+
from neural_compressor.tensorflow.keras.layers.layer_initializer import layer_initializer_dict

neural_compressor/tensorflow/keras/layers/conv2d.py

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#
4-
# Copyright (c) 2022 Intel Corporation
4+
# Copyright (c) 2024 Intel Corporation
55
#
66
# Licensed under the Apache License, Version 2.0 (the "License");
77
# you may not use this file except in compliance with the License.
@@ -23,11 +23,7 @@
2323

2424
from neural_compressor.tensorflow.utils import version1_gte_version2
2525

26-
if version1_gte_version2(tf.__version__, "2.16.1"):
27-
from keras.src.layers.convolutional.base_conv import BaseConv # pylint: disable=E0401
28-
29-
Conv = BaseConv
30-
elif version1_gte_version2(tf.__version__, "2.13.0"):
26+
if version1_gte_version2(tf.__version__, "2.13.0"):
3127
from keras.src.layers.convolutional.base_conv import Conv # pylint: disable=E0401
3228
else:
3329
from keras.layers.convolutional.base_conv import Conv # pylint: disable=E0401
@@ -36,6 +32,7 @@
3632
class QConv2D(Conv):
3733
def __init__(
3834
self,
35+
name,
3936
filters,
4037
kernel_size,
4138
strides=(1, 1),
@@ -52,11 +49,12 @@ def __init__(
5249
activity_regularizer=None,
5350
kernel_constraint=None,
5451
bias_constraint=None,
55-
min_value=-10000,
56-
max_value=10000,
52+
min_value=None,
53+
max_value=None,
5754
**kwargs
5855
):
5956
super(QConv2D, self).__init__(
57+
name=name,
6058
rank=2,
6159
filters=filters,
6260
kernel_size=kernel_size,
@@ -76,10 +74,17 @@ def __init__(
7674
bias_constraint=constraints.get(bias_constraint),
7775
**kwargs
7876
)
79-
self.min_value = json.loads(min_value)
80-
self.max_value = json.loads(max_value)
77+
self.min_value = min_value
78+
self.max_value = max_value
8179

8280
def call(self, inputs):
81+
kernel_size = self.kernel.shape[-1]
82+
83+
if not self.min_value:
84+
self.min_value = [-10000] * kernel_size
85+
if not self.max_value:
86+
self.max_value = [10000] * kernel_size
87+
8388
# add the Q/DQ here
8489
kernel, _, _ = quantization.quantize(
8590
self.kernel, self.min_value, self.max_value, tf.qint8, axis=3, mode="SCALED"
@@ -111,3 +116,69 @@ def call(self, inputs):
111116
@classmethod
112117
def from_config(cls, config):
113118
return cls(**config)
119+
120+
121+
def initialize_int8_conv2d(fp32_layer):
122+
kwargs = fp32_layer.get_config()
123+
124+
if "name" in kwargs:
125+
del kwargs["name"]
126+
if "filters" in kwargs:
127+
del kwargs["filters"]
128+
if "kernel_size" in kwargs:
129+
del kwargs["kernel_size"]
130+
if "strides" in kwargs:
131+
del kwargs["strides"]
132+
if "padding" in kwargs:
133+
del kwargs["padding"]
134+
if "data_format" in kwargs:
135+
del kwargs["data_format"]
136+
if "dilation_rate" in kwargs:
137+
del kwargs["dilation_rate"]
138+
if "groups" in kwargs:
139+
del kwargs["groups"]
140+
if "activation" in kwargs:
141+
del kwargs["activation"]
142+
if "use_bias" in kwargs:
143+
del kwargs["use_bias"]
144+
if "kernel_initializer" in kwargs:
145+
del kwargs["kernel_initializer"]
146+
if "bias_initializer" in kwargs:
147+
del kwargs["bias_initializer"]
148+
if "kernel_regularizer" in kwargs:
149+
del kwargs["kernel_regularizer"]
150+
if "activity_regularizer" in kwargs:
151+
del kwargs["activity_regularizer"]
152+
if "bias_regularizer" in kwargs:
153+
del kwargs["bias_regularizer"]
154+
if "kernel_constraint" in kwargs:
155+
del kwargs["kernel_constraint"]
156+
if "bias_constraint" in kwargs:
157+
del kwargs["bias_constraint"]
158+
if "min_value" in kwargs:
159+
del kwargs["min_value"]
160+
if "max_value" in kwargs:
161+
del kwargs["max_value"]
162+
163+
return QConv2D(
164+
name=fp32_layer.name,
165+
filters=fp32_layer.filters,
166+
kernel_size=fp32_layer.kernel_size,
167+
strides=fp32_layer.strides,
168+
padding=fp32_layer.padding,
169+
data_format=fp32_layer.data_format,
170+
dilation_rate=fp32_layer.dilation_rate,
171+
groups=fp32_layer.groups,
172+
activation=fp32_layer.activation,
173+
use_bias=fp32_layer.use_bias,
174+
kernel_initializer=fp32_layer.kernel_initializer,
175+
bias_initializer=fp32_layer.bias_initializer,
176+
kernel_regularizer=fp32_layer.kernel_regularizer,
177+
bias_regularizer=fp32_layer.bias_regularizer,
178+
activity_regularizer=fp32_layer.activity_regularizer,
179+
kernel_constraint=fp32_layer.kernel_constraint,
180+
bias_constraint=fp32_layer.bias_constraint,
181+
min_value=fp32_layer.min_value,
182+
max_value=fp32_layer.max_value,
183+
**kwargs
184+
)

neural_compressor/tensorflow/keras/layers/dense.py

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33
#
4-
# Copyright (c) 2022 Intel Corporation
4+
# Copyright (c) 2024 Intel Corporation
55
#
66
# Licensed under the Apache License, Version 2.0 (the "License");
77
# you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
2626
class QDense(Dense):
2727
def __init__(
2828
self,
29+
name,
2930
units,
3031
activation=None,
3132
use_bias=True,
@@ -36,11 +37,12 @@ def __init__(
3637
activity_regularizer=None,
3738
kernel_constraint=None,
3839
bias_constraint=None,
39-
min_value=-10000,
40-
max_value=10000,
40+
min_value=None,
41+
max_value=None,
4142
**kwargs
4243
):
4344
super(QDense, self).__init__(
45+
name=name,
4446
units=units,
4547
activation=activation,
4648
use_bias=use_bias,
@@ -53,10 +55,17 @@ def __init__(
5355
bias_constraint=bias_constraint,
5456
**kwargs
5557
)
56-
self.min_value = json.loads(min_value)
57-
self.max_value = json.loads(max_value)
58+
self.min_value = min_value
59+
self.max_value = max_value
5860

5961
def call(self, inputs):
62+
kernel_size = self.kernel.shape[-1]
63+
64+
if not self.min_value:
65+
self.min_value = [-10000] * kernel_size
66+
if not self.max_value:
67+
self.max_value = [10000] * kernel_size
68+
6069
# add the Q/DQ here
6170
kernel, _, _ = quantization.quantize(
6271
self.kernel,
@@ -66,6 +75,7 @@ def call(self, inputs):
6675
axis=1,
6776
mode="SCALED",
6877
)
78+
6979
kernel = quantization.dequantize(
7080
kernel,
7181
self.min_value,
@@ -80,3 +90,53 @@ def call(self, inputs):
8090
if self.activation is not None:
8191
outputs = self.activation(outputs)
8292
return outputs
93+
94+
95+
def initialize_int8_dense(fp32_layer):
96+
kwargs = fp32_layer.get_config()
97+
98+
if "name" in kwargs:
99+
del kwargs["name"]
100+
if "units" in kwargs:
101+
del kwargs["units"]
102+
if "activation" in kwargs:
103+
del kwargs["activation"]
104+
if "use_bias" in kwargs:
105+
del kwargs["use_bias"]
106+
if "kernel_initializer" in kwargs:
107+
del kwargs["kernel_initializer"]
108+
if "bias_initializer" in kwargs:
109+
del kwargs["bias_initializer"]
110+
if "kernel_regularizer" in kwargs:
111+
del kwargs["kernel_regularizer"]
112+
if "activity_regularizer" in kwargs:
113+
del kwargs["activity_regularizer"]
114+
if "bias_regularizer" in kwargs:
115+
del kwargs["bias_regularizer"]
116+
if "kernel_constraint" in kwargs:
117+
del kwargs["kernel_constraint"]
118+
if "bias_constraint" in kwargs:
119+
del kwargs["bias_constraint"]
120+
if "min_value" in kwargs:
121+
del kwargs["min_value"]
122+
if "max_value" in kwargs:
123+
del kwargs["max_value"]
124+
125+
q_layer = QDense(
126+
name=fp32_layer.name,
127+
units=fp32_layer.units,
128+
activation=fp32_layer.activation,
129+
use_bias=fp32_layer.use_bias,
130+
kernel_initializer=fp32_layer.kernel_initializer,
131+
bias_initializer=fp32_layer.bias_initializer,
132+
kernel_regularizer=fp32_layer.kernel_regularizer,
133+
bias_regularizer=fp32_layer.bias_regularizer,
134+
activity_regularizer=fp32_layer.activity_regularizer,
135+
kernel_constraint=fp32_layer.kernel_constraint,
136+
bias_constraint=fp32_layer.bias_constraint,
137+
min_value=fp32_layer.min_value,
138+
max_value=fp32_layer.max_value,
139+
**kwargs
140+
)
141+
142+
return q_layer

0 commit comments

Comments (0)