Skip to content

Commit

Permalink
hide quant_linear api (#58521)
Browse files Browse the repository at this point in the history
* hide quant_linear api

* hide quant_linear api

* hide quant_linear api
  • Loading branch information
RichardWooSJTU authored Nov 2, 2023
1 parent 53e5a2d commit 89a2ce2
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 168 deletions.
2 changes: 0 additions & 2 deletions python/paddle/static/nn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
from .loss import nce # noqa: F401
from .common import prelu # noqa: F401
from .common import layer_norm # noqa: F401
from .common import quant_linear


from .common import embedding # noqa: F401
Expand Down Expand Up @@ -102,5 +101,4 @@
'sequence_enumerate',
'sequence_reverse',
'prelu',
'quant_linear',
]
163 changes: 0 additions & 163 deletions python/paddle/static/nn/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,169 +247,6 @@ def fc_base(
)


@static_only
def quant_linear(
    x,
    w,
    size,
    scale_in,
    scale_weight,
    num_flatten_dims=1,
    bias_attr=None,
    activation=None,
    quant_round_type=1,
    quant_max_bound=127.0,
    quant_min_bound=-127.0,
    name=None,
):
    r"""
    Append a ``quant_linear`` op that multiplies an input tensor with an int8 weight tensor.

    Quant linear layer can take a tensor as its input and a tensor as the weight tensor.
    The quant linear layer multiplies the input tensor with the weight to produce
    an output tensor with shape :math:`[batch\_size, *, size]` , where :math:`*`
    means any number of additional dimensions. If :attr:`bias_attr` is not False, a 1-D bias tensor will
    be created and added to the output. If :attr:`activation` is not None,
    it will be applied to the output as well. Besides, the input tensor will be quantized to
    a tensor with int8 type, the parameter w must be a tensor with int8 type and the computation will also
    be with the int8 type.

    For a single input tensor :math:`X` , the equation is:

    .. math::

        Out = Act({XW + b})

    where:

    * :math:`X`: The input tensor.
    * :math:`W`: The weight matrix.
    * :math:`b`: The bias created by this layer (if needed).
    * :math:`Act`: The activation function.
    * :math:`Out`: The output tensor.

    Args:
        x (Tensor): A tensor. The number of dimensions
            of the tensor is at least 2. The data type should be float16, float32 or float64.
            Note: the dtype check in this function lists only these three types;
            bfloat16 is not in that list.
        w (Tensor): A tensor. The data type should be int8.
        size (int): The number of the output unit in this layer, which also means the feature
            size of output tensor.
        scale_in (float): The quantization scale for input.
        scale_weight (list[float]): The quantization scale for weights. It must be a
            Python ``list`` whose length equals :attr:`size`.
        num_flatten_dims (int, optional): The quant linear layer can accept an input tensor with more than
            two dimensions. If this happens, the multi-dimensional tensor will first be flattened
            into a 2-D matrix. The parameter :attr:`num_flatten_dims` determines how the input
            tensor is flattened: the first :math:`num\_flatten\_dims` (inclusive, index starts from 1)
            dimensions will be flattened to form the first dimension of the final matrix (height of
            the matrix), and the rest :math:`rank(x) - num\_flatten\_dims` dimensions are
            flattened to form the second dimension of the final matrix (width of the matrix).
            For example, assuming that :attr:`x` is a 5-dimensional tensor with a shape
            :math:`[2, 3, 4, 5, 6]` , and :attr:`num_flatten_dims` = 3.
            Then, the flattened matrix will have a shape :math:`[2 * 3 * 4, 5 * 6] = [24, 30]` .
            A value of -1 is replaced by :math:`rank(x) - 1`.
            Default: 1.
        bias_attr (ParamAttr|bool, optional): The attribute of the learnable bias.
            If it is set to False, no bias will be added to the output.
            If it is set to None or one kind of ParamAttr, a bias parameter will
            be created according to ParamAttr. For detailed information, please refer
            to :attr:`paddle.ParamAttr`. The default value is None and the bias will be
            initialized to zero.
        activation (str, optional): Activation to be applied to the output of
            this layer. Only "relu" is supported. For more information,
            please refer to :ref:`api_guide_activations_en` . Default: None.
        quant_round_type (int, optional): The round type of float to int. 0 means rounding to nearest ties to even and 1 means rounding to nearest ties away from zero. Default: 1.
        quant_max_bound (float, optional): The max bound of float type to int type. Default: 127.0.
        quant_min_bound (float, optional): The min bound of float type to int type. Default: -127.0.
        name (str, optional): The default value is None. Normally there is no need for user to set
            it. For more information, please refer to :ref:`api_guide_Name` .

    Returns:
        Tensor, its shape is :math:`[batch\_size, *, size]` , and the data type is same with input.

    Raises:
        AttributeError: If ``len(scale_weight)`` is not equal to :attr:`size`.
    """

    # Inner worker using the legacy argument names (`input`, `weight`, `act`).
    # NOTE: the parameter names below are significant, because the first
    # statement forwards them to LayerHelper through `**locals()`.
    def quant_linear_base(
        input,
        weight,
        size,
        scale_in,
        scale_weight,
        num_flatten_dims=1,
        bias_attr=None,
        act=None,
        quant_round_type=1,
        quant_max_bound=127.0,
        quant_min_bound=-127.0,
        name=None,
    ):
        # locals() is evaluated before any other local is bound, so it holds
        # exactly the parameters of this function.
        helper = LayerHelper("quant_linear", **locals())
        check_type(input, 'input', Variable, 'quant_linear')
        # presumably resolves the dtype of the `input` kwarg given to the
        # helper above -- TODO confirm against LayerHelper.input_dtype
        dtype = helper.input_dtype()
        check_dtype(
            dtype,
            'input',
            ['float16', 'float32', 'float64'],
            'quant_linear',
        )

        input_shape = input.shape
        # -1 is a shorthand for "flatten every dimension except the last one".
        if num_flatten_dims == -1:
            num_flatten_dims = len(input_shape) - 1

        check_type(weight, "weight", Variable, 'quant_linear')
        check_dtype(
            weight.dtype,
            'weight',
            ['int8'],
            'quant_linear',
        )
        check_type(scale_weight, "scale_weight", list, 'quant_linear')
        # One weight scale is required per output unit.
        if len(scale_weight) != size:
            raise AttributeError(
                "The length of scale_weight must be the same with the param size."
            )

        inputs_of_quant_linear = {"x": input, "w": weight}
        # Only an explicit False disables the bias; None still creates one.
        if bias_attr is not False:
            bias_shape = [size]
            bias = helper.create_parameter(
                attr=bias_attr, shape=bias_shape, dtype=dtype, is_bias=True
            )
            inputs_of_quant_linear["bias"] = bias

        # The output variable is created with the same dtype as the input.
        out = helper.create_variable_for_type_inference(dtype)
        # Attribute names handed to the `quant_linear` op; `act` is passed
        # through unchanged and is None when no activation was requested.
        attrs_of_quant_linear = {
            "in_num_col_dims": num_flatten_dims,
            "activation_type": act,
            "scale_in": scale_in,
            "scale_weights": scale_weight,
            "quant_round_type": quant_round_type,
            "quant_max_bound": quant_max_bound,
            "quant_min_bound": quant_min_bound,
        }

        helper.append_op(
            type="quant_linear",
            inputs=inputs_of_quant_linear,
            outputs={"out": out},
            attrs=attrs_of_quant_linear,
        )
        return out

    # Map the public argument names (x, w, activation) onto the worker's names.
    return quant_linear_base(
        input=x,
        weight=w,
        size=size,
        scale_in=scale_in,
        scale_weight=scale_weight,
        num_flatten_dims=num_flatten_dims,
        bias_attr=bias_attr,
        act=activation,
        quant_round_type=quant_round_type,
        quant_max_bound=quant_max_bound,
        quant_min_bound=quant_min_bound,
        name=name,
    )


def instance_norm(
input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
):
Expand Down
172 changes: 169 additions & 3 deletions test/legacy_test/test_quant_linear_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,176 @@
import paddle
from paddle import base
from paddle.base import Program, core, program_guard
from paddle.base.data_feeder import check_dtype
from paddle.base.framework import Variable, static_only
from paddle.common_ops_import import LayerHelper, check_type

SEED = 2020


@static_only
def quant_linear(
    x,
    w,
    size,
    scale_in,
    scale_weight,
    num_flatten_dims=1,
    bias_attr=None,
    activation=None,
    quant_round_type=1,
    quant_max_bound=127.0,
    quant_min_bound=-127.0,
    name=None,
):
    r"""
    Append a ``quant_linear`` op that multiplies an input tensor with an int8 weight tensor.

    Quant linear layer can take a tensor as its input and a tensor as the weight tensor.
    The quant linear layer multiplies the input tensor with the weight to produce
    an output tensor with shape :math:`[batch\_size, *, size]` , where :math:`*`
    means any number of additional dimensions. If :attr:`bias_attr` is not False, a 1-D bias tensor will
    be created and added to the output. If :attr:`activation` is not None,
    it will be applied to the output as well. Besides, the input tensor will be quantized to
    a tensor with int8 type, the parameter w must be a tensor with int8 type and the computation will also
    be with the int8 type.

    For a single input tensor :math:`X` , the equation is:

    .. math::

        Out = Act({XW + b})

    where:

    * :math:`X`: The input tensor.
    * :math:`W`: The weight matrix.
    * :math:`b`: The bias created by this layer (if needed).
    * :math:`Act`: The activation function.
    * :math:`Out`: The output tensor.

    Args:
        x (Tensor): A tensor. The number of dimensions
            of the tensor is at least 2. The data type should be float16, float32 or float64.
            Note: the dtype check in this function lists only these three types;
            bfloat16 is not in that list.
        w (Tensor): A tensor. The data type should be int8.
        size (int): The number of the output unit in this layer, which also means the feature
            size of output tensor.
        scale_in (float): The quantization scale for input.
        scale_weight (list[float]): The quantization scale for weights. It must be a
            Python ``list`` whose length equals :attr:`size`.
        num_flatten_dims (int, optional): The quant linear layer can accept an input tensor with more than
            two dimensions. If this happens, the multi-dimensional tensor will first be flattened
            into a 2-D matrix. The parameter :attr:`num_flatten_dims` determines how the input
            tensor is flattened: the first :math:`num\_flatten\_dims` (inclusive, index starts from 1)
            dimensions will be flattened to form the first dimension of the final matrix (height of
            the matrix), and the rest :math:`rank(x) - num\_flatten\_dims` dimensions are
            flattened to form the second dimension of the final matrix (width of the matrix).
            For example, assuming that :attr:`x` is a 5-dimensional tensor with a shape
            :math:`[2, 3, 4, 5, 6]` , and :attr:`num_flatten_dims` = 3.
            Then, the flattened matrix will have a shape :math:`[2 * 3 * 4, 5 * 6] = [24, 30]` .
            A value of -1 is replaced by :math:`rank(x) - 1`.
            Default: 1.
        bias_attr (ParamAttr|bool, optional): The attribute of the learnable bias.
            If it is set to False, no bias will be added to the output.
            If it is set to None or one kind of ParamAttr, a bias parameter will
            be created according to ParamAttr. For detailed information, please refer
            to :attr:`paddle.ParamAttr`. The default value is None and the bias will be
            initialized to zero.
        activation (str, optional): Activation to be applied to the output of
            this layer. Only "relu" is supported. For more information,
            please refer to :ref:`api_guide_activations_en` . Default: None.
        quant_round_type (int, optional): The round type of float to int. 0 means rounding to nearest ties to even and 1 means rounding to nearest ties away from zero. Default: 1.
        quant_max_bound (float, optional): The max bound of float type to int type. Default: 127.0.
        quant_min_bound (float, optional): The min bound of float type to int type. Default: -127.0.
        name (str, optional): The default value is None. Normally there is no need for user to set
            it. For more information, please refer to :ref:`api_guide_Name` .

    Returns:
        Tensor, its shape is :math:`[batch\_size, *, size]` , and the data type is same with input.

    Raises:
        AttributeError: If ``len(scale_weight)`` is not equal to :attr:`size`.
    """

    # Inner worker using the legacy argument names (`input`, `weight`, `act`).
    # NOTE: the parameter names below are significant, because the first
    # statement forwards them to LayerHelper through `**locals()`.
    def quant_linear_base(
        input,
        weight,
        size,
        scale_in,
        scale_weight,
        num_flatten_dims=1,
        bias_attr=None,
        act=None,
        quant_round_type=1,
        quant_max_bound=127.0,
        quant_min_bound=-127.0,
        name=None,
    ):
        # locals() is evaluated before any other local is bound, so it holds
        # exactly the parameters of this function.
        helper = LayerHelper("quant_linear", **locals())
        check_type(input, 'input', Variable, 'quant_linear')
        # presumably resolves the dtype of the `input` kwarg given to the
        # helper above -- TODO confirm against LayerHelper.input_dtype
        dtype = helper.input_dtype()
        check_dtype(
            dtype,
            'input',
            ['float16', 'float32', 'float64'],
            'quant_linear',
        )

        input_shape = input.shape
        # -1 is a shorthand for "flatten every dimension except the last one".
        if num_flatten_dims == -1:
            num_flatten_dims = len(input_shape) - 1

        check_type(weight, "weight", Variable, 'quant_linear')
        check_dtype(
            weight.dtype,
            'weight',
            ['int8'],
            'quant_linear',
        )
        check_type(scale_weight, "scale_weight", list, 'quant_linear')
        # One weight scale is required per output unit.
        if len(scale_weight) != size:
            raise AttributeError(
                "The length of scale_weight must be the same with the param size."
            )

        inputs_of_quant_linear = {"x": input, "w": weight}
        # Only an explicit False disables the bias; None still creates one.
        if bias_attr is not False:
            bias_shape = [size]
            bias = helper.create_parameter(
                attr=bias_attr, shape=bias_shape, dtype=dtype, is_bias=True
            )
            inputs_of_quant_linear["bias"] = bias

        # The output variable is created with the same dtype as the input.
        out = helper.create_variable_for_type_inference(dtype)
        # Attribute names handed to the `quant_linear` op; `act` is passed
        # through unchanged and is None when no activation was requested.
        attrs_of_quant_linear = {
            "in_num_col_dims": num_flatten_dims,
            "activation_type": act,
            "scale_in": scale_in,
            "scale_weights": scale_weight,
            "quant_round_type": quant_round_type,
            "quant_max_bound": quant_max_bound,
            "quant_min_bound": quant_min_bound,
        }

        helper.append_op(
            type="quant_linear",
            inputs=inputs_of_quant_linear,
            outputs={"out": out},
            attrs=attrs_of_quant_linear,
        )
        return out

    # Map the public argument names (x, w, activation) onto the worker's names.
    return quant_linear_base(
        input=x,
        weight=w,
        size=size,
        scale_in=scale_in,
        scale_weight=scale_weight,
        num_flatten_dims=num_flatten_dims,
        bias_attr=bias_attr,
        act=activation,
        quant_round_type=quant_round_type,
        quant_max_bound=quant_max_bound,
        quant_min_bound=quant_min_bound,
        name=name,
    )


def round_array(x):
x[x > 0] = np.ceil(x[x > 0])
x[x <= 0] = np.floor(x[x <= 0])
Expand Down Expand Up @@ -412,7 +578,7 @@ def run_program(num_flatten_dims):
dtype="int8",
)

out = paddle.static.nn.quant_linear(
out = quant_linear(
x=x,
size=1,
num_flatten_dims=num_flatten_dims,
Expand Down Expand Up @@ -468,7 +634,7 @@ def test_Variable():
w2 = paddle.static.data(
name='w2', shape=[25, 1], dtype='int8'
)
paddle.static.nn.quant_linear(
quant_linear(
x=input_data,
size=1,
num_flatten_dims=1,
Expand Down Expand Up @@ -509,7 +675,7 @@ def test_Variable():
x3 = paddle.static.data(
name='x3', shape=[-1, 4], dtype='float32'
)
paddle.static.nn.quant_linear(
quant_linear(
x=x3,
size=1,
num_flatten_dims=1,
Expand Down

0 comments on commit 89a2ce2

Please sign in to comment.