hide quant_linear api (#58521)

* hide quant_linear api * hide quant_linear api * hide quant_linear api
PaddlePaddle · Nov 2, 2023 · 89a2ce2 · 89a2ce2
1 parent 53e5a2d
commit 89a2ce2
Show file tree

Hide file tree

Showing 3 changed files with 169 additions and 168 deletions.
diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py
@@ -36,7 +36,6 @@
 from .loss import nce  # noqa: F401
 from .common import prelu  # noqa: F401
 from .common import layer_norm  # noqa: F401
-from .common import quant_linear
 
 
 from .common import embedding  # noqa: F401
@@ -102,5 +101,4 @@
     'sequence_enumerate',
     'sequence_reverse',
     'prelu',
-    'quant_linear',
 ]
diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py
@@ -247,169 +247,6 @@ def fc_base(
     )
 
 
-@static_only
-def quant_linear(
-    x,
-    w,
-    size,
-    scale_in,
-    scale_weight,
-    num_flatten_dims=1,
-    bias_attr=None,
-    activation=None,
-    quant_round_type=1,
-    quant_max_bound=127.0,
-    quant_min_bound=-127.0,
-    name=None,
-):
-    r"""
-
-    Quant linear layer can take a tensor as its input and a tensor as the weight tensor.
-    The quant linear layer multiplies the input tensor with the weight to produce
-    an output tensor with shape :math:`[batch\_size, *, size]` , where :math:`*`
-    means any number of additional dimensions. If :attr:`bias_attr` is not False, a 1-D bias tensor will
-    be created and added to the output. If :attr:`activation` is not None,
-    it will be applied to the output as well. Besides, the input tensor will be quantize to
-    the tensor with int8 type, the parameter w must be a tensor with int8 type and the computation will also
-    be with the int8 type.
-
-    For a single input tensor :math:`X` , the equation is:
-
-    .. math::
-
-        Out = Act({XW + b})
-
-    where:
-
-    * :math:`X`: The input tensor.
-    * :math:`W`: The weight matrix.
-    * :math:`b`: The bias created by this layer (if needed).
-    * :math:`Act`: The activation function.
-    * :math:`Out`: The output tensor.
-
-    Args:
-        x (Tensor): A tensor. The number of dimensions
-            of the tensor is at least 2. The data type should be float16, bfloat16, float32 or float64.
-        w (Tensor): A tensor. The data type should be int8.
-        size (int): The number of the output unit in this layer, which also means the feature
-            size of output tensor.
-        scale_in (float): The quantization scale for input.
-        scale_weight (list[float]): The quantization scale for weights.
-        num_flatten_dims (int, optional): The quant linear layer can accept an input tensor with more than
-            two dimensions. If this happens, the multi-dimensional tensor will first be flattened
-            into a 2-D matrix. The parameter :attr:`num_flatten_dims` determines how the input
-            tensor is flattened: the first :math:`num\_flatten\_dims` (inclusive, index starts from 1)
-            dimensions will be flatten to form the first dimension of the final matrix (height of
-            the matrix), and the rest :math:`rank(x) - num\_flatten\_dims` dimensions are
-            flattened to form the second dimension of the final matrix (width of the matrix).
-            For example, assuming that :attr:`x` is a 5-dimensional tensor with a shape
-            :math:`[2, 3, 4, 5, 6]` , and :attr:`num_flatten_dims` = 3.
-            Then, the flattened matrix will have a shape :math:`[2 * 3 * 4, 5 * 6] = [24, 30]` .
-            Default: 1.
-        bias_attr (ParamAttr|bool, optional): The attribute of the learnable bias.
-            If it is set to False, no bias will be added to the output.
-            If it is set to None or one kind of ParamAttr, a bias parameter will
-            be created according to ParamAttr. For detailed information, please refer
-            to :attr:`paddle.ParamAttr`. The default value is None and the bias will be
-            initialized to zero.
-        activation (str, optional): Activation to be applied to the output of
-            this layer. Only "relu" is supported. For more information,
-            please refer to :ref:`api_guide_activations_en` . Default: None.
-        quant_round_type (int, optional): The round type of float to int. 0 means rounding to nearest ties to even and 1 means rounding to nearest ties away from zero. Default: 1.
-        quant_max_bound (float, optional): The max bound of float type to int type. Defualt: 127.0.
-        quant_min_bound (float, optional): The min bound of float type to int type. Defualt: -127.0.
-        name (str, optional): The default value is None. Normally there is no need for user to set
-            it. For more information, please refer to :ref:`api_guide_Name` .
-
-    Returns:
-        Tensor, its shape is :math:`[batch\_size, *, size]` , and the data type is same with input.
-
-    """
-
-    def quant_linear_base(
-        input,
-        weight,
-        size,
-        scale_in,
-        scale_weight,
-        num_flatten_dims=1,
-        bias_attr=None,
-        act=None,
-        quant_round_type=1,
-        quant_max_bound=127.0,
-        quant_min_bound=-127.0,
-        name=None,
-    ):
-        helper = LayerHelper("quant_linear", **locals())
-        check_type(input, 'input', Variable, 'quant_linear')
-        dtype = helper.input_dtype()
-        check_dtype(
-            dtype,
-            'input',
-            ['float16', 'float32', 'float64'],
-            'quant_linear',
-        )
-
-        input_shape = input.shape
-        if num_flatten_dims == -1:
-            num_flatten_dims = len(input_shape) - 1
-
-        check_type(weight, "weight", Variable, 'quant_linear')
-        check_dtype(
-            weight.dtype,
-            'weight',
-            ['int8'],
-            'quant_linear',
-        )
-        check_type(scale_weight, "scale_weight", list, 'quant_linear')
-        if len(scale_weight) != size:
-            raise AttributeError(
-                "The length of scale_weight must be the same with the param size."
-            )
-
-        inputs_of_quant_linear = {"x": input, "w": weight}
-        if bias_attr is not False:
-            bias_shape = [size]
-            bias = helper.create_parameter(
-                attr=bias_attr, shape=bias_shape, dtype=dtype, is_bias=True
-            )
-            inputs_of_quant_linear["bias"] = bias
-
-        out = helper.create_variable_for_type_inference(dtype)
-        attrs_of_quant_linear = {
-            "in_num_col_dims": num_flatten_dims,
-            "activation_type": act,
-            "scale_in": scale_in,
-            "scale_weights": scale_weight,
-            "quant_round_type": quant_round_type,
-            "quant_max_bound": quant_max_bound,
-            "quant_min_bound": quant_min_bound,
-        }
-
-        helper.append_op(
-            type="quant_linear",
-            inputs=inputs_of_quant_linear,
-            outputs={"out": out},
-            attrs=attrs_of_quant_linear,
-        )
-        return out
-
-    return quant_linear_base(
-        input=x,
-        weight=w,
-        size=size,
-        scale_in=scale_in,
-        scale_weight=scale_weight,
-        num_flatten_dims=num_flatten_dims,
-        bias_attr=bias_attr,
-        act=activation,
-        quant_round_type=quant_round_type,
-        quant_max_bound=quant_max_bound,
-        quant_min_bound=quant_min_bound,
-        name=name,
-    )
-
-
 def instance_norm(
     input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
 ):

diff --git a/test/legacy_test/test_quant_linear_op.py b/test/legacy_test/test_quant_linear_op.py
@@ -20,10 +20,176 @@
 import paddle
 from paddle import base
 from paddle.base import Program, core, program_guard
+from paddle.base.data_feeder import check_dtype
+from paddle.base.framework import Variable, static_only
+from paddle.common_ops_import import LayerHelper, check_type
 
 SEED = 2020
 
 
+@static_only
+def quant_linear(
+    x,
+    w,
+    size,
+    scale_in,
+    scale_weight,
+    num_flatten_dims=1,
+    bias_attr=None,
+    activation=None,
+    quant_round_type=1,
+    quant_max_bound=127.0,
+    quant_min_bound=-127.0,
+    name=None,
+):
+    r"""
+
+    Quant linear layer can take a tensor as its input and a tensor as the weight tensor.
+    The quant linear layer multiplies the input tensor with the weight to produce
+    an output tensor with shape :math:`[batch\_size, *, size]` , where :math:`*`
+    means any number of additional dimensions. If :attr:`bias_attr` is not False, a 1-D bias tensor will
+    be created and added to the output. If :attr:`activation` is not None,
+    it will be applied to the output as well. Besides, the input tensor will be quantized to
+    an int8 tensor, the parameter w must be an int8 tensor, and the computation will also
+    be performed in int8.
+
+    For a single input tensor :math:`X` , the equation is:
+
+    .. math::
+
+        Out = Act({XW + b})
+
+    where:
+
+    * :math:`X`: The input tensor.
+    * :math:`W`: The weight matrix.
+    * :math:`b`: The bias created by this layer (if needed).
+    * :math:`Act`: The activation function.
+    * :math:`Out`: The output tensor.
+
+    Args:
+        x (Tensor): A tensor. The number of dimensions
+            of the tensor is at least 2. The data type should be float16, float32 or float64.
+        w (Tensor): A tensor. The data type should be int8.
+        size (int): The number of the output unit in this layer, which also means the feature
+            size of output tensor.
+        scale_in (float): The quantization scale for input.
+        scale_weight (list[float]): The quantization scale for weights.
+        num_flatten_dims (int, optional): The quant linear layer can accept an input tensor with more than
+            two dimensions. If this happens, the multi-dimensional tensor will first be flattened
+            into a 2-D matrix. The parameter :attr:`num_flatten_dims` determines how the input
+            tensor is flattened: the first :math:`num\_flatten\_dims` (inclusive, index starts from 1)
+            dimensions will be flattened to form the first dimension of the final matrix (height of
+            the matrix), and the rest :math:`rank(x) - num\_flatten\_dims` dimensions are
+            flattened to form the second dimension of the final matrix (width of the matrix).
+            For example, assuming that :attr:`x` is a 5-dimensional tensor with a shape
+            :math:`[2, 3, 4, 5, 6]` , and :attr:`num_flatten_dims` = 3.
+            Then, the flattened matrix will have a shape :math:`[2 * 3 * 4, 5 * 6] = [24, 30]` .
+            Default: 1.
+        bias_attr (ParamAttr|bool, optional): The attribute of the learnable bias.
+            If it is set to False, no bias will be added to the output.
+            If it is set to None or one kind of ParamAttr, a bias parameter will
+            be created according to ParamAttr. For detailed information, please refer
+            to :attr:`paddle.ParamAttr`. The default value is None and the bias will be
+            initialized to zero.
+        activation (str, optional): Activation to be applied to the output of
+            this layer. Only "relu" is supported. For more information,
+            please refer to :ref:`api_guide_activations_en` . Default: None.
+        quant_round_type (int, optional): The round type of float to int. 0 means rounding to nearest ties to even and 1 means rounding to nearest ties away from zero. Default: 1.
+        quant_max_bound (float, optional): The max bound of float type to int type. Default: 127.0.
+        quant_min_bound (float, optional): The min bound of float type to int type. Default: -127.0.
+        name (str, optional): The default value is None. Normally there is no need for user to set
+            it. For more information, please refer to :ref:`api_guide_Name` .
+
+    Returns:
+        Tensor, its shape is :math:`[batch\_size, *, size]` , and its data type is the same as the input.
+
+    """
+
+    def quant_linear_base(
+        input,
+        weight,
+        size,
+        scale_in,
+        scale_weight,
+        num_flatten_dims=1,
+        bias_attr=None,
+        act=None,
+        quant_round_type=1,
+        quant_max_bound=127.0,
+        quant_min_bound=-127.0,
+        name=None,
+    ):
+        helper = LayerHelper("quant_linear", **locals())
+        check_type(input, 'input', Variable, 'quant_linear')
+        dtype = helper.input_dtype()
+        check_dtype(
+            dtype,
+            'input',
+            ['float16', 'float32', 'float64'],
+            'quant_linear',
+        )
+
+        input_shape = input.shape
+        if num_flatten_dims == -1:
+            num_flatten_dims = len(input_shape) - 1
+
+        check_type(weight, "weight", Variable, 'quant_linear')
+        check_dtype(
+            weight.dtype,
+            'weight',
+            ['int8'],
+            'quant_linear',
+        )
+        check_type(scale_weight, "scale_weight", list, 'quant_linear')
+        if len(scale_weight) != size:
+            raise AttributeError(
+                "The length of scale_weight must be the same with the param size."
+            )
+
+        inputs_of_quant_linear = {"x": input, "w": weight}
+        if bias_attr is not False:
+            bias_shape = [size]
+            bias = helper.create_parameter(
+                attr=bias_attr, shape=bias_shape, dtype=dtype, is_bias=True
+            )
+            inputs_of_quant_linear["bias"] = bias
+
+        out = helper.create_variable_for_type_inference(dtype)
+        attrs_of_quant_linear = {
+            "in_num_col_dims": num_flatten_dims,
+            "activation_type": act,
+            "scale_in": scale_in,
+            "scale_weights": scale_weight,
+            "quant_round_type": quant_round_type,
+            "quant_max_bound": quant_max_bound,
+            "quant_min_bound": quant_min_bound,
+        }
+
+        helper.append_op(
+            type="quant_linear",
+            inputs=inputs_of_quant_linear,
+            outputs={"out": out},
+            attrs=attrs_of_quant_linear,
+        )
+        return out
+
+    return quant_linear_base(
+        input=x,
+        weight=w,
+        size=size,
+        scale_in=scale_in,
+        scale_weight=scale_weight,
+        num_flatten_dims=num_flatten_dims,
+        bias_attr=bias_attr,
+        act=activation,
+        quant_round_type=quant_round_type,
+        quant_max_bound=quant_max_bound,
+        quant_min_bound=quant_min_bound,
+        name=name,
+    )
+
+
 def round_array(x):
     x[x > 0] = np.ceil(x[x > 0])
     x[x <= 0] = np.floor(x[x <= 0])
@@ -412,7 +578,7 @@ def run_program(num_flatten_dims):
                         dtype="int8",
                     )
 
-                    out = paddle.static.nn.quant_linear(
+                    out = quant_linear(
                         x=x,
                         size=1,
                         num_flatten_dims=num_flatten_dims,
@@ -468,7 +634,7 @@ def test_Variable():
                     w2 = paddle.static.data(
                         name='w2', shape=[25, 1], dtype='int8'
                     )
-                    paddle.static.nn.quant_linear(
+                    quant_linear(
                         x=input_data,
                         size=1,
                         num_flatten_dims=1,
@@ -509,7 +675,7 @@ def test_Variable():
                     x3 = paddle.static.data(
                         name='x3', shape=[-1, 4], dtype='float32'
                     )
-                    paddle.static.nn.quant_linear(
+                    quant_linear(
                         x=x3,
                         size=1,
                         num_flatten_dims=1,