|
14 | 14 |
|
15 | 15 | from __future__ import annotations |
16 | 16 |
|
| 17 | +from math import sqrt |
17 | 18 | from typing import TYPE_CHECKING |
18 | 19 |
|
19 | 20 | import paddle |
|
23 | 24 | ) |
24 | 25 | from paddle.utils.decorator_utils import ForbidKeywordsDecorator |
25 | 26 |
|
26 | | -from . import functional # noqa: F401 |
| 27 | +from . import functional |
27 | 28 |
|
28 | 29 | if TYPE_CHECKING: |
29 | 30 | from paddle import Tensor |
30 | 31 | from paddle._typing import ( |
| 32 | + DTypeLike, |
| 33 | + PlaceLike, |
31 | 34 | Size2, |
32 | 35 | ) |
33 | 36 |
|
34 | 37 |
|
35 | | -__all__ = [ |
36 | | - 'Unfold', |
37 | | -] |
| 38 | +__all__ = ['Unfold', 'Linear'] |
38 | 39 |
|
39 | 40 |
|
40 | 41 | class Unfold(nn.Unfold): |
@@ -114,3 +115,155 @@ def to_list_if_necessary(x, size_check=False): |
114 | 115 | dilations=to_list_if_necessary(self.dilations), |
115 | 116 | name=self.name, |
116 | 117 | ) |
| 118 | + |
| 119 | + |
| 120 | +class Linear(nn.Layer): |
| 121 | + r""" |
| 122 | +
|
| 123 | + PyTorch compatible fully-connected linear transformation layer. For each input :math:`X` , |
| 124 | + the equation is: |
| 125 | +
|
| 126 | + .. math:: |
| 127 | +
|
| 128 | + Out = XW^T + b |
| 129 | +
|
| 130 | + where :math:`W` is the weight and :math:`b` is the bias. |
| 131 | +
|
| 132 | + Linear layer takes only one multi-dimensional tensor as input, with the |
| 133 | + shape :math:`[*, in\_features]` , where :math:`*` means any |
| 134 | + number of additional dimensions. It multiplies the input tensor by the transpose |
| 135 | + of the weight (a 2-D tensor of shape :math:`[out\_features, in\_features]` ) and |
| 136 | + produces an output tensor of shape :math:`[*, out\_features]` . |
| 137 | + If ``bias`` is True, a bias (a 1-D tensor of |
| 138 | + shape :math:`[out\_features]` ) will be created and added to the output. At the |
| 139 | + end of initialization, ``reset_parameters`` is called to randomly initialize |
| 140 | + the ``weight`` and the ``bias`` (if present). |
| 141 | +
|
| 142 | + Parameters: |
| 143 | + in_features (int): |
| 144 | + The number of input units. |
| 145 | + out_features (int): |
| 146 | + The number of output units. |
| 147 | + bias (bool): If True, the bias (a 1-D tensor of shape :math:`[out\_features]` ) will be created and |
| 148 | + added to the output. Default: True. |
| 149 | + device (PlaceLike): The device on which the parameters are created. Default: None, |
| 150 | + meaning the current default paddle device. |
| 151 | + dtype (DTypeLike): The dtype of the parameters created. Default: None, in which case |
| 152 | + the default dtype of Linear (float32) is used. |
| 153 | +
|
| 154 | + Variables: |
| 155 | + weight (paddle.Tensor): learnable parameters of the module of shape :math:`[out\_features, in\_features]`. |
| 156 | + The values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where :math:`k` is :math:`\frac{1}{in\_features}`. |
| 157 | + bias (paddle.Tensor): learnable parameters of the module of shape :math:`[out\_features]`. If ``bias`` is True, |
| 158 | + the values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where :math:`k` is :math:`\frac{1}{in\_features}`. |
| 159 | +
|
| 160 | + Shape: |
| 161 | + - input: Multi-dimensional tensor with shape :math:`[*, in\_features]` . Its data type can be float16, float32 or float64. The default is float32. |
| 162 | + - output: Multi-dimensional tensor with shape :math:`[*, out\_features]` . The data type is the same as the input. |
| 163 | +
|
| 164 | + Examples: |
| 165 | + .. code-block:: python |
| 166 | +
|
| 167 | + >>> import paddle |
| 168 | + >>> paddle.seed(100) |
| 169 | +
|
| 170 | + >>> # Define the linear layer. |
| 171 | + >>> linear = paddle.compat.nn.Linear(2, 4, bias=True) |
| 172 | + >>> print(linear.weight) |
| 173 | + Parameter containing: |
| 174 | + Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=False, |
| 175 | + [[-0.49191639, 0.28120756], |
| 176 | + [-0.17887023, 0.40572405], |
| 177 | + [ 0.35139430, 0.45717543], |
| 178 | + [-0.06135514, -0.21088189]]) |
| 179 | +
|
| 180 | + >>> print(linear.bias) |
| 181 | + Parameter containing: |
| 182 | + Tensor(shape=[4], dtype=float32, place=Place(cpu), stop_gradient=False, |
| 183 | + [ 0.49166456, -0.06108528, -0.14973064, 0.31168410]) |
| 184 | +
|
| 185 | + >>> x = paddle.arange(6, dtype="float32").reshape([3, 2]) |
| 186 | + >>> y = linear(x) |
| 187 | + >>> print(y) |
| 188 | + Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=False, |
| 189 | + [[ 0.77287209, 0.34463876, 0.30744481, 0.10080221], |
| 190 | + [ 0.35145447, 0.79834640, 1.92458415, -0.44367185], |
| 191 | + [-0.06996319, 1.25205410, 3.54172373, -0.98814595]]) |
| 192 | + """ |
| 193 | + |
| 194 | + __constants__ = ["in_features", "out_features"] |
| 195 | + in_features: int |
| 196 | + out_features: int |
| 197 | + weight: Tensor |
| 198 | + |
| 199 | + @ForbidKeywordsDecorator( |
| 200 | + illegal_keys={"weight_attr", "bias_attr", "name"}, |
| 201 | + func_name="paddle.compat.nn.Linear", |
| 202 | + correct_name="paddle.nn.Linear", |
| 203 | + ) |
| 204 | + def __init__( |
| 205 | + self, |
| 206 | + in_features: int, |
| 207 | + out_features: int, |
| 208 | + bias: bool = True, |
| 209 | + device: PlaceLike | None = None, |
| 210 | + dtype: DTypeLike | None = None, |
| 211 | + ) -> None: |
| 212 | + super().__init__() |
| 213 | + self._dtype = ( |
| 214 | + self._helper.get_default_dtype() if dtype is None else dtype |
| 215 | + ) |
| 216 | + self.in_features = in_features |
| 217 | + self.out_features = out_features |
| 218 | + self.weight = self.create_parameter( |
| 219 | + shape=[out_features, in_features], |
| 220 | + attr=None, |
| 221 | + dtype=self._dtype, |
| 222 | + is_bias=False, |
| 223 | + device=device, |
| 224 | + ) |
| 225 | + self.bias = None |
| 226 | + if bias: |
| 227 | + self.bias = self.create_parameter( |
| 228 | + shape=[out_features], |
| 229 | + attr=None, |
| 230 | + dtype=self._dtype, |
| 231 | + is_bias=True, |
| 232 | + device=device, |
| 233 | + ) |
| 234 | + # The same parameter initialization as PyTorch |
| 235 | + self.reset_parameters() |
| 236 | + |
| 237 | + def forward(self, input: Tensor) -> Tensor: |
| 238 | + return functional.linear.__wrapped__( # bypass ForbidKeywordsDecorator |
| 239 | + input=input, weight=self.weight, bias=self.bias |
| 240 | + ) |
| 241 | + |
| 242 | + def extra_repr(self) -> str: |
| 243 | + """ |
| 244 | + Return the extra representation of the module. |
| 245 | + """ |
| 246 | + return f"in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}" |
| 247 | + |
| 248 | + def reset_parameters(self) -> None: |
| 249 | + """ |
| 250 | + Resets parameters based on their initialization used in ``__init__``. |
| 251 | + """ |
| 252 | + |
| 253 | + # KaimingUniform initializer should be more flexible: the user should be able to specify the place |
| 254 | + expected_place = paddle.base.framework._current_expected_place() |
| 255 | + original_place = self.weight.place |
| 256 | + nn.init.kaiming_uniform_(self.weight, a=sqrt(5)) |
| 257 | + |
| 258 | + place_mismatch = expected_place != original_place |
| 259 | + if place_mismatch and in_dynamic_mode(): |
| 260 | + self.weight = self.weight.to(original_place) |
| 261 | + if self.bias is not None: |
| 262 | + # For a 2-D weight, nn.init._calculate_fan_in_and_fan_out(self.weight) |
| 263 | + # is equivalent to returning (weight.shape[1], weight.shape[0]) |
| 264 | + fan_in = self.weight.shape[1] |
| 265 | + bound = 1 / sqrt(fan_in) if fan_in > 0 else 0 |
| 266 | + nn.init.uniform_(self.bias, -bound, bound) |
| 267 | + |
| 268 | + if place_mismatch and in_dynamic_mode(): |
| 269 | + self.bias = self.bias.to(original_place) |
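
As a sanity check on the `Out = XW^T + b` contract documented above, the sketch below builds the `paddle.compat.nn.Linear` layer this diff adds and compares its output against a manual matmul. The layer name comes straight from the diff; since `weight` is stored as `[out_features, in_features]`, `paddle.matmul(..., transpose_y=True)` supplies the transpose. A minimal sketch, not part of the diff:

```python
import paddle

# Minimal sketch: the compat Linear should compute x @ W^T + b, with W stored
# as [out_features, in_features] (hence transpose_y=True in the manual matmul).
linear = paddle.compat.nn.Linear(2, 4, bias=True)
x = paddle.randn([3, 2])

manual = paddle.matmul(x, linear.weight, transpose_y=True) + linear.bias
assert paddle.allclose(linear(x), manual)
```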
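The docstring's claim that parameters are drawn from `U(-sqrt(k), sqrt(k))` with `k = 1/in_features` is consistent with `reset_parameters` calling `kaiming_uniform_(self.weight, a=sqrt(5))`: assuming the Kaiming-uniform convention PyTorch uses (which the `# The same parameter initialization as PyTorch` comment indicates), the bound is `gain * sqrt(3 / fan_in)` with leaky-ReLU gain `sqrt(2 / (1 + a^2))`, and at `a = sqrt(5)` this collapses to `1 / sqrt(fan_in)`. A pure-Python sketch of that arithmetic:

```python
from math import sqrt

def kaiming_uniform_bound(fan_in: int, a: float = sqrt(5)) -> float:
    # Kaiming uniform bound: gain * sqrt(3 / fan_in), where the leaky-ReLU
    # gain is sqrt(2 / (1 + a^2)). At a = sqrt(5), gain = sqrt(1/3).
    gain = sqrt(2.0 / (1.0 + a * a))
    return gain * sqrt(3.0 / fan_in)

# bound == sqrt(1 / fan_in) == sqrt(k), matching U(-sqrt(k), sqrt(k)).
for in_features in (2, 64, 1024):
    assert abs(kaiming_uniform_bound(in_features) - sqrt(1.0 / in_features)) < 1e-12
```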