[API Compatibility] Add paddle.compat.nn.functional.linear and remove paddle.compat.softmax (#76144)

Enigmatisms · web-flow · commit 84d14f83fb60 · 2025-11-01T10:00:32.000+08:00
* [API Compatibility] Add paddle.compat.nn.functional.linear

paddle.compat.softmax is removed

Adjust the position of some import lines

* [Fix] Fixed compat.nn.functional import
diff --git a/python/paddle/compat/__init__.py b/python/paddle/compat/__init__.py
@@ -19,13 +19,10 @@
 import warnings
 from contextlib import contextmanager
 
-from paddle.tensor import softmax
-
 from . import nn  # noqa: F401
 
 __all__ = [
     'slogdet',
-    'softmax',
     'sort',
     'split',
     'min',
@@ -43,16 +40,15 @@
 from paddle.framework import (
     in_dynamic_mode,
 )
+from paddle.utils.decorator_utils import ForbidKeywordsDecorator
+
+from .utils import _check_out_status
 
 if TYPE_CHECKING:
     from collections.abc import Sequence
 
     from paddle import Tensor
 
-from paddle.utils.decorator_utils import ForbidKeywordsDecorator
-
-from .utils import _check_out_status
-
 
 class MedianRetType(NamedTuple):
     values: Tensor
diff --git a/python/paddle/compat/nn/__init__.py b/python/paddle/compat/nn/__init__.py
@@ -17,9 +17,11 @@
 from typing import TYPE_CHECKING
 
 import paddle
+from paddle import nn
 from paddle.framework import (
     in_dynamic_mode,
 )
+from paddle.utils.decorator_utils import ForbidKeywordsDecorator
 
 from . import functional  # noqa: F401
 
@@ -30,9 +32,6 @@
     )
 
 
-from paddle import nn
-from paddle.utils.decorator_utils import ForbidKeywordsDecorator
-
 __all__ = [
     'Unfold',
 ]
diff --git a/python/paddle/compat/nn/functional/__init__.py b/python/paddle/compat/nn/functional/__init__.py
@@ -23,6 +23,7 @@
     in_dynamic_mode,
 )
 from paddle.tensor import softmax
+from paddle.utils.decorator_utils import ForbidKeywordsDecorator
 
 if TYPE_CHECKING:
     from typing_extensions import TypeAlias
@@ -36,9 +37,8 @@
         "zeros", "constant", "reflect", "replicate", "circular"
     ]
 
-from paddle.utils.decorator_utils import ForbidKeywordsDecorator
 
-__all__ = ['pad', 'softmax']
+__all__ = ['pad', 'softmax', 'linear']
 
 
 def _check_valid_pad_len(pad_len, x_dim, is_constant):
@@ -191,3 +191,78 @@ def pad(
     if ndim_to_unsqueeze:
         return out.squeeze(axis=ndim_to_unsqueeze)
     return out
+
+
+@ForbidKeywordsDecorator(
+    illegal_keys={"x", "name"},
+    func_name="paddle.compat.nn.functional.linear",
+    correct_name="paddle.nn.functional.linear",
+)
+def linear(input: Tensor, weight: Tensor, bias: Tensor | None = None) -> Tensor:
+    r"""
+
+    Fully-connected linear transformation operator. For each input :math:`x` ,
+    the equation is:
+
+    .. math::
+
+        Out = xW^T + b
+
+    where :math:`W` is the weight and :math:`b` is the bias.
+
+    If the weight is a 2-D tensor of shape :math:`[out\_features, in\_features]` ,
+    input should be a multi-dimensional tensor of shape
+    :math:`[*, in\_features]` , where :math:`*` means any number of
+    additional dimensions. The linear operator multiplies input tensor with
+    weight and produces an output tensor of shape :math:`[*, out\_features]` .
+    If :math:`bias` is not None, the bias should be a 1-D tensor of shape
+    :math:`[out\_features]` and will be added to the output.
+
+    This implementation is aligned with PyTorch's linear function which computes
+    :math:`y = xW^T + b`.
+
+    Parameters:
+        input (Tensor): Input tensor. The data type should be bfloat16, float16, float32 or float64.
+            The input tensor should be of shape :math:`[*, in\_features]`, where :math:`*` means any number of additional dimensions, including none.
+        weight (Tensor): Weight tensor. The data type should be float16, float32 or float64.
+            Shape should be [out_features, in_features].
+        bias (Tensor, optional): Bias tensor. The data type should be float16, float32 or float64.
+            If it is set to None, no bias will be added to the output units.
+
+    Returns:
+        Tensor, the shape is :math:`[*, out\_features]` and the
+        data type is the same as that of ``input``.
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> paddle.seed(2025)
+
+            >>> x = paddle.arange(6, dtype=paddle.float32).reshape([3, 2])
+            >>> x
+            Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[0., 1.],
+                    [2., 3.],
+                    [4., 5.]])
+            >>> weight = paddle.full(shape=[4, 2], fill_value=0.5, dtype="float32", name="weight")
+            >>> weight
+            Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000]])
+            >>> bias = paddle.ones(shape=[4], dtype="float32", name="bias")
+            >>> y = paddle.compat.nn.functional.linear(x, weight, bias)
+            >>> print(y)
+            Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[1.50000000, 1.50000000, 1.50000000, 1.50000000],
+                    [3.50000000, 3.50000000, 3.50000000, 3.50000000],
+                    [5.50000000, 5.50000000, 5.50000000, 5.50000000]])
+    """
+    # matmul is called with transpose y set to True (computing input @ weight^T directly), since _C_ops.linear(input, weight.T, bias) can introduce more overhead. With CINN, the matmul and add can be fused.
+    out = _C_ops.matmul(input, weight, False, True)
+    if bias is not None:
+        out = _C_ops.add(out, bias)
+    return out
diff --git a/python/paddle/tensor/compat_softmax.py b/python/paddle/tensor/compat_softmax.py
@@ -30,7 +30,7 @@
 @ForbidKeywordsIgnoreOneParamDecorator(
     illegal_keys={"x", "axis", "name"},
     ignore_param=('_stacklevel', 2, int),
-    func_name="paddle.compat.softmax",
+    func_name="paddle.compat.nn.functional.softmax",
     correct_name="paddle.nn.functional.softmax",
 )
 def softmax(
@@ -41,7 +41,7 @@ def softmax(
     out: Tensor | None = None,
 ) -> Tensor:
     r"""
-    This operator implements the compat.softmax. The calculation process is as follows:
+    This operator implements PyTorch compatible softmax. The calculation process is as follows:
 
     1. The dimension :attr:`dim` of ``input`` will be permuted to the last.
 
@@ -139,8 +139,8 @@ def softmax(
             ...                       [[1.0, 2.0, 3.0, 4.0],
             ...                        [5.0, 6.0, 7.0, 8.0],
             ...                        [6.0, 7.0, 8.0, 9.0]]],dtype='float32')
-            >>> out1 = paddle.compat.softmax(x, -1)
-            >>> out2 = paddle.compat.softmax(x, -1, dtype='float64')
+            >>> out1 = paddle.compat.nn.functional.softmax(x, -1)
+            >>> out2 = paddle.compat.nn.functional.softmax(x, -1, dtype='float64')
             >>> #out1's data type is float32; out2's data type is float64
             >>> #out1 and out2's value is as follows:
             >>> print(out1)
diff --git a/test/legacy_test/test_compat_functional_linear.py b/test/legacy_test/test_compat_functional_linear.py
diff --git a/test/legacy_test/test_softmax_op.py b/test/legacy_test/test_softmax_op.py

Original file line number	Diff line number	Diff line change
`@@ -17,9 +17,11 @@`
`17`	`17`	`from typing import TYPE_CHECKING`
`18`	`18`
`19`	`19`	`import paddle`
	`20`	`+from paddle import nn`
`20`	`21`	`from paddle.framework import (`
`21`	`22`	`in_dynamic_mode,`
`22`	`23`	`)`
	`24`	`+from paddle.utils.decorator_utils import ForbidKeywordsDecorator`
`23`	`25`
`24`	`26`	`from . import functional # noqa: F401`
`25`	`27`
`@@ -30,9 +32,6 @@`
`30`	`32`	`)`
`31`	`33`
`32`	`34`
`33`		`-from paddle import nn`
`34`		`-from paddle.utils.decorator_utils import ForbidKeywordsDecorator`
`35`		`-`
`36`	`35`	`__all__ = [`
`37`	`36`	`'Unfold',`
`38`	`37`	`]`