
Commit 50e889f

DongBaiYue authored and Luckycheng222 committed
[API compatibility] softmax, nonzero, randn (PaddlePaddle#74623)
* [API compatibility] softmax, nonzero, randn
* delete chinese
* delete *shape
* fix comment example
* fix
* fix
1 parent 8b2eddb commit 50e889f

File tree: 6 files changed, +177 -19 lines
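The patch leans on small helpers in ``paddle.utils.decorator_utils`` (``ParamAliasDecorator``, ``param_one_alias``, ``SizeArgsDecorator``). Their implementations are not part of this diff; the following is only a hand-written sketch of the alias mechanism, not Paddle's actual code:

    import functools

    def param_alias(alias_map):
        # Sketch only: remap alias keyword names (e.g. ``input``) to their
        # canonical parameter names (e.g. ``x``) before calling the API.
        def decorator(fn):
            @functools.wraps(fn)
            def wrapper(*args, **kwargs):
                for canonical, aliases in alias_map.items():
                    for alias in aliases:
                        if alias in kwargs and canonical not in kwargs:
                            kwargs[canonical] = kwargs.pop(alias)
                return fn(*args, **kwargs)
            return wrapper
        return decorator

    @param_alias({"x": ["input"], "axis": ["dim"]})
    def softmax(x, axis=-1):
        ...  # the real implementation lives in paddle.nn.functional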

python/paddle/nn/functional/activation.py

Lines changed: 8 additions & 0 deletions
@@ -19,6 +19,7 @@
 import paddle
 from paddle import _C_ops, in_dynamic_mode
 from paddle.framework import core, in_dynamic_or_pir_mode
+from paddle.utils.decorator_utils import ParamAliasDecorator
 from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only

 from ...base.data_feeder import check_dtype, check_variable_and_dtype
@@ -1127,6 +1128,7 @@ def silu(x: Tensor, name: str | None = None) -> Tensor:
     return out


+@ParamAliasDecorator({"x": ["input"], "axis": ["dim"]})
 def softmax(
     x: Tensor,
     axis: int = -1,
@@ -1208,12 +1210,18 @@ def softmax(
             [0.26762315, 0.26762315, 0.26762315, 0.26762315],
             [0.72747516, 0.72747516, 0.72747516, 0.72747516]]]

+    .. note::
+        Alias Support: The parameter name ``input`` can be used as an alias for ``x``, and ``dim`` can be used as an alias for ``axis``.
+        For example, ``softmax(input=tensor_x, dim=1, ...)`` is equivalent to ``softmax(x=tensor_x, axis=1, ...)``.
+
     Parameters:
         x (Tensor): The input Tensor with data type bfloat16, float16, float32, float64.
+            alias: ``input``.
         axis (int, optional): The axis along which to perform softmax
             calculations. It should be in range [-D, D), where D is the
             rank of ``x`` . If ``axis`` < 0, it works the same way as
             :math:`axis + D` . Default is -1.
+            alias: ``dim``.
         dtype (str, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64.
         name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
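If the decorator behaves as the docstring note describes, the alias spellings are interchangeable in dygraph mode; a minimal sketch (tensor values are arbitrary):

    import paddle
    import paddle.nn.functional as F

    x = paddle.rand([2, 3, 4])

    out_a = F.softmax(x=x, axis=1)     # canonical parameter names
    out_b = F.softmax(input=x, dim=1)  # PyTorch-style aliases

    assert paddle.allclose(out_a, out_b)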

python/paddle/tensor/random.py

Lines changed: 5 additions & 2 deletions
@@ -29,7 +29,7 @@
     in_pir_mode,
     use_pir_api,
 )
-from paddle.utils.decorator_utils import param_one_alias
+from paddle.utils.decorator_utils import SizeArgsDecorator, param_one_alias

 from ..base.data_feeder import (
     check_dtype,
@@ -903,6 +903,7 @@ def standard_normal(
     return gaussian(shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name)


+@SizeArgsDecorator()
 def randn(
     shape: ShapeLike, dtype: DTypeLike | None = None, name: str | None = None
 ) -> Tensor:
@@ -912,9 +913,11 @@
     and ``dtype``.

     Args:
-        shape (tuple|list|Tensor): Shape of the Tensor to be created. The data type is ``int32`` or ``int64`` .
+        shape (tuple|list|Tensor|*shape): Shape of the Tensor to be created. The data type is ``int32`` or ``int64`` .
             If ``shape`` is a list or tuple, each element of it should be integer or 0-D Tensor with shape [].
             If ``shape`` is an Tensor, it should be an 1-D Tensor which represents a list.
+            If ``shape`` is *shape, directly pass integers as variable-length arguments (e.g., `randn(2, 3)`).
+            alias: ``size``.
         dtype (str|np.dtype|paddle.dtype|None, optional): The data type of the output Tensor.
             Supported data types: float16, bfloat16, float32, float64, complex64, complex128.
             Default is None, use global default dtype (see ``get_default_dtype``
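A quick sketch of the three calling conventions ``randn`` accepts after this change (shape values are arbitrary):

    import paddle

    a = paddle.randn([2, 3])        # canonical list/tuple shape
    b = paddle.randn(size=[2, 3])   # PyTorch-style ``size`` alias
    c = paddle.randn(2, 3)          # variadic *shape integers

    assert a.shape == b.shape == c.shape == [2, 3]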

python/paddle/tensor/search.py

Lines changed: 16 additions & 14 deletions
@@ -22,7 +22,7 @@
 import paddle
 from paddle import _C_ops
 from paddle.common_ops_import import VarDesc, Variable
-from paddle.utils.decorator_utils import ParamAliasDecorator
+from paddle.utils.decorator_utils import ParamAliasDecorator, param_one_alias
 from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only

 from ..base.data_feeder import check_dtype, check_variable_and_dtype
@@ -467,7 +467,8 @@ def nonzero(x: Tensor, as_tuple: Literal[True] = ...) -> tuple[Tensor, ...]: ...
 def nonzero(x: Tensor, as_tuple: bool = ...) -> Tensor | tuple[Tensor, ...]: ...


-def nonzero(x: Tensor, as_tuple=False):
+@param_one_alias(['x', 'input'])
+def nonzero(x: Tensor, as_tuple=False, *, out: Tensor | None = None):
     """
     Return a tensor containing the indices of all non-zero elements of the `input`
     tensor. If as_tuple is True, return a tuple of 1-D tensors, one for each dimension
@@ -477,9 +478,15 @@ def nonzero(x: Tensor, as_tuple=False):
     number of all non-zero elements in the `input` tensor. If as_tuple is True, we can get
     a 1-D tensor tuple of length `n`, and the shape of each 1-D tensor is [z, 1].

+    .. note::
+        Alias Support: The parameter name ``input`` can be used as an alias for ``x``.
+        For example, ``nonzero(input=tensor_x)`` is equivalent to ``nonzero(x=tensor_x)``.
+
     Args:
         x (Tensor): The input tensor variable.
+            alias: ``input``.
         as_tuple (bool, optional): Return type, Tensor or tuple of Tensor.
+        out (Tensor|None, optional): The output tensor. Default: None.

     Returns:
         Tensor or tuple of Tensor, The data type is int64.
@@ -504,14 +511,10 @@ def nonzero(x: Tensor, as_tuple=False):
         >>> out_z1_tuple = paddle.nonzero(x1, as_tuple=True)
         >>> for out in out_z1_tuple:
         ...     print(out)
-        Tensor(shape=[3, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
-        [[0],
-         [1],
-         [2]])
-        Tensor(shape=[3, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
-        [[0],
-         [1],
-         [2]])
+        Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
+        [0, 1, 2])
+        Tensor(shape=[3], dtype=int64, place=Place(cpu), stop_gradient=True,
+        [0, 1, 2])

         >>> out_z2 = paddle.nonzero(x2)
         >>> print(out_z2)
@@ -522,13 +525,12 @@ def nonzero(x: Tensor, as_tuple=False):
         >>> out_z2_tuple = paddle.nonzero(x2, as_tuple=True)
         >>> for out in out_z2_tuple:
         ...     print(out)
-        Tensor(shape=[2, 1], dtype=int64, place=Place(cpu), stop_gradient=True,
-        [[1],
-         [3]])
+        Tensor(shape=[2], dtype=int64, place=Place(cpu), stop_gradient=True,
+        [1, 3])

     """
     if in_dynamic_or_pir_mode():
-        outs = _C_ops.nonzero(x)
+        outs = _C_ops.nonzero(x, out=out)
     else:
         check_variable_and_dtype(
             x,
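A minimal dygraph sketch of the ``input`` alias and the new ``out=`` keyword (the input tensor and preallocated shape are illustrative; with two non-zeros in a 1-D input, the result has shape [2, 1]):

    import paddle

    x = paddle.to_tensor([0.0, 2.0, 0.0, 5.0])

    idx_a = paddle.nonzero(x=x)      # canonical name
    idx_b = paddle.nonzero(input=x)  # ``input`` alias
    assert bool((idx_a == idx_b).all())

    # Write the indices into a preallocated tensor via ``out=``.
    out = paddle.zeros([2, 1], dtype='int64')
    paddle.nonzero(x, out=out)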

test/legacy_test/test_nonzero_api.py

Lines changed: 71 additions & 0 deletions
@@ -16,6 +16,7 @@

 import numpy as np
 from op_test import OpTest, convert_float_to_uint16
+from utils import dygraph_guard

 import paddle
 from paddle import base
@@ -228,5 +229,75 @@ def test_check_output(self):
         self.check_output(check_pir=True, check_symbol_infer=True)


+class TestNonzeroCompatibility(unittest.TestCase):
+    def setUp(self):
+        self.places = [paddle.CPUPlace()]
+        if paddle.base.core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+        self.input_data = [[1, 0, 3], [0, 5, 0], [7, 0, 9]]
+        self.expected_indices = np.array(
+            [[0, 0], [0, 2], [1, 1], [2, 0], [2, 2]]
+        )
+
+    def test_nonzero_with_param_aliases(self):
+        with dygraph_guard():
+            for place in self.places:
+                paddle.device.set_device(place)
+                input_tensor = paddle.to_tensor(
+                    self.input_data, dtype='float32'
+                )
+                for param_name in ['x', 'input']:
+                    for as_tuple in [False, True]:
+                        kwargs = {
+                            param_name: input_tensor,
+                            'as_tuple': as_tuple,
+                        }
+                        result = paddle.nonzero(**kwargs)
+                        if as_tuple:
+                            combined = np.stack(
+                                [r.numpy() for r in result], axis=1
+                            )
+                            np.testing.assert_array_equal(
+                                combined, self.expected_indices
+                            )
+                        else:
+                            np.testing.assert_array_equal(
+                                result.numpy(), self.expected_indices
+                            )
+
+    def test_nonzero_with_out(self):
+        def run_nonzero(test_type):
+            x = paddle.to_tensor(self.input_data, dtype='float32')
+            x.stop_gradient = False
+            out_shape = [len(self.expected_indices), 2]
+            out = (
+                paddle.zeros(out_shape, dtype='int64')
+                if test_type in ["with_out", "both"]
+                else None
+            )
+            if test_type == "return":
+                out = paddle.nonzero(x, out=None)
+            elif test_type == "with_out":
+                paddle.nonzero(x, out=out)
+            elif test_type == "both":
+                out = paddle.nonzero(x, out=out)
+            expected = paddle._C_ops.nonzero(x)
+            np.testing.assert_array_equal(out.numpy(), expected.numpy())
+            loss = out.sum().astype('float32')
+            loss.backward()
+            return out, x.grad
+
+        with dygraph_guard():
+            for place in self.places:
+                paddle.device.set_device(place)
+                out1, _ = run_nonzero("return")
+                out2, _ = run_nonzero("with_out")
+                out3, _ = run_nonzero("both")
+                for out in [out2, out3]:
+                    np.testing.assert_allclose(
+                        out1.numpy(), out.numpy(), rtol=1e-10
+                    )
+
+
 if __name__ == "__main__":
     unittest.main()

test/legacy_test/test_randn_op.py

Lines changed: 35 additions & 2 deletions
@@ -16,6 +16,7 @@

 import numpy as np
 from op_test import get_device_place
+from utils import dygraph_guard

 import paddle
 from paddle.static import Program, program_guard
@@ -74,13 +75,45 @@ def test_api(self):
 class TestRandnOpError(unittest.TestCase):
     def test_error(self):
         with program_guard(Program(), Program()):
-            # The argument shape's type of randn_op should be list or tuple.
-            self.assertRaises(TypeError, paddle.randn, 1)

             # The argument dtype of randn_op should be float32 or float64.
             self.assertRaises(TypeError, paddle.randn, [1, 2], 'int32')


+class TestRandnOpCompatibility(unittest.TestCase):
+    def setUp(self):
+        self.places = [paddle.CPUPlace()]
+        if paddle.base.core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+        self.expected_shape = [2, 3]
+        self.dtype = paddle.float32
+
+    def test_gather_with_param_aliases(self):
+        with dygraph_guard():
+            for place in self.places:
+                paddle.device.set_device(place)
+                for param_name in ['shape', 'size']:
+
+                    tensor = paddle.randn(
+                        **{param_name: self.expected_shape}, dtype=self.dtype
+                    )
+                    self.assertEqual(tensor.shape, self.expected_shape)
+                    self.assertEqual(tensor.dtype, self.dtype)
+
+                    shape_tensor = paddle.to_tensor(
+                        self.expected_shape, dtype='int32'
+                    )
+                    tensor = paddle.randn(
+                        **{param_name: shape_tensor}, dtype=self.dtype
+                    )
+                    self.assertEqual(tensor.shape, self.expected_shape)
+                    self.assertEqual(tensor.dtype, self.dtype)
+
+                tensor = paddle.randn(*self.expected_shape, dtype=self.dtype)
+                self.assertEqual(tensor.shape, self.expected_shape)
+                self.assertEqual(tensor.dtype, self.dtype)
+
+
 if __name__ == "__main__":
     paddle.enable_static()
     unittest.main()

test/legacy_test/test_softmax_op.py

Lines changed: 42 additions & 1 deletion
@@ -21,7 +21,7 @@
     get_device_place,
     get_places,
 )
-from utils import static_guard
+from utils import dygraph_guard, static_guard

 import paddle
 import paddle.nn.functional as F
@@ -662,5 +662,46 @@ def test_dygraph(self):
         paddle.enable_static()


+class TestSoftmaxCompatibility(unittest.TestCase):
+    def setUp(self):
+        self.input = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
+        self.axes = [0, 1]
+        self.places = [paddle.CPUPlace()]
+        if paddle.base.core.is_compiled_with_cuda():
+            self.places.append(paddle.CUDAPlace(0))
+
+    def test_gather_with_param_aliases(self):
+        with dygraph_guard():
+            for place in self.places:
+                paddle.device.set_device(place)
+                for axis in self.axes:
+                    input_tensor = paddle.to_tensor(self.input, dtype='float32')
+                    for param_x in ['x', 'input']:
+                        for param_axis in ['axis', 'dim']:
+                            kwargs = {param_x: input_tensor, param_axis: axis}
+                            result = paddle.nn.functional.softmax(**kwargs)
+                            expected = np.exp(
+                                input_tensor.numpy()
+                                - np.max(
+                                    input_tensor.numpy(),
+                                    axis=axis,
+                                    keepdims=True,
+                                )
+                            )
+                            expected = expected / np.sum(
+                                expected, axis=axis, keepdims=True
+                            )
+                            np.testing.assert_allclose(
+                                (
+                                    result.numpy()
+                                    if place.is_cpu_place()
+                                    else result.cpu().numpy()
+                                ),
+                                expected,
+                                rtol=1e-5,
+                                err_msg=f"Failed at axis={axis}, param_x={param_x}, param_axis={param_axis}",
+                            )
+
+
 if __name__ == "__main__":
     unittest.main()
