|
19 | 19 | import paddle |
20 | 20 | from paddle import _C_ops, in_dynamic_mode |
21 | 21 | from paddle.framework import core, in_dynamic_or_pir_mode |
22 | | -from paddle.utils.decorator_utils import ParamAliasDecorator |
23 | 22 | from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only |
24 | 23 |
|
25 | 24 | from ...base.data_feeder import check_dtype, check_variable_and_dtype |
|
28 | 27 | from ...tensor.manipulation import chunk |
29 | 28 | from ...tensor.math import tanh, tanh_ # noqa: F401 |
30 | 29 | from ...tensor.ops import sigmoid |
| 30 | +from ...tensor.softmax import softmax as softmax |
31 | 31 |
|
32 | 32 | if TYPE_CHECKING: |
33 | 33 | from paddle import Tensor |
@@ -1128,189 +1128,6 @@ def silu(x: Tensor, name: str | None = None) -> Tensor: |
1128 | 1128 | return out |
1129 | 1129 |
|
1130 | 1130 |
|
1131 | | -@ParamAliasDecorator({"x": ["input"], "axis": ["dim"]}) |
1132 | | -def softmax( |
1133 | | - x: Tensor, |
1134 | | - axis: int = -1, |
1135 | | - dtype: DTypeLike | None = None, |
1136 | | - name: str | None = None, |
1137 | | -) -> Tensor: |
1138 | | - r""" |
1139 | | - This operator implements the softmax layer. The calculation process is as follows: |
1140 | | -
|
1141 | | - 1. The dimension :attr:`axis` of ``x`` will be permuted to the last. |
1142 | | -
|
1143 | | - 2. Then ``x`` will be logically flattened to a 2-D matrix. The matrix's second |
1144 | | - dimension (row length) is the same as the dimension :attr:`axis` of ``x``, |
1145 | | - and the first dimension (column length) is the product of all other dimensions |
1146 | | - of ``x``. For each row of the matrix, the softmax operator squashes the |
1147 | | - K-dimensional (K is the width of the matrix, which is also the size of ``x``'s |
1148 | | - dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional |
1149 | | - vector of real values in the range [0, 1] that add up to 1. |
1150 | | -
|
1151 | | - 3. After the softmax operation is completed, the inverse operations of steps 1 and 2 |
1152 | | - are performed to restore the two-dimensional matrix to the original shape of ``x``. |
1153 | | -
|
1154 | | - For each entry along the chosen dimension, the operator computes the exponential |
1155 | | - of that entry and the sum of the exponentials of all entries along that |
1156 | | - dimension in the K-dimensional vector input. The ratio of the entry's |
1157 | | - exponential to that sum is the corresponding output of the softmax |
1158 | | - operator. |
1159 | | -
|
1160 | | - For each row :math:`i` and each column :math:`j` in the matrix, we have: |
1161 | | -
|
1162 | | - .. math:: |
1163 | | -
|
1164 | | - softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j \exp(x[i, j])} |
1165 | | -
|
1166 | | - Example: |
1167 | | -
|
1168 | | - .. code-block:: text |
1169 | | -
|
1170 | | - Case 1: |
1171 | | - Input: |
1172 | | - x.shape = [2, 3, 4] |
1173 | | - x.data = [[[2.0, 3.0, 4.0, 5.0], |
1174 | | - [3.0, 4.0, 5.0, 6.0], |
1175 | | - [7.0, 8.0, 8.0, 9.0]], |
1176 | | - [[1.0, 2.0, 3.0, 4.0], |
1177 | | - [5.0, 6.0, 7.0, 8.0], |
1178 | | - [6.0, 7.0, 8.0, 9.0]]] |
1179 | | -
|
1180 | | - Attrs: |
1181 | | - axis = -1 |
1182 | | -
|
1183 | | - Output: |
1184 | | - out.shape = [2, 3, 4] |
1185 | | - out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1186 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1187 | | - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], |
1188 | | - [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1189 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1190 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] |
1191 | | -
|
1192 | | - Case 2: |
1193 | | - Input: |
1194 | | - x.shape = [2, 3, 4] |
1195 | | - x.data = [[[2.0, 3.0, 4.0, 5.0], |
1196 | | - [3.0, 4.0, 5.0, 6.0], |
1197 | | - [7.0, 8.0, 8.0, 9.0]], |
1198 | | - [[1.0, 2.0, 3.0, 4.0], |
1199 | | - [5.0, 6.0, 7.0, 8.0], |
1200 | | - [6.0, 7.0, 8.0, 9.0]]] |
1201 | | - Attrs: |
1202 | | - axis = 1 |
1203 | | -
|
1204 | | - Output: |
1205 | | - out.shape = [2, 3, 4] |
1206 | | - out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], |
1207 | | - [0.01786798, 0.01786798, 0.04661262, 0.04661262], |
1208 | | - [0.97555875, 0.97555875, 0.93623955, 0.93623955]], |
1209 | | - [[0.00490169, 0.00490169, 0.00490169, 0.00490169], |
1210 | | - [0.26762315, 0.26762315, 0.26762315, 0.26762315], |
1211 | | - [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] |
1212 | | -
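Both cases can be reproduced against the formula above with a short check; a minimal sketch using only public ``paddle`` ops (Case 2 values, ``axis = 1``):

```python
import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]])

# softmax[i, j] = exp(x[i, j]) / sum_j exp(x[i, j]), applied along the chosen axis
manual = paddle.exp(x) / paddle.exp(x).sum(axis=1, keepdim=True)
print(paddle.allclose(F.softmax(x, axis=1), manual))  # Tensor containing True
```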
|
1213 | | - .. note:: |
1214 | | - Alias Support: The parameter name ``input`` can be used as an alias for ``x``, and ``dim`` can be used as an alias for ``axis``. |
1215 | | - For example, ``softmax(input=tensor_x, dim=1, ...)`` is equivalent to ``softmax(x=tensor_x, axis=1, ...)``. |
1216 | | -
|
1217 | | - Parameters: |
1218 | | - x (Tensor): The input Tensor with data type bfloat16, float16, float32, float64. |
1219 | | - alias: ``input``. |
1220 | | - axis (int, optional): The axis along which to perform softmax |
1221 | | - calculations. It should be in range [-D, D), where D is the |
1222 | | - rank of ``x`` . If ``axis`` < 0, it works the same way as |
1223 | | - :math:`axis + D` . Default is -1. |
1224 | | - alias: ``dim``. |
1225 | | - dtype (str, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64. |
1226 | | - name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. |
1227 | | -
|
1228 | | - Returns: |
1229 | | - A Tensor with the same shape as ``x``. Its data type is ``dtype`` if it is |
1230 | | - specified, otherwise the same as that of ``x``. |
1231 | | -
|
1232 | | - Examples: |
1233 | | - .. code-block:: python |
1234 | | -
|
1235 | | - >>> import paddle |
1236 | | - >>> import paddle.nn.functional as F |
1237 | | -
|
1238 | | - >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], |
1239 | | - ... [3.0, 4.0, 5.0, 6.0], |
1240 | | - ... [7.0, 8.0, 8.0, 9.0]], |
1241 | | - ... [[1.0, 2.0, 3.0, 4.0], |
1242 | | - ... [5.0, 6.0, 7.0, 8.0], |
1243 | | - ... [6.0, 7.0, 8.0, 9.0]]], dtype='float32') |
1244 | | - >>> out1 = F.softmax(x) |
1245 | | - >>> out2 = F.softmax(x, dtype='float64') |
1246 | | - >>> #out1's data type is float32; out2's data type is float64 |
1247 | | - >>> #out1 and out2's value is as follows: |
1248 | | - >>> print(out1) |
1249 | | - >>> print(out2) |
1250 | | - Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, |
1251 | | - [[[0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1252 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1253 | | - [0.07232949, 0.19661194, 0.19661194, 0.53444666]], |
1254 | | - [[0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1255 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1256 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428]]]) |
1257 | | - Tensor(shape=[2, 3, 4], dtype=float64, place=Place(cpu), stop_gradient=True, |
1258 | | - [[[0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1259 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1260 | | - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], |
1261 | | - [[0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1262 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1263 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426]]]) |
1264 | | - """ |
1265 | | - |
1266 | | - if ( |
1267 | | - (dtype is not None) |
1268 | | - and (not isinstance(dtype, core.VarDesc.VarType)) |
1269 | | - and (not isinstance(dtype, core.DataType)) |
1270 | | - ): |
1271 | | - dtype = convert_np_dtype_to_dtype_(dtype) |
1272 | | - if in_dynamic_or_pir_mode(): |
1273 | | - outs_cast = x if dtype is None else _C_ops.cast(x, dtype) |
1274 | | - return _C_ops.softmax(outs_cast, axis) |
1275 | | - else: |
1276 | | - use_cudnn = True |
1277 | | - if dtype is None: |
1278 | | - check_variable_and_dtype( |
1279 | | - x, 'x', ['uint16', 'float16', 'float32', 'float64'], 'softmax' |
1280 | | - ) |
1281 | | - else: |
1282 | | - check_dtype( |
1283 | | - dtype, |
1284 | | - 'dtype', |
1285 | | - ['uint16', 'float16', 'float32', 'float64'], |
1286 | | - 'softmax', |
1287 | | - 'If dtype is not None, it only supports uint16, float16, float32 or float64.', |
1288 | | - ) |
1289 | | - |
1290 | | - helper = LayerHelper("softmax", **locals()) |
1291 | | - outs_cast = x |
1292 | | - if dtype is not None: |
1293 | | - outs_cast = helper.create_variable_for_type_inference(dtype) |
1294 | | - helper.append_op( |
1295 | | - type='cast', |
1296 | | - inputs={'X': x}, |
1297 | | - outputs={'Out': outs_cast}, |
1298 | | - attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, |
1299 | | - ) |
1300 | | - |
1301 | | - outs_softmax = helper.create_variable_for_type_inference( |
1302 | | - outs_cast.dtype |
1303 | | - ) |
1304 | | - helper.append_op( |
1305 | | - type='softmax', |
1306 | | - inputs={'X': outs_cast}, |
1307 | | - outputs={'Out': outs_softmax}, |
1308 | | - attrs={'axis': axis, 'use_cudnn': use_cudnn}, |
1309 | | - ) |
1310 | | - |
1311 | | - return outs_softmax |
1312 | | - |
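In the dynamic-mode branch above, the optional ``dtype`` cast runs before the softmax kernel, so passing ``dtype`` should match casting the input by hand. A small sketch of that equivalence (assuming the re-exported implementation preserves this ordering):

```python
import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 3, 4], dtype='float32')

out_a = F.softmax(x, axis=1, dtype='float64')   # cast first, then softmax
out_b = F.softmax(x.astype('float64'), axis=1)  # the same cast done manually

print(out_a.dtype)                    # paddle.float64
print(paddle.allclose(out_a, out_b))  # Tensor containing True
```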
1313 | | - |
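The removed ``LayerHelper`` branch handled the static-graph path, appending a ``cast`` op (when ``dtype`` is given) followed by a ``softmax`` op. A minimal static-graph sketch exercising the same call (program, variable, and feed names here are illustrative):

```python
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[2, 3, 4], dtype='float32')
    # In the graph this lowers to a cast op (float32 -> float64) plus a softmax op.
    y = F.softmax(x, axis=-1, dtype='float64')

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
(out,) = exe.run(main_prog,
                 feed={'x': np.random.rand(2, 3, 4).astype('float32')},
                 fetch_list=[y])
print(out.dtype)  # float64
paddle.disable_static()
```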
1314 | 1131 | @inplace_apis_in_dygraph_only |
1315 | 1132 | def softmax_( |
1316 | 1133 | x: Tensor, |
|