|
19 | 19 | import paddle |
20 | 20 | from paddle import _C_ops, in_dynamic_mode |
21 | 21 | from paddle.framework import core, in_dynamic_or_pir_mode |
22 | | -from paddle.utils.decorator_utils import ParamAliasDecorator |
23 | 22 | from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only |
24 | 23 |
|
25 | 24 | from ...base.data_feeder import check_dtype, check_variable_and_dtype |
|
28 | 27 | from ...tensor.manipulation import chunk |
29 | 28 | from ...tensor.math import tanh, tanh_ # noqa: F401 |
30 | 29 | from ...tensor.ops import sigmoid |
| 30 | +from ...tensor.softmax import softmax as softmax |
31 | 31 |
|
32 | 32 | if TYPE_CHECKING: |
33 | 33 | from paddle import Tensor |
@@ -1128,189 +1128,6 @@ def silu(x: Tensor, name: str | None = None) -> Tensor: |
1128 | 1128 | return out |
1129 | 1129 |
|
1130 | 1130 |
|
1131 | | -@ParamAliasDecorator({"x": ["input"], "axis": ["dim"]}) |
1132 | | -def softmax( |
1133 | | - x: Tensor, |
1134 | | - axis: int = -1, |
1135 | | - dtype: DTypeLike | None = None, |
1136 | | - name: str | None = None, |
1137 | | -) -> Tensor: |
1138 | | - r""" |
1139 | | - This operator implements the softmax layer. The calculation process is as follows: |
1140 | | -
|
1141 | | - 1. The dimension :attr:`axis` of ``x`` will be permuted to the last. |
1142 | | -
|
1143 | | - 2. Then ``x`` will be logically flattened to a 2-D matrix. The matrix's second |
1144 | | - dimension (row length) is the same as the dimension :attr:`axis` of ``x``, |
1145 | | - and the first dimension (column length) is the product of all other dimensions |
1146 | | - of ``x``. For each row of the matrix, the softmax operator squashes the |
1147 | | - K-dimensional (K is the width of the matrix, which is also the size of ``x``'s |
1148 | | - dimension :attr:`axis`) vector of arbitrary real values to a K-dimensional |
1149 | | - vector of real values in the range [0, 1] that add up to 1. |
1150 | | -
|
1151 | | - 3. After the softmax operation is completed, the inverse operations of steps 1 and 2 |
1152 | | - are performed to restore the two-dimensional matrix to the original shape of ``x``. |
1153 | | -
|
1154 | | - For each entry along the chosen dimension, the operator computes the exponential |
1155 | | - of that entry and the sum of the exponentials of all entries along that |
1156 | | - dimension in the K-dimensional vector input. The ratio of the entry's |
1157 | | - exponential to that sum is the corresponding output of the softmax |
1158 | | - operator. |
1159 | | -
|
1160 | | - For each row :math:`i` and each column :math:`j` in the matrix, we have: |
1161 | | -
|
1162 | | - .. math:: |
1163 | | -
|
1164 | | - softmax[i, j] = \frac{\exp(x[i, j])}{\sum_j \exp(x[i, j])} |
1165 | | -
|
1166 | | - Example: |
1167 | | -
|
1168 | | - .. code-block:: text |
1169 | | -
|
1170 | | - Case 1: |
1171 | | - Input: |
1172 | | - x.shape = [2, 3, 4] |
1173 | | - x.data = [[[2.0, 3.0, 4.0, 5.0], |
1174 | | - [3.0, 4.0, 5.0, 6.0], |
1175 | | - [7.0, 8.0, 8.0, 9.0]], |
1176 | | - [[1.0, 2.0, 3.0, 4.0], |
1177 | | - [5.0, 6.0, 7.0, 8.0], |
1178 | | - [6.0, 7.0, 8.0, 9.0]]] |
1179 | | -
|
1180 | | - Attrs: |
1181 | | - axis = -1 |
1182 | | -
|
1183 | | - Output: |
1184 | | - out.shape = [2, 3, 4] |
1185 | | - out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1186 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1187 | | - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], |
1188 | | - [[0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1189 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426], |
1190 | | - [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]] |
1191 | | -
|
1192 | | - Case 2: |
1193 | | - Input: |
1194 | | - x.shape = [2, 3, 4] |
1195 | | - x.data = [[[2.0, 3.0, 4.0, 5.0], |
1196 | | - [3.0, 4.0, 5.0, 6.0], |
1197 | | - [7.0, 8.0, 8.0, 9.0]], |
1198 | | - [[1.0, 2.0, 3.0, 4.0], |
1199 | | - [5.0, 6.0, 7.0, 8.0], |
1200 | | - [6.0, 7.0, 8.0, 9.0]]] |
1201 | | - Attrs: |
1202 | | - axis = 1 |
1203 | | -
|
1204 | | - Output: |
1205 | | - out.shape = [2, 3, 4] |
1206 | | - out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783], |
1207 | | - [0.01786798, 0.01786798, 0.04661262, 0.04661262], |
1208 | | - [0.97555875, 0.97555875, 0.93623955, 0.93623955]], |
1209 | | - [[0.00490169, 0.00490169, 0.00490169, 0.00490169], |
1210 | | - [0.26762315, 0.26762315, 0.26762315, 0.26762315], |
1211 | | - [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] |
1212 | | -
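Both cases can be reproduced against the formula above with a short check; a minimal sketch using only public ``paddle`` ops (Case 2 values, ``axis = 1``):

```python
import paddle
import paddle.nn.functional as F

x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0],
                       [3.0, 4.0, 5.0, 6.0],
                       [7.0, 8.0, 8.0, 9.0]],
                      [[1.0, 2.0, 3.0, 4.0],
                       [5.0, 6.0, 7.0, 8.0],
                       [6.0, 7.0, 8.0, 9.0]]])

# softmax[i, j] = exp(x[i, j]) / sum_j exp(x[i, j]), applied along the chosen axis
manual = paddle.exp(x) / paddle.exp(x).sum(axis=1, keepdim=True)
print(paddle.allclose(F.softmax(x, axis=1), manual))  # Tensor containing True
```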
|
1213 | | - .. note:: |
1214 | | - Alias Support: The parameter name ``input`` can be used as an alias for ``x``, and ``dim`` can be used as an alias for ``axis``. |
1215 | | - For example, ``softmax(input=tensor_x, dim=1, ...)`` is equivalent to ``softmax(x=tensor_x, axis=1, ...)``. |
1216 | | -
|
1217 | | - Parameters: |
1218 | | - x (Tensor): The input Tensor with data type bfloat16, float16, float32, float64. |
1219 | | - alias: ``input``. |
1220 | | - axis (int, optional): The axis along which to perform softmax |
1221 | | - calculations. It should be in range [-D, D), where D is the |
1222 | | - rank of ``x`` . If ``axis`` < 0, it works the same way as |
1223 | | - :math:`axis + D` . Default is -1. |
1224 | | - alias: ``dim``. |
1225 | | - dtype (str, optional): The data type of the output tensor, can be bfloat16, float16, float32, float64. |
1226 | | - name (str|None, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. |
1227 | | -
|
1228 | | - Returns: |
1229 | | - A Tensor with the same shape as ``x``. Its data type is ``dtype`` if it is |
1230 | | - specified, otherwise the same as that of ``x``. |
1231 | | -
|
1232 | | - Examples: |
1233 | | - .. code-block:: python |
1234 | | -
|
1235 | | - >>> import paddle |
1236 | | - >>> import paddle.nn.functional as F |
1237 | | -
|
1238 | | - >>> x = paddle.to_tensor([[[2.0, 3.0, 4.0, 5.0], |
1239 | | - ... [3.0, 4.0, 5.0, 6.0], |
1240 | | - ... [7.0, 8.0, 8.0, 9.0]], |
1241 | | - ... [[1.0, 2.0, 3.0, 4.0], |
1242 | | - ... [5.0, 6.0, 7.0, 8.0], |
1243 | | - ... [6.0, 7.0, 8.0, 9.0]]], dtype='float32') |
1244 | | - >>> out1 = F.softmax(x) |
1245 | | - >>> out2 = F.softmax(x, dtype='float64') |
1246 | | - >>> #out1's data type is float32; out2's data type is float64 |
1247 | | - >>> #out1 and out2's value is as follows: |
1248 | | - >>> print(out1) |
1249 | | - >>> print(out2) |
1250 | | - Tensor(shape=[2, 3, 4], dtype=float32, place=Place(cpu), stop_gradient=True, |
1251 | | - [[[0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1252 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1253 | | - [0.07232949, 0.19661194, 0.19661194, 0.53444666]], |
1254 | | - [[0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1255 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428], |
1256 | | - [0.03205860, 0.08714432, 0.23688284, 0.64391428]]]) |
1257 | | - Tensor(shape=[2, 3, 4], dtype=float64, place=Place(cpu), stop_gradient=True, |
1258 | | - [[[0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1259 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1260 | | - [0.07232949, 0.19661193, 0.19661193, 0.53444665]], |
1261 | | - [[0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1262 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426], |
1263 | | - [0.03205860, 0.08714432, 0.23688282, 0.64391426]]]) |
1264 | | - """ |
1265 | | - |
1266 | | - if ( |
1267 | | - (dtype is not None) |
1268 | | - and (not isinstance(dtype, core.VarDesc.VarType)) |
1269 | | - and (not isinstance(dtype, core.DataType)) |
1270 | | - ): |
1271 | | - dtype = convert_np_dtype_to_dtype_(dtype) |
1272 | | - if in_dynamic_or_pir_mode(): |
1273 | | - outs_cast = x if dtype is None else _C_ops.cast(x, dtype) |
1274 | | - return _C_ops.softmax(outs_cast, axis) |
1275 | | - else: |
1276 | | - use_cudnn = True |
1277 | | - if dtype is None: |
1278 | | - check_variable_and_dtype( |
1279 | | - x, 'x', ['uint16', 'float16', 'float32', 'float64'], 'softmax' |
1280 | | - ) |
1281 | | - else: |
1282 | | - check_dtype( |
1283 | | - dtype, |
1284 | | - 'dtype', |
1285 | | - ['uint16', 'float16', 'float32', 'float64'], |
1286 | | - 'softmax', |
1287 | | - 'If dtype is not None, it only supports uint16, float16, float32 or float64.', |
1288 | | - ) |
1289 | | - |
1290 | | - helper = LayerHelper("softmax", **locals()) |
1291 | | - outs_cast = x |
1292 | | - if dtype is not None: |
1293 | | - outs_cast = helper.create_variable_for_type_inference(dtype) |
1294 | | - helper.append_op( |
1295 | | - type='cast', |
1296 | | - inputs={'X': x}, |
1297 | | - outputs={'Out': outs_cast}, |
1298 | | - attrs={'in_dtype': x.dtype, 'out_dtype': dtype}, |
1299 | | - ) |
1300 | | - |
1301 | | - outs_softmax = helper.create_variable_for_type_inference( |
1302 | | - outs_cast.dtype |
1303 | | - ) |
1304 | | - helper.append_op( |
1305 | | - type='softmax', |
1306 | | - inputs={'X': outs_cast}, |
1307 | | - outputs={'Out': outs_softmax}, |
1308 | | - attrs={'axis': axis, 'use_cudnn': use_cudnn}, |
1309 | | - ) |
1310 | | - |
1311 | | - return outs_softmax |
1312 | | - |
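In the dynamic-mode branch above, the optional ``dtype`` cast runs before the softmax kernel, so passing ``dtype`` should match casting the input by hand. A small sketch of that equivalence (assuming the re-exported implementation preserves this ordering):

```python
import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 3, 4], dtype='float32')

out_a = F.softmax(x, axis=1, dtype='float64')   # cast first, then softmax
out_b = F.softmax(x.astype('float64'), axis=1)  # the same cast done manually

print(out_a.dtype)                    # paddle.float64
print(paddle.allclose(out_a, out_b))  # Tensor containing True
```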
1313 | | - |
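The removed ``LayerHelper`` branch handled the static-graph path, appending a ``cast`` op (when ``dtype`` is given) followed by a ``softmax`` op. A minimal static-graph sketch exercising the same call (program, variable, and feed names here are illustrative):

```python
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.enable_static()
main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[2, 3, 4], dtype='float32')
    # In the graph this lowers to a cast op (float32 -> float64) plus a softmax op.
    y = F.softmax(x, axis=-1, dtype='float64')

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
(out,) = exe.run(main_prog,
                 feed={'x': np.random.rand(2, 3, 4).astype('float32')},
                 fetch_list=[y])
print(out.dtype)  # float64
paddle.disable_static()
```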
1314 | 1131 | @inplace_apis_in_dygraph_only |
1315 | 1132 | def softmax_( |
1316 | 1133 | x: Tensor, |
|