|
 
 from paddle.utils.decorator_utils import ForbidKeywordsDecorator
 
-__all__ = ['pad', 'softmax']
+__all__ = ['pad', 'softmax', 'linear']
 
 
 def _check_valid_pad_len(pad_len, x_dim, is_constant):
@@ -191,3 +191,78 @@ def pad(
     if ndim_to_unsqueeze:
         return out.squeeze(axis=ndim_to_unsqueeze)
     return out
+
+
+@ForbidKeywordsDecorator(
+    illegal_keys={"x", "name"},
+    func_name="paddle.compat.nn.functional.linear",
+    correct_name="paddle.nn.functional.linear",
+)
+def linear(input: Tensor, weight: Tensor, bias: Tensor | None = None) -> Tensor:
+    r"""
+
+    Fully-connected linear transformation operator. For each input :math:`x` ,
+    the equation is:
+
+    .. math::
+
+        Out = xW^T + b
+
+    where :math:`W` is the weight and :math:`b` is the bias.
+
+    If the weight is a 2-D tensor of shape :math:`[out\_features, in\_features]` ,
+    the input should be a multi-dimensional tensor of shape
+    :math:`[*, in\_features]` , where :math:`*` means any number of
+    additional dimensions. The linear operator multiplies the input tensor by the
+    weight and produces an output tensor of shape :math:`[*, out\_features]` .
+    If the bias is not None, it should be a 1-D tensor of shape
+    :math:`[out\_features]` and will be added to the output.
+
+    This implementation is aligned with PyTorch's linear function, which computes
+    :math:`y = xW^T + b` .
+
+    Parameters:
+        input (Tensor): Input tensor. The data type should be bfloat16, float16, float32 or float64.
+            The input tensor should be of shape :math:`[*, in\_features]` , where :math:`*` means any number of additional dimensions, including none.
+        weight (Tensor): Weight tensor. The data type should be float16, float32 or float64.
+            The shape should be :math:`[out\_features, in\_features]` .
+        bias (Tensor, optional): Bias tensor. The data type should be float16, float32 or float64.
+            If set to None, no bias will be added to the output.
+
+    Returns:
+        Tensor, the shape is :math:`[*, out\_features]` and the
+        data type is the same as the input :math:`x` .
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> x = paddle.arange(6, dtype="float32").reshape((3, 2))
+            >>> x
+            Tensor(shape=[3, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[0., 1.],
+                    [2., 3.],
+                    [4., 5.]])
+            >>> weight = paddle.full(shape=[4, 2], fill_value=0.5, dtype="float32", name="weight")
+            >>> weight
+            Tensor(shape=[4, 2], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000],
+                    [0.50000000, 0.50000000]])
+            >>> bias = paddle.ones(shape=[4], dtype="float32", name="bias")
+            >>> y = paddle.compat.nn.functional.linear(x, weight, bias)
+            >>> print(y)
+            Tensor(shape=[3, 4], dtype=float32, place=Place(cpu), stop_gradient=True,
+                   [[1.50000000, 1.50000000, 1.50000000, 1.50000000],
+                    [3.50000000, 3.50000000, 3.50000000, 3.50000000],
+                    [5.50000000, 5.50000000, 5.50000000, 5.50000000]])
+    """
+    # Use matmul with transpose_y=True: calling _C_ops.linear(input, weight.T, bias)
+    # would introduce extra transpose overhead. With CINN, the matmul and add can be fused.
+    out = _C_ops.matmul(input, weight, False, True)
+    if bias is not None:
+        out = _C_ops.add(out, bias)
+    return out
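
For reference, a minimal equivalence check (not part of the diff) for the new compat `linear`: it should match the plain matmul-with-transpose formulation from the docstring. The shapes below are illustrative assumptions mirroring the docstring example.

```python
import paddle

# Illustrative shapes (assumed, not from the PR): batch 3, in_features=2, out_features=4.
x = paddle.arange(6, dtype="float32").reshape((3, 2))
weight = paddle.full(shape=[4, 2], fill_value=0.5, dtype="float32")
bias = paddle.ones(shape=[4], dtype="float32")

# PyTorch-style semantics: y = x @ W^T + b, with W stored as [out_features, in_features],
# so no weight.T ever needs to be materialized (cf. the transpose_y=True comment above).
expected = paddle.matmul(x, weight, transpose_y=True) + bias
actual = paddle.compat.nn.functional.linear(x, weight, bias)

assert paddle.allclose(actual, expected).item()
```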
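And a hedged illustration of the `ForbidKeywordsDecorator` guard at the top of the diff: the compat API rejects the Paddle-style keywords `x` and `name` and directs callers to `paddle.nn.functional.linear`. The exact exception type and message are not shown in the diff, so the handler below is deliberately broad.

```python
import paddle

x = paddle.randn((3, 2))
weight = paddle.randn((4, 2))

try:
    # 'x' is a forbidden keyword for the compat API; the first parameter is named 'input'.
    paddle.compat.nn.functional.linear(x=x, weight=weight)
except Exception as e:  # the exception type raised by ForbidKeywordsDecorator is an assumption
    print(type(e).__name__, e)
```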