remove fused_bias_dropout_residual_layer_norm
zhangting2020 committed Jun 21, 2022
1 parent a089601 commit 9941a14
Showing 1 changed file with 0 additions and 128 deletions.
128 changes: 0 additions & 128 deletions python/paddle/incubate/nn/functional/fused_transformer.py
@@ -221,134 +221,6 @@ def fused_feedforward(x,
    return out


def fused_bias_dropout_residual_layer_norm(x,
                                           residual,
                                           bias=None,
                                           ln_scale=None,
                                           ln_bias=None,
                                           dropout_rate=0.5,
                                           ln_epsilon=1e-5,
                                           training=True,
                                           mode='upscale_in_train',
                                           name=None):
r"""
The fused_bias_dropout_residual_layer_norm operator. The pseudo code is as follows:
.. code-block:: python
y = layer_norm(residual + dropout(bias + x))
Parameters:
x (Tensor): The input tensor. The shape is `[*, embed\_dim]`.
residual (Tensor): The residual tensor. The shape is same as x.
bias (Tensor, optional): The bias of linear. The shape is `[embed_dim]`. Default None.
ln_scale (Tensor, optional): The weight tensor of layernorm. The shape is `[embed_dim]`. Default None.
ln_bias (Tensor, optional): The bias tensor of layernorm. The shape is `[embed_dim]`. Default None.
dropout_rate (float, optional): The dropout probability used on attention
weights to drop some attention targets for the dropout after attention.
0 for no dropout. Default 0.5.
ln_epsilon (float, optional): Small float value added to denominator of layer_norm
to avoid dividing by zero. Default is 1e-5.
training (bool, optional): A flag indicating whether it is in train phrase or not. Default True.
mode (str, optional): ['upscale_in_train'(default) | 'downscale_in_infer']
1. upscale_in_train(default), upscale the output at training time
- train: out = input * mask / ( 1.0 - p )
- inference: out = input
2. downscale_in_infer, downscale the output at inference
- train: out = input * mask
- inference: out = input * (1.0 - p)
name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
Returns:
Tensor: The output Tensor, the data type and shape is same as `x`.
Examples:
.. code-block:: python
# required: gpu
import paddle
import paddle.incubate.nn.functional as F
# input: [batch_size, seq_len, embed_dim]
x = paddle.rand(shape=(2, 4, 128), dtype="float32")
# residual: [batch_size, seq_len, embed_dim]
residual = paddle.rand(shape=(2, 4, 128), dtype="float32")
# linear bias: [embed_dim]
bias = paddle.rand(shape=[128], dtype="float32")
# output: [batch_size, seq_len, embed_dim]
output = F.fused_bias_dropout_residual_layer_norm(
x, residual, bias)
# [2, 4, 128]
print(output.shape)
"""
    seed = None
    if mode not in ('downscale_in_infer', 'upscale_in_train'):
        raise ValueError(
            "mode argument should be 'downscale_in_infer' or 'upscale_in_train'"
        )
    # the underlying op keeps the legacy attribute name for this mode
    mode = 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode

    if ln_scale is not None:
        assert len(ln_scale.shape) == 1, \
            "The dims of the shape of ln_scale should be 1."
        assert x.shape[len(x.shape) - 1] == ln_scale.shape[0], \
            "The dim of ln_scale must equal to the last dim of x."
    if ln_bias is not None:
        assert len(ln_bias.shape) == 1, \
            "The dims of the shape of ln_bias should be 1."
        assert x.shape[len(x.shape) - 1] == ln_bias.shape[0], \
            "The dim of ln_bias must equal to the last dim of x."

    if _non_static_mode():
        if default_main_program().random_seed != 0:
            seed = default_main_program().random_seed
        _, _, _, _, final_out = _C_ops.fused_bias_dropout_residual_layer_norm(
            x, residual, bias, ln_scale, ln_bias, 'dropout_rate', dropout_rate,
            'ln_epsilon', ln_epsilon, 'is_test', not training,
            'dropout_fix_seed', seed is not None, 'dropout_seed',
            seed if seed is not None else 0, 'dropout_implementation', mode)
        return final_out
    else:
        helper = LayerHelper('fused_bias_dropout_residual_layer_norm',
                             **locals())
        dtype = x.dtype
        # check dtypes
        check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                 'fused_bias_dropout_residual_layer_norm')
        check_dtype(dtype, 'dtype', ['float16', 'float32', 'float64'],
                    'fused_bias_dropout_residual_layer_norm')
        # set inputs
        inputs = dict()
        inputs['X'] = [x]
        inputs['Residual'] = [residual]
        if bias is not None:
            inputs['Bias'] = [bias]
        if ln_scale:
            inputs['LnScale'] = [ln_scale]
        if ln_bias:
            inputs['LnBias'] = [ln_bias]
        if (seed is None or seed == 0) and helper.main_program.random_seed != 0:
            seed = helper.main_program.random_seed
        # set attrs, mirroring the dygraph branch above
        attrs = {
            'ln_epsilon': ln_epsilon,
            'dropout_rate': dropout_rate,
            'is_test': not training,
            'dropout_fix_seed': seed is not None,
            'dropout_seed': seed if seed is not None else 0,
            'dropout_implementation': mode,
        }
        # set outputs (names assumed to match the operator's registered outputs)
        bias_dropout_residual_out = helper.create_variable_for_type_inference(dtype)
        dropout_mask_out = helper.create_variable_for_type_inference(
            core.VarDesc.VarType.UINT8, stop_gradient=True)
        ln_mean_out = helper.create_variable_for_type_inference(dtype, stop_gradient=True)
        ln_variance_out = helper.create_variable_for_type_inference(dtype, stop_gradient=True)
        final_out = helper.create_variable_for_type_inference(dtype)
        helper.append_op(type='fused_bias_dropout_residual_layer_norm',
                         inputs=inputs,
                         outputs={
                             'BiasDropoutResidualOut': bias_dropout_residual_out,
                             'DropoutMaskOut': dropout_mask_out,
                             'LnMean': ln_mean_out,
                             'LnVariance': ln_variance_out,
                             'Y': final_out,
                         },
                         attrs=attrs)
        return final_out


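Note: the removed fused kernel computes layer_norm(residual + dropout(bias + x)) in a single op; the fused version exists for speed, not for different math. Below is a minimal unfused sketch of that computation, assuming only the public paddle.nn.functional.dropout and paddle.nn.functional.layer_norm APIs; the helper name unfused_reference is illustrative and not part of the library.

import paddle
import paddle.nn.functional as F

def unfused_reference(x, residual, bias=None, ln_scale=None, ln_bias=None,
                      dropout_rate=0.5, ln_epsilon=1e-5, training=True,
                      mode='upscale_in_train'):
    # Sketch of y = layer_norm(residual + dropout(bias + x)) using stock ops.
    out = x if bias is None else x + bias
    # mode follows paddle.nn.functional.dropout: 'upscale_in_train' divides by
    # (1 - p) during training; 'downscale_in_infer' multiplies by (1 - p) at inference.
    out = F.dropout(out, p=dropout_rate, training=training, mode=mode)
    out = residual + out
    return F.layer_norm(out, normalized_shape=out.shape[-1:],
                        weight=ln_scale, bias=ln_bias, epsilon=ln_epsilon)

x = paddle.rand((2, 4, 128), dtype='float32')
residual = paddle.rand((2, 4, 128), dtype='float32')
bias = paddle.rand([128], dtype='float32')
print(unfused_reference(x, residual, bias).shape)  # [2, 4, 128]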
def fused_multi_head_attention(x,
                               qkv_weight,
                               linear_weight,
