diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 2b83f79055a344..6cbdf7424b7ca2 100644
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -2175,6 +2175,16 @@
     use_gpudnn : true
   backward : softmax_grad
 
+- api : softplus
+  args : (Tensor x, float beta, float threshold)
+  output : Tensor
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus
+  backward : softplus_grad
+
 # softsign
 - api : softsign
   args : (Tensor x)
diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 39addc9421d808..e7167b17637315 100644
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -2046,6 +2046,18 @@
     func : softmax_grad
     use_gpudnn : true
 
+# softplus
+- backward_api : softplus_grad
+  forward : softplus (Tensor x, float beta, float threshold) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad, float beta, float threshold)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : softplus_grad
+  inplace : (out_grad -> x_grad)
+
 - backward_api : softsign_grad
   forward : softsign (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index b6ae3b0e58a2a9..01da331e57b2ef 100755
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -32,7 +32,6 @@
     'silu',
     'logsigmoid',
     'tanh_shrink',
-    'softplus',
     'softsign',
     'tanh',
 ]
@@ -53,7 +52,15 @@
     'reciprocal_',
 ]
 
-__all__ = []
+__all__ = [
+    'softplus',
+    'softshrink',
+    'hard_shrink',
+    'cumsum',
+    'thresholded_relu',
+    'gelu',
+    'erf',
+]
 
 for _OP in set(__all__):
     globals()[_OP] = generate_layer_fn(_OP)
@@ -462,8 +469,40 @@
 
 """)
 
-add_sample_code(
-    globals()["softplus"], r"""
+_softplus_ = generate_layer_fn('softplus')
+
+
+def softplus(x, beta: float = 1.0, threshold: float = 20.0, name=None):
+    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'softplus')
+    locals_val = locals().copy()
+    kwargs = dict()
+    for name, val in locals_val.items():
+        if val is not None:
+            kwargs[name] = val
+    return _softplus_(**kwargs)
+
+
+softplus.__doc__ = r"""
+    :alias_main: paddle.nn.functional.softplus
+    :alias: paddle.nn.functional.softplus, paddle.nn.functional.activation.softplus
+    :old_api: paddle.fluid.layers.softplus
+
+:strong:`Softplus Activation Operator`
+
+Equation:
+    .. math::
+        out = \frac{1}{beta} * log(1 + e^{beta * x})
+    For numerical stability, the implementation reverts to the linear function when: beta * x > threshold.
+
+Args:
+    x(Tensor): Input of Softplus op, Tensor, dtype: float32 or float64
+    beta(float, optional): The value of beta for softplus. Default is 1
+    threshold (float, optional): The value of threshold for softplus. Default is 20
+    name(str, optional): Name for the operation (optional, default is None)
+
+Returns:
+    Variable: The output of Softplus op, Tensor, dtype: float32 or float64
+
 Examples:
     .. code-block:: python
 
@@ -474,8 +513,7 @@
         out = F.softplus(x)
         print(out)
         # [0.513015, 0.598139, 0.744397, 0.854355]
-
-""")
+"""
 
 add_sample_code(
     globals()["softsign"], r"""
@@ -492,8 +530,6 @@
 
 """)
 
-__all__ += ['softshrink']
-
 _softshrink_ = generate_layer_fn('softshrink')
 
 
@@ -542,8 +578,6 @@ def softshrink(x, alpha=None):
         result = fluid.layers.softshrink(x=data, alpha=0.3)
 """
 
-__all__ += ['hard_shrink']
-
 _hard_shrink_ = generate_layer_fn('hard_shrink')
 
 
@@ -568,8 +602,6 @@ def hard_shrink(x, threshold=None):
     >>> result = fluid.layers.hard_shrink(x=data, threshold=0.3)
 """
 
-__all__ += ['cumsum']
-
 _cum_sum_ = generate_layer_fn('cumsum')
 
 
@@ -610,8 +642,6 @@ def cumsum(x, axis=None, exclusive=None, reverse=None):
         result = fluid.layers.cumsum(data, axis=0)
 """
 
-__all__ += ['thresholded_relu']
-
 _thresholded_relu_ = generate_layer_fn('thresholded_relu')
 
 
@@ -700,8 +730,6 @@ def thresholded_relu(x, threshold=None):
         # [-0. , -0. , 1.0013918 ]], dtype=float32)
 """
 
-__all__ += ['gelu']
-
 _gelu_ = generate_layer_fn('gelu')
 
 
@@ -785,8 +813,6 @@ def gelu(x, approximate=False):
         # [ 0.08796856, 0.20387867, 0.2080159 ]], dtype=float32)
 """
 
-__all__ += ['erf']
-
 _erf_ = generate_layer_fn('erf')
 
 
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index 5edb1185ad6445..cdb01b4c994165 100755
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -2676,6 +2676,7 @@ class TestSoftplus(TestActivation):
 
     def setUp(self):
         self.op_type = "softplus"
+        self.python_api = paddle.nn.functional.softplus
         self.init_dtype()
 
         beta = 2
@@ -2688,10 +2689,14 @@ def setUp(self):
         self.attrs = {'beta': beta, "threshold": threshold}
         self.outputs = {'Out': out}
 
+        self.check_eager = True
+
     def test_check_grad(self):
         if self.dtype == np.float16:
             return
-        self.check_grad(['X'], 'Out')
+        # check_eager may be unset if a subclass overrides setUp.
+        check_eager = getattr(self, 'check_eager', False)
+        self.check_grad(['X'], 'Out', check_eager=check_eager)
 
 
 @unittest.skipIf(not core.is_compiled_with_cuda(),
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index f0f04fb1332835..373186096bda0c 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -1177,7 +1177,11 @@ def softplus(x, beta=1, threshold=20, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355]
     """
-    if in_dynamic_mode():
+
+    if in_dygraph_mode():
+        return _C_ops.final_state_softplus(x, beta, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.softplus(x, 'beta', beta, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],