From c47f3cc7fe3221e074d2e3e301d1e0e6bfa78921 Mon Sep 17 00:00:00 2001
From: zhoukunsheng
Date: Thu, 28 Mar 2019 14:57:55 +0800
Subject: [PATCH 1/3] test=develop add rsqrt op

---
 paddle/fluid/API.spec                              |  1 +
 paddle/fluid/operators/activation_op.cc            | 12 ++++++++++
 paddle/fluid/operators/activation_op.h             | 23 +++++++++++++++++--
 python/paddle/fluid/layers/ops.py                  |  1 +
 .../tests/unittests/test_activation_op.py          | 17 ++++++++++++++
 5 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index 032da0cad85ce..ccf0c0841b2eb 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -308,6 +308,7 @@ paddle.fluid.layers.atan (ArgSpec(args=['x', 'name'], varargs=None, keywords=Non
 paddle.fluid.layers.tanh_shrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '1e521554b9fdda9061ec6d306f0709b7'))
 paddle.fluid.layers.softshrink (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '9eef31597bbafa2bd49691e072296e13'))
 paddle.fluid.layers.sqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e9e27491c39ac74d0b1ffe506aec0ebb'))
+paddle.fluid.layers.rsqrt (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c445467ebe58b3c0d7f0bba7795b6f56'))
 paddle.fluid.layers.abs (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '64650ac42cf82e9920cb0b172b1d29fd'))
 paddle.fluid.layers.ceil (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'c75d67dc5fe28f68e4cfffead4f698ad'))
 paddle.fluid.layers.floor (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '647b16c5da5ef909649ae02abb434973'))
diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index c87e4b22b3702..a228d530e6252 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -199,6 +199,15 @@ you should add a small epsilon(1e-12) to avoid negative number caused by numeric
 )DOC";
 
+UNUSED constexpr char RsqrtDoc[] = R"DOC(
+Rsqrt Activation Operator.
+
+Please make sure the input is positive to avoid numeric errors.
+
+$out = \frac{1}{\sqrt{x}}$
+
+)DOC";
+
 UNUSED constexpr char AbsDoc[] = R"DOC(
 Abs Activation Operator.
@@ -547,6 +556,7 @@ REGISTER_ACTIVATION_OP_MAKER(Gelu, GeluDoc);
 REGISTER_ACTIVATION_OP_MAKER(Tanh, TanhDoc);
 REGISTER_ACTIVATION_OP_MAKER(TanhShrink, TanhShrinkDoc);
 REGISTER_ACTIVATION_OP_MAKER(Sqrt, SqrtDoc);
+REGISTER_ACTIVATION_OP_MAKER(Rsqrt, RsqrtDoc);
 REGISTER_ACTIVATION_OP_MAKER(Abs, AbsDoc);
 REGISTER_ACTIVATION_OP_MAKER(Ceil, CeilDoc);
 REGISTER_ACTIVATION_OP_MAKER(Floor, FloorDoc);
@@ -567,6 +577,7 @@ REGISTER_ACTIVATION_OP_GRAD_MAKER(Tanh, tanh);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(Ceil, ceil);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(Floor, floor);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(Sqrt, sqrt);
+REGISTER_ACTIVATION_OP_GRAD_MAKER(Rsqrt, rsqrt);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(SoftRelu, soft_relu);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu6, relu6);
 REGISTER_ACTIVATION_OP_GRAD_MAKER(Reciprocal, reciprocal);
@@ -584,6 +595,7 @@ namespace ops = paddle::operators;
   __macro(Ceil, ceil);                 \
   __macro(Floor, floor);               \
   __macro(Sqrt, sqrt);                 \
+  __macro(Rsqrt, rsqrt);               \
   __macro(SoftRelu, soft_relu);        \
   __macro(Relu6, relu6);               \
   __macro(Reciprocal, reciprocal);     \
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index ff7e623f6f383..1839c7e9bce3c 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -39,8 +39,8 @@ namespace operators {
    Please refer to the layer_helper.py and get the details.
  */
 static std::unordered_set<std::string> InplaceOpSet = {
-    "sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
-    "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid"};
+    "sigmoid", "exp", "relu", "tanh", "sqrt", "ceil",
+    "floor", "reciprocal", "relu6", "soft_relu", "hard_sigmoid", "rsqrt"};
 
 static bool IsInplace(const std::string& op) {
   bool inplace = InplaceOpSet.count(op);
@@ -463,6 +463,24 @@ struct SqrtGradFunctor : public BaseActivationFunctor<T> {
   }
 };
 
+// rsqrt(x) = x^(-1/2)
+template <typename T>
+struct RsqrtFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {
+    out.device(d) = x.rsqrt();
+  }
+};
+
+template <typename T>
+struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
+  template <typename Device, typename X, typename Out, typename dOut,
+            typename dX>
+  void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
+    dx.device(d) = static_cast<T>(-0.5) * dout * out.pow(3);
+  }
+};
+
 // ceil(x) = ceiling(x)
 template <typename T>
 struct CeilFunctor : public BaseActivationFunctor<T> {
@@ -1098,6 +1116,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
   __macro(atan, AtanFunctor, AtanGradFunctor);                    \
   __macro(softshrink, SoftShrinkFunctor, SoftShrinkGradFunctor);  \
   __macro(sqrt, SqrtFunctor, SqrtGradFunctor);                    \
+  __macro(rsqrt, RsqrtFunctor, RsqrtGradFunctor);                 \
   __macro(abs, AbsFunctor, AbsGradFunctor);                       \
   __macro(ceil, CeilFunctor, ZeroGradFunctor);                    \
   __macro(floor, FloorFunctor, ZeroGradFunctor);                  \
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index f018bb8af8cc9..17c84b1a4332a 100644
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -27,6 +27,7 @@
     'tanh_shrink',
     'softshrink',
     'sqrt',
+    'rsqrt',
     'abs',
     'ceil',
     'floor',
diff --git a/python/paddle/fluid/tests/unittests/test_activation_op.py b/python/paddle/fluid/tests/unittests/test_activation_op.py
index d587715d607c6..4d66b7a989732 100644
--- a/python/paddle/fluid/tests/unittests/test_activation_op.py
+++ b/python/paddle/fluid/tests/unittests/test_activation_op.py
@@ -192,6 +192,23 @@ def test_check_grad(self):
         self.check_grad(['X'], 'Out', max_relative_error=0.007)
 
 
+class TestRsqrt(TestActivation):
+    def setUp(self):
+        self.op_type = "rsqrt"
+        self.init_dtype()
+
+        x = np.random.uniform(0.1, 1, [2, 3]).astype(self.dtype)
+        out = 1.0 / np.sqrt(x)
+
+        self.inputs = {'X': OpTest.np_dtype_to_fluid_dtype(x)}
+        self.outputs = {'Out': out}
+
+    def test_check_grad(self):
+        if self.dtype == np.float16:
+            return
+        self.check_grad(['X'], 'Out', max_relative_error=0.0005)
+
+
 class TestAbs(TestActivation):
     def setUp(self):
         self.op_type = "abs"

From 91ba75000c9387f648f823a43242acd4e865026e Mon Sep 17 00:00:00 2001
From: zhoukunsheng
Date: Fri, 29 Mar 2019 15:54:54 +0800
Subject: [PATCH 2/3] fix type conversion problem in rsqrt functor

---
 paddle/fluid/operators/activation_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index 1839c7e9bce3c..292505ae6c3b3 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -477,7 +477,7 @@ struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
   template <typename Device, typename X, typename Out, typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = static_cast<T>(-0.5) * dout * out.pow(3);
+    dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
   }
 };

From b1c5820b3fe2361d416bc1cab95b457d3e300448 Mon Sep 17 00:00:00 2001
From: zhoukunsheng
Date: Fri, 12 Apr 2019 15:09:58 +0800
Subject: [PATCH 3/3] fix merge conflict

---
 paddle/fluid/operators/activation_op.cc | 2 --
 paddle/fluid/operators/activation_op.h  | 5 +++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index c53427b465bc3..348902c656cec 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -602,7 +602,6 @@ REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
 
 namespace ops = paddle::operators;
-
 #define REGISTER_ACTIVATION_OP(KERNEL_TYPE, OP_NAME, functor, grad_functor) \
   REGISTER_OPERATOR(                                                        \
       KERNEL_TYPE, ops::ActivationOp, ops::OP_NAME##OpMaker,                \
@@ -619,7 +618,6 @@ namespace ops = paddle::operators;
 
 #define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name, functor,          \
                                        grad_functor)                        \
-
   REGISTER_OP_CPU_KERNEL(                                                   \
       act_type, ops::ActivationKernel<paddle::platform::CPUDeviceContext,   \
                                       ops::functor<float>>,                 \
diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h
index e50f3bf766d13..1732f61582f79 100644
--- a/paddle/fluid/operators/activation_op.h
+++ b/paddle/fluid/operators/activation_op.h
@@ -36,7 +36,6 @@ limitations under the License.
 */
 namespace paddle {
 namespace operators {
-
 enum ActBwdOpFwdDeps {
   kNoDeps = 0x00,  // Do not need any forward input/output
   kDepX = 0x01,    // Only need forward input X
@@ -528,6 +527,8 @@ struct RsqrtGradFunctor : public BaseActivationFunctor<T> {
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
     dx.device(d) = static_cast<T>(-0.5) * dout * out * out * out;
   }
+
+  static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
 };
 
 // ceil(x) = ceiling(x)
@@ -1200,7 +1201,6 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
 
 }  // namespace operators
 }  // namespace paddle
-
 #define FOR_EACH_ACTIVATION_OP(__macro)                                       \
   __macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor);             \
   __macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
@@ -1211,6 +1211,7 @@ struct SwishGradFunctor : public BaseActivationFunctor<T> {
   __macro(atan, Atan, AtanFunctor, AtanGradFunctor);                         \
   __macro(softshrink, SoftShrink, SoftShrinkFunctor, SoftShrinkGradFunctor); \
   __macro(sqrt, Sqrt, SqrtFunctor, SqrtGradFunctor);                         \
+  __macro(rsqrt, Rsqrt, RsqrtFunctor, RsqrtGradFunctor);                     \
   __macro(abs, Abs, AbsFunctor, AbsGradFunctor);                             \
   __macro(ceil, Ceil, CeilFunctor, ZeroGradFunctor);                         \
   __macro(floor, Floor, FloorFunctor, ZeroGradFunctor);                      \
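
Editor's note (appended after the series; not part of the patches): the backward
rule in RsqrtGradFunctor follows from writing the derivative in terms of the
forward output. With $out = x^{-1/2}$, $\frac{d out}{dx} = -\frac{1}{2} x^{-3/2}
= -\frac{1}{2} out^3$, which is why the functor computes
dx = static_cast<T>(-0.5) * dout * out * out * out and needs only out (hence
FwdDeps() returning kDepOut in PATCH 3/3, and consistent with adding "rsqrt" to
InplaceOpSet, where x may be overwritten by out). Below is a minimal standalone
NumPy sketch of that identity, assuming only the math above; the variable names
are illustrative, not from the patch:

    import numpy as np

    # Forward: rsqrt(x) = x^(-1/2); sample away from zero, as TestRsqrt does.
    x = np.random.uniform(0.1, 1.0, [2, 3])
    out = 1.0 / np.sqrt(x)
    dout = np.random.uniform(0.5, 1.5, x.shape)  # arbitrary upstream gradient

    # Analytic gradient used by RsqrtGradFunctor: -0.5 * dout * out^3.
    dx_analytic = -0.5 * dout * out * out * out

    # Central finite-difference estimate of d(rsqrt)/dx, chained with dout.
    eps = 1e-6
    dx_numeric = dout * (1.0 / np.sqrt(x + eps) - 1.0 / np.sqrt(x - eps)) / (2 * eps)

    assert np.allclose(dx_analytic, dx_numeric, rtol=1e-4)
    print("rsqrt gradient check passed")

Since the analytic gradient is exact, the small tolerance in the unit test
(max_relative_error=0.0005) mainly absorbs finite-difference noise in
OpTest's check_grad.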