Add double grad op for sigmoid activation #32971

Merged (1 commit, May 26, 2021)
62 changes: 62 additions & 0 deletions paddle/fluid/operators/activation_op.cc
@@ -789,6 +789,27 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
}
};

template <typename T>
class SigmoidDoubleGradMaker
: public ::paddle::framework::SingleGradOpMaker<T> {
public:
using ::paddle::framework::SingleGradOpMaker<T>::SingleGradOpMaker;

protected:
void Apply(GradOpPtr<T> op) const override {
op->SetType("sigmoid_grad_grad");
// input1: Out
op->SetInput("Out", this->Input("Out"));
// input2: ddx
op->SetInput("DDX", this->OutputGrad(framework::GradVarName("X")));
op->SetInput("DOut", this->Input(framework::GradVarName("Out")));
op->SetAttrMap(this->Attrs());
// outputs: DDOut (ddy) and DOutNew (new grad w.r.t. Out)
op->SetOutput("DOutNew", this->InputGrad("Out"));
op->SetOutput("DDOut", this->InputGrad(framework::GradVarName("Out")));
}
};

template <typename T>
class TanhDoubleGradMaker : public ::paddle::framework::SingleGradOpMaker<T> {
public:
@@ -1068,6 +1089,47 @@ namespace plat = paddle::platform;
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);

/* ========================== sigmoid register =============================
*/
// 1. Register Sigmoid Operator
REGISTER_OPERATOR(
sigmoid, ops::ActivationOp, ops::SigmoidOpMaker,
ops::ActivationOpInferVarType,
ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
paddle::framework::OpDesc>,
ops::ActivationGradOpMaker<ops::SigmoidGradFunctor<float>::FwdDeps(),
paddle::imperative::OpBase>,
std::conditional<ops::CanInplaceAct<ops::SigmoidGradFunctor<float>>(),
ops::ActFwdInplaceInferer, void>::type);

// 2. Register Sigmoid Grad Operator
REGISTER_OPERATOR(sigmoid_grad, ops::ActivationOpGrad,
ops::ActivationGradOpInplaceInferer,
ops::SigmoidDoubleGradMaker<paddle::framework::OpDesc>,
ops::SigmoidDoubleGradMaker<paddle::imperative::OpBase>)

// 3. Register Sigmoid DoubleGrad Operator
REGISTER_OPERATOR(
sigmoid_grad_grad,
ops::ActivationOpDoubleGrad<ops::SigmoidGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer);

// Register Sigmoid/GradSigmoid Kernels
REGISTER_ACTIVATION_CPU_KERNEL(sigmoid, Sigmoid, SigmoidFunctor,
SigmoidGradFunctor);

// Register DoubleGrad Kernel
REGISTER_OP_CPU_KERNEL(
sigmoid_grad_grad,
ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
ops::SigmoidGradGradFunctor<float>>,
ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
ops::SigmoidGradGradFunctor<double>>,
ops::SigmoidDoubleGradKernel<plat::CPUDeviceContext,
ops::SigmoidGradGradFunctor<plat::float16>>);

/* ========================================================================== */

/* ========================== tanh register ============================= */
REGISTER_OPERATOR(
tanh, ops::ActivationOp, ops::TanhOpMaker, ops::ActivationOpInferVarType,
16 changes: 15 additions & 1 deletion paddle/fluid/operators/activation_op.cu
@@ -1481,6 +1481,21 @@ REGISTER_OP_CUDA_KERNEL(
#endif
/* ========================================================================== */

/* =========================== sigmoid register ============================
*/
REGISTER_ACTIVATION_CUDA_KERNEL(sigmoid, Sigmoid, CudaSigmoidFunctor,
CudaSigmoidGradFunctor);

REGISTER_OP_CUDA_KERNEL(
sigmoid_grad_grad,
ops::SigmoidDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::SigmoidGradGradFunctor<float>>,
ops::SigmoidDoubleGradKernel<paddle::platform::CUDADeviceContext,
ops::SigmoidGradGradFunctor<double>>,
ops::SigmoidDoubleGradKernel<plat::CUDADeviceContext,
ops::SigmoidGradGradFunctor<plat::float16>>);
/* ========================================================================== */

/* =========================== tanh register ============================ */
REGISTER_ACTIVATION_CUDA_KERNEL(tanh, Tanh, CudaTanhFunctor,
CudaTanhGradFunctor);
@@ -1595,7 +1610,6 @@ REGISTER_OP_CUDA_KERNEL(
/* ========================================================================== */

#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(sigmoid, Sigmoid, CudaSigmoidFunctor, CudaSigmoidGradFunctor); \
__macro(silu, Silu, CudaSiluFunctor, CudaSiluGradFunctor); \
__macro(logsigmoid, LogSigmoid, CudaLogSigmoidFunctor, \
CudaLogSigmoidGradFunctor); \
82 changes: 81 additions & 1 deletion paddle/fluid/operators/activation_op.h
@@ -258,6 +258,43 @@ struct SigmoidGradFunctor : public BaseActivationFunctor<T> {
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

/*
Out
DOut -> SigmoidGradGrad -> DOutNew
DDX DDOut

DDOut = (1-Out)*Out*DDX
DOutNew = (1-2*Out)*DOut*DDX
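
Derivation sketch: sigmoid_grad_grad is the grad of sigmoid_grad, which
computes dX = DOut * Out * (1 - Out). Given DDX (the upstream grad w.r.t.
dX), the grads w.r.t. sigmoid_grad's inputs follow by the chain rule:
DDOut   = d(dX)/d(DOut) * DDX = (1 - Out) * Out * DDX
DOutNew = d(dX)/d(Out)  * DDX = (1 - 2 * Out) * DOut * DDX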
*/
template <typename T>
struct SigmoidGradGradFunctor : public BaseActivationFunctor<T> {
template <typename Device>
void operator()(const Device& dev, const framework::Tensor* Out,
const framework::Tensor* ddX, const framework::Tensor* dOut,
framework::Tensor* dOutNew, framework::Tensor* ddOut) const {
auto* d = dev.eigen_device();
auto ddx = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddX, "Input", "DDX", "SigmoidGradGrad"));
auto out = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Input", "Out", "SigmoidGradGrad"));

if (dOutNew) {
auto dout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "DOut", "SigmoidGradGrad"));
auto dout_new = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOutNew, "Output", "DOutNew", "SigmoidGradGrad"));
dout_new.device(*d) =
(static_cast<T>(1) - static_cast<T>(2) * out) * dout * ddx;
}
if (ddOut) {
auto ddout = framework::EigenVector<T>::Flatten(
GET_DATA_SAFELY(ddOut, "Output", "DDOut", "SigmoidGradGrad"));
ddout.device(*d) = (static_cast<T>(1) - out) * out * ddx;
}
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};

// silu(x) = x / (1 + exp(-x))
template <typename T>
struct SiluFunctor : public BaseActivationFunctor<T> {
@@ -1789,6 +1826,50 @@ inline void ExtractDoubleGradTensorWithInputDOut(
}
}

template <typename DeviceContext, typename Functor>
class SigmoidDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& ctx) const override {
const framework::Tensor *Out, *ddX, *dOut;
framework::Tensor *dOutNew, *ddOut;
Out = ddX = dOut = nullptr;
dOutNew = ddOut = nullptr;

// extract ddx(input) and out(input)
ddX = ctx.Input<framework::Tensor>("DDX");
Out = ctx.Input<framework::Tensor>("Out");
PADDLE_ENFORCE_NOT_NULL(
ddX, platform::errors::NotFound(
"Cannot get input Variable ddX, variable name = %s",
ctx.InputName("DDX")));
PADDLE_ENFORCE_NOT_NULL(
Out, platform::errors::NotFound(
"Cannot get input Variable Out, variable name = %s",
ctx.InputName("Out")));

// set output ddout
ddOut = ctx.Output<framework::Tensor>("DDOut");

// extract dOut (input)
dOut = ctx.Input<framework::Tensor>("DOut");
PADDLE_ENFORCE_NOT_NULL(
dOut, platform::errors::NotFound(
"Cannot get input Variable dOut, variable name = %s",
ctx.InputName("DOut")));

// set output dout_new
dOutNew = ctx.Output<framework::Tensor>("DOutNew");

if (dOutNew) dOutNew->mutable_data<T>(Out->dims(), ctx.GetPlace());
if (ddOut) ddOut->mutable_data<T>(Out->dims(), ctx.GetPlace());
auto& place = ctx.template device_context<DeviceContext>();
Functor functor;
functor(place, Out, ddX, dOut, dOutNew, ddOut);
}
};

template <typename DeviceContext, typename Functor>
class TanhDoubleGradKernel
: public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
@@ -2153,7 +2234,6 @@ struct LogGradGradFunctor : public BaseActivationFunctor<T> {
} // namespace paddle

#define FOR_EACH_ACTIVATION_OP(__macro) \
__macro(sigmoid, Sigmoid, SigmoidFunctor, SigmoidGradFunctor); \
__macro(silu, Silu, SiluFunctor, SiluGradFunctor); \
__macro(logsigmoid, LogSigmoid, LogSigmoidFunctor, LogSigmoidGradFunctor); \
__macro(atan, Atan, AtanFunctor, AtanGradFunctor); \
22 changes: 22 additions & 0 deletions python/paddle/fluid/tests/unittests/test_activation_nn_grad.py
@@ -26,6 +26,28 @@
from decorator_helper import prog_scope


class TestSigmoidDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
eps = 0.0005
dtype = np.float64
x = layers.data('x', shape, False, dtype=dtype)
x.persistable = True
y = layers.sigmoid(x)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)

def test_grad(self):
places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
places.append(fluid.CUDAPlace(0))
for p in places:
self.func(p)


class TestTanhDoubleGradCheck(unittest.TestCase):
@prog_scope()
def func(self, place):
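
For context, a minimal dygraph sketch of the second-order path these kernels back. It is not part of this PR and assumes a Paddle build that already includes this change; the tensor shape and variable names are illustrative only.

import paddle
import paddle.nn.functional as F

x = paddle.uniform([2, 3, 7, 9], min=-1.0, max=1.0)
x.stop_gradient = False
y = F.sigmoid(x)

# First-order grad, kept differentiable so it can be differentiated again.
(dx,) = paddle.grad([y], [x], create_graph=True)

# Second-order grad; with this change it should be served by the newly
# registered sigmoid_grad_grad kernel.
(ddx,) = paddle.grad([dx], [x])
print(ddx.shape)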