
[MXNET-978] Higher Order Gradient Support log1p, expm1, square. #15416

Merged
55 changes: 53 additions & 2 deletions src/operator/tensor/elemwise_unary_op_logexp.cc
@@ -200,7 +200,32 @@ The storage type of ``log1p`` output depends upon the input storage type:
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_log1p"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_log1p,
unary_bwd<mshadow_op::log1p_grad>);
unary_bwd<mshadow_op::log1p_grad>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      // ograds[0]: head_grad_grads (dL/dxgrad)
      // inputs[0]: dL/dy
      // inputs[1]: x (ElemwiseGradUseIn)
      // f(x) = y = log(1+x)
      // f'(x) = 1/(1+x)
      // f''(x) = -1/(1+x)^2
      auto dldy = n->inputs[0];
      auto x = n->inputs[1];
      auto dydx_mul_dldy = nnvm::NodeEntry{n};  // f'(x) * head_grads
      auto op = mxnet::util::NodeOpGen{n};

      auto dydx = op.div(dydx_mul_dldy, dldy);

      // -(dL/dy * f'(x))^2 / (dL/dy) = dL/dy * f''(x), so d2ydx2 below
      // already carries the dL/dy factor.
      auto d2ydx2_mid = op.mul(dydx_mul_dldy, dydx_mul_dldy);
      auto d2ydx2_neg_mid = op.negative(d2ydx2_mid);
      auto d2ydx2 = op.div(d2ydx2_neg_mid, dldy);

      std::vector<nnvm::NodeEntry> ret;

      ret.emplace_back(op.mul(ograds[0], dydx));
      ret.emplace_back(op.mul(ograds[0], d2ydx2));
      return ret;
    });
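
A quick sketch of the algebra this lambda relies on (writing $\bar{o}$ for `ograds[0]` and $\bar{y}$ for `dL/dy`): the `_backward_log1p` node outputs $\bar{y}\,f'(x)$, so its own FGradient must return the derivatives of that product with respect to its two inputs,

$$\frac{\partial}{\partial \bar{y}}\bigl(\bar{y}\,f'(x)\bigr) = f'(x), \qquad \frac{\partial}{\partial x}\bigl(\bar{y}\,f'(x)\bigr) = \bar{y}\,f''(x).$$

For $f(x)=\log(1+x)$ the identity $f''(x) = -1/(1+x)^2 = -f'(x)^2$ lets the code build the second term from the node's own output without touching $x$:

$$\bar{y}\,f''(x) = -\bar{y}\,f'(x)^2 = \frac{-\bigl(\bar{y}\,f'(x)\bigr)^2}{\bar{y}},$$

which is exactly the square, negate, divide sequence above; the returned entries are then $\bar{o}\,f'(x)$ and $\bar{o}\,\bar{y}\,f''(x)$.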

// expm1
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(expm1, cpu, mshadow_op::expm1)
@@ -217,7 +242,33 @@ The storage type of ``expm1`` output depends upon the input storage type:
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_expm1"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd<mshadow_op::exp>);
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_expm1, unary_bwd<mshadow_op::exp>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      // ograds[0]: head_grad_grads (dL/dxgrad)
      // inputs[0]: dL/dy
      // inputs[1]: x (ElemwiseGradUseIn)
      // f(x) = y = exp(x) - 1
      // f'(x) = exp(x)
      // f''(x) = exp(x)
      auto dldy = n->inputs[0];
      auto x = n->inputs[1];
      auto dydx_mul_dldy = nnvm::NodeEntry{n};  // f'(x) * head_grads
      auto op = mxnet::util::NodeOpGen{n};

      auto dydx = op.div(dydx_mul_dldy, dldy);

      auto exp_x = MakeNode("exp", n->attrs.name + "_backward_exp_grad",
                            {n->inputs[1]}, nullptr, &n);
      auto d2ydx2_mul_dldy = op.mul(nnvm::NodeEntry{exp_x}, dldy);

      std::vector<nnvm::NodeEntry> ret;

      ret.emplace_back(op.mul(ograds[0], dydx));
      ret.emplace_back(op.mul(ograds[0], d2ydx2_mul_dldy));
      return ret;
    });
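
For `expm1` both derivatives coincide, $f'(x) = f''(x) = e^{x}$, so the lambda only needs a single extra `exp` node and the two returned entries reduce to

$$\text{ret}[0] = \bar{o}\cdot\frac{\bar{y}\,e^{x}}{\bar{y}} = \bar{o}\,e^{x}, \qquad \text{ret}[1] = \bar{o}\cdot e^{x}\cdot\bar{y},$$

with $\bar{o}$ = `ograds[0]` and $\bar{y}$ = `dL/dy` as before.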

} // namespace op
} // namespace mxnet
30 changes: 29 additions & 1 deletion src/operator/tensor/elemwise_unary_op_pow.cc
@@ -25,6 +25,7 @@
#include "elemwise_unary_op.h"
#include "./elemwise_binary_op-inl.h"
#include "../nn/mkldnn/mkldnn_ops-inl.h"
#include "../../nnvm/node_op_util.h"

namespace mxnet {
namespace op {
@@ -120,7 +121,34 @@ The storage type of ``square`` output depends upon the input storage type:
#endif

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_square,
unary_bwd<mshadow_op::square_grad>);
unary_bwd<mshadow_op::square_grad>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      // ograds[0]: head_grad_grads (dL/dxgrad)
      // inputs[0]: dL/dy
      // inputs[1]: x (ElemwiseGradUseIn)
      // f(x) = y = x^2
      // f'(x) = 2*x
      // f''(x) = 2
      auto dldy = n->inputs[0];
      auto x = n->inputs[1];
      auto dydx_mul_dldy = nnvm::NodeEntry{n};  // f'(x) * head_grads
      auto op = mxnet::util::NodeOpGen{n};

      auto dydx = op.div(dydx_mul_dldy, dldy);

      std::unordered_map<std::string, std::string> args = {{"scalar", "2.0"}};
      auto ones_like = MakeNode("ones_like", n->attrs.name + "_backward_ones_like",
                                {n->inputs[1]}, nullptr, &n);
      auto d2ydx2 = op.mul(2.0, nnvm::NodeEntry{ones_like});
      auto d2ydx2_mul_dldy = op.mul(d2ydx2, dldy);

      std::vector<nnvm::NodeEntry> ret;

      ret.emplace_back(op.mul(ograds[0], dydx));
      ret.emplace_back(op.mul(ograds[0], d2ydx2_mul_dldy));
      return ret;
    });
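
For `square`, $f''(x)$ is the constant $2$; because `NodeOpGen` composes graph nodes rather than plain scalars, that constant is materialised as $2\cdot\texttt{ones\_like}(x)$, giving

$$\text{ret}[0] = \bar{o}\cdot\frac{\bar{y}\cdot 2x}{\bar{y}} = \bar{o}\cdot 2x, \qquad \text{ret}[1] = \bar{o}\cdot 2\cdot\bar{y}.$$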

// sqrt
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(sqrt, cpu, mshadow_op::square_root)
33 changes: 33 additions & 0 deletions tests/python/unittest/test_higher_order_grad.py
@@ -273,6 +273,39 @@ def grad_grad_op(x):
        check_second_order_unary(array, log10, grad_grad_op)


@with_seed()
def test_square():
    def grad_grad_op(x):
        return nd.ones_like(x) * 2

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, nd.square, grad_grad_op)


@with_seed()
def test_expm1():
    def grad_grad_op(x):
        return nd.exp(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, nd.expm1, grad_grad_op)


@with_seed()
def test_log1p():
    def grad_grad_op(x):
        return -1/((1+x)**2)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, nd.log1p, grad_grad_op)
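
Each new test hands `check_second_order_unary` (defined earlier in this file, not shown in the diff) an input array, the operator under test, and its analytic second derivative. As a rough, illustrative sketch only, not the helper's actual body, a second-order check of this kind can be written with MXNet's autograd as follows; `second_order_check` and its tolerance are made-up names for this example:

```python
import mxnet.ndarray as nd
from mxnet import autograd


def second_order_check(x, op, grad_grad_op, tol=1e-5):
    """Differentiate `op` twice with autograd and compare against `grad_grad_op`."""
    x = nd.array(x)
    x.attach_grad()
    with autograd.record():
        y = op(x)
        # First-order gradient, recorded so it can itself be differentiated.
        x_grad = autograd.grad(heads=y, variables=x, head_grads=nd.ones_like(y),
                               create_graph=True, retain_graph=True)[0]
    x_grad.backward()  # second-order gradient accumulates into x.grad
    expected = grad_grad_op(x)
    assert nd.max(nd.abs(x.grad - expected)).asscalar() < tol


# e.g. second_order_check([0.5, 1.0, 2.0], nd.log1p, lambda x: -1 / ((1 + x) ** 2))
```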


@with_seed()
def test_reciprocal():
    def reciprocal(x):