[MXNET-978] Second order gradient support for some unary operators (#14613)

* try to add support for some ops

* add unit test for second order grad

* implement grad for relu and add unit test

* fix lint

* register FGradient attribute for backward relu

* resolve conflict

* remove unused imports

* change gradient using set_attr

* remove higher order grad test for negative(x)

* fix lint

* reverse indent

* remove unused backward operator

* refactor backward for sin(x) and cos(x)

* change value init to list init

* change to list initialization

* generate random shape in test

* fix a bug in second order backward

* fix lint

* fix lint

* address reviewer comment and renaming
apeforest committed Jun 10, 2019
1 parent b64e00a commit 3c82ce2
Showing 3 changed files with 131 additions and 15 deletions.
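At a high level, the change registers an FGradient attribute on the backward nodes _backward_relu, _backward_sin and _backward_cos, so the graph produced by the first backward pass can itself be differentiated. A minimal sketch of the user-facing behaviour this enables (illustrative values; not code taken from this commit):

from mxnet import nd, autograd

x = nd.array([0.1, 0.5, 1.0])
x.attach_grad()
with autograd.record():
    y = nd.sin(x)
    # First-order gradient, recorded with create_graph=True so that it can
    # itself be differentiated.
    dy_dx = autograd.grad(y, x, head_grads=nd.ones_like(y),
                          create_graph=True, retain_graph=True)[0]
dy_dx.backward()   # second backward pass
print(x.grad)      # approximately -sin(x), i.e. the second derivative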
22 changes: 20 additions & 2 deletions src/operator/tensor/elemwise_unary_op_basic.cc
@@ -85,8 +85,26 @@ The storage type of ``relu`` output depends upon the input storage type:
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_relu"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu,
                                               unary_bwd<mshadow_op::relu_grad>);
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd<mshadow_op::relu_grad>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      std::vector<nnvm::NodeEntry> ret;
      // ograds[0]: dL/dxgrad
      // inputs[0]: dL/dy
      // inputs[1]: y
      // f(x) -> relu(x)
      // f'(x) = 1 if x > 0 else 0
      // f''(x) = 0
      auto dydx = MakeNode("_greater", n->attrs.name + "_dydx",
                           {n->inputs[1], nnvm::NodeEntry{
                             MakeNode("zeros_like", n->attrs.name + "tmp", {n->inputs[1]}, nullptr, &n)
                           }}, nullptr, &n);
      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
                                {ograds[0], nnvm::NodeEntry(dydx)}, nullptr, &n));
      ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in",
                                {n->inputs[1]}, nullptr, &n));
      return ret;
});
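For reference, _backward_relu computes dL/dx = dL/dy * 1(y > 0) from its two inputs, so the lambda above must return one gradient per input: the incoming ograds[0] masked by the relu derivative (gradient w.r.t. inputs[0] = dL/dy) and zeros (gradient w.r.t. inputs[1] = y, since f''(x) = 0 away from the kink). A rough NDArray rendering of those two outputs, with made-up values, just to make the graph construction concrete (this is a sketch, not the registered C++ code):

from mxnet import nd

y = nd.array([-1.0, 0.5, 2.0])        # stands in for inputs[1], the output of relu(x)
o_grad = nd.array([0.1, 0.2, 0.3])    # stands in for ograds[0]

grad_wrt_dL_dy = o_grad * (y > 0)     # elemwise_mul(ograds[0], _greater(y, zeros_like(y)))
grad_wrt_y = nd.zeros_like(y)         # zeros_like(y): f''(x) == 0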

// sigmoid
MXNET_OPERATOR_REGISTER_UNARY(sigmoid)
60 changes: 58 additions & 2 deletions src/operator/tensor/elemwise_unary_op_trig.cc
@@ -46,7 +46,33 @@ The storage type of ``sin`` output depends upon the input storage type:
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{ "_backward_sin" });

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd<mshadow_op::sin_grad>);
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd<mshadow_op::sin_grad>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      // ograds[0]: d^2L/dx^2
      // inputs[0]: dL/dy
      // inputs[1]: x (ElemwiseUseIn)
      // f(x) = sin(x)
      // f'(x) = cos(x)
      // f''(x) = -sin(x)
      auto dydx = MakeNode("cos", n->attrs.name + "_dydx",
                           {n->inputs[1]}, nullptr, &n);
      auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2",
                             {nnvm::NodeEntry{
                               MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n)
                             }}, nullptr, &n);

      auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid",
                                    {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n);

      std::vector<nnvm::NodeEntry> ret;

      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
                                {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n));
      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
                                {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n));
      return ret;
});
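The pattern here is the general one for these backward-of-backward nodes: _backward_sin computes dL/dx = dL/dy * cos(x), so differentiating it yields cos(x) with respect to its first input (dL/dy) and dL/dy * (-sin(x)) with respect to its second input (x), each multiplied by the incoming ograds[0]. A sketch of the same two outputs in NDArray form, with arbitrary values, mirroring the two elemwise_mul nodes above (illustrative only):

from mxnet import nd

x = nd.array([0.0, 0.5, 1.0])          # stands in for inputs[1]
dL_dy = nd.array([1.0, 1.0, 1.0])      # stands in for inputs[0]
o_grad = nd.array([0.1, 0.2, 0.3])     # stands in for ograds[0]

grad_wrt_dL_dy = o_grad * nd.cos(x)              # ograds[0] * f'(x)
grad_wrt_x = o_grad * (dL_dy * -nd.sin(x))       # ograds[0] * dL/dy * f''(x)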

// cos
MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos)
@@ -63,7 +89,37 @@ The storage type of ``cos`` output is always dense
)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_cos"});

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd<mshadow_op::cos_grad>);
MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd<mshadow_op::cos_grad>)
.set_attr<nnvm::FGradient>("FGradient",
    [](const nnvm::NodePtr& n, const std::vector<nnvm::NodeEntry>& ograds) {
      // ograds[0]: d^2L/dx^2
      // inputs[0]: dL/dy
      // inputs[1]: x (ElemwiseUseIn)
      // f(x) = cos(x)
      // f'(x) = -sin(x)
      // f''(x) = -cos(x)
      auto dydx = MakeNode("negative", n->attrs.name + "_dydx",
                           {nnvm::NodeEntry{
                             MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n)
                           }}, nullptr, &n);
      auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2",
                             {nnvm::NodeEntry{
                               MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n)
                             }}, nullptr, &n);

      auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid",
                                    {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n);

      std::vector<nnvm::NodeEntry> ret;
      // for the backward of the _backward_cos node
      // first input is the ograd and second input is x (because ElemwiseUseIn)
      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad",
                                {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n));
      ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in",
                                {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n));
      return ret;
});


// tan
MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan)
64 changes: 53 additions & 11 deletions tests/python/unittest/test_higher_order_grad.py
@@ -15,13 +15,55 @@
# specific language governing permissions and limitations
# under the License.

import math

import math
from mxnet import nd, autograd
from mxnet.test_utils import assert_almost_equal, random_arrays
from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd
from common import with_seed


@with_seed()
def test_sin():
    def sin(x):
        return nd.sin(x)

    def grad_grad_op(x):
        return -nd.sin(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, sin, grad_grad_op)


@with_seed()
def test_cos():
    def cos(x):
        return nd.cos(x)

    def grad_grad_op(x):
        return -nd.cos(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, cos, grad_grad_op)


@with_seed()
def test_relu():
    def relu(x):
        return nd.relu(x)

    def grad_grad_op(x):
        return nd.zeros_like(x)

    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, relu, grad_grad_op)


@with_seed()
def test_log():
    def log(x):
@@ -30,9 +72,9 @@ def log(x):
    def grad_grad_op(x):
        return -1/(x**2)

    arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))

    for array in arrays:
    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, log, grad_grad_op)


@@ -44,9 +86,9 @@ def log2(x):
    def grad_grad_op(x):
        return -1/((x**2) * math.log(2))

    arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))

    for array in arrays:
    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, log2, grad_grad_op)


@@ -58,9 +100,9 @@ def log10(x):
    def grad_grad_op(x):
        return -1/((x**2) * math.log(10))

    arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5))

    for array in arrays:
    for dim in range(1, 5):
        shape = rand_shape_nd(dim)
        array = random_arrays(shape)
        check_second_order_unary(array, log10, grad_grad_op)
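Every test above delegates to check_second_order_unary, which already exists in this file and is not touched by this diff. Judging only from how it is called here, a minimal version of that helper would look roughly like the following; it reuses the module imports shown at the top of the file, and the actual helper in the repository may differ in details such as the head gradients it feeds in:

def check_second_order_unary(x, op, grad_grad_op):
    # x: input array, op: unary operator under test,
    # grad_grad_op: closed-form second derivative of op.
    x = nd.array(x)
    expected_grad_grad = grad_grad_op(x).asnumpy()
    x.attach_grad()
    with autograd.record():
        y = op(x)
        # First-order gradient, kept in the graph so it can be differentiated again.
        x_grad = autograd.grad(y, x, head_grads=nd.ones_like(y),
                               create_graph=True, retain_graph=True)[0]
    x_grad.backward()  # second backward pass writes f''(x) into x.grad
    assert_almost_equal(expected_grad_grad, x.grad.asnumpy())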


