From 45e1502e4b0cf8c38380417776c391464d89c6fb Mon Sep 17 00:00:00 2001 From: sxjscience Date: Sun, 14 Oct 2018 14:56:04 +0800 Subject: [PATCH 01/20] try to add support some ops --- .../tensor/elemwise_binary_op_basic.cc | 12 +++++++++- .../tensor/elemwise_unary_op_basic.cc | 8 ++++++- src/operator/tensor/elemwise_unary_op_trig.cc | 22 +++++++++++++++++-- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc index 339290df8bf9..710ce5510236 100644 --- a/src/operator/tensor/elemwise_binary_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_op_basic.cc @@ -224,7 +224,17 @@ The storage type of ``elemwise_mul`` output depends on storage types of inputs return std::vector{ResourceRequest::kTempSpace}; }) .add_alias("_mul").add_alias("_Mul") -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mul"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto lhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_lhs", + {ograds[0], n->inputs[1]}, nullptr, &n); + auto rhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_rhs", + {ograds[0], n->inputs[0]}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0}); + ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0}); + return ret; + }); NNVM_REGISTER_OP(_backward_mul) .set_num_inputs(3) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 49ae976cfc2c..b11c1ebbcc28 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -623,7 +623,13 @@ The storage type of ``negative`` output depends upon the input storage type: - negative(csr) = csr )code") -.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto in_grad = MakeNode("negative", n->attrs.name + "_backward", {ograds[0]}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); // reciprocal MXNET_OPERATOR_REGISTER_UNARY(reciprocal) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 288719f48a96..5de6de63c06d 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -44,7 +44,15 @@ The storage type of ``sin`` output depends upon the input storage type: - sin(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sin" }); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto x_grad = MakeNode("cos", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd); @@ -61,7 +69,17 @@ The input should be in radians (:math:`2\pi` rad equals 360 degrees). 
The storage type of ``cos`` output is always dense )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseIn{"_backward_cos"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto x_grad = MakeNode("sin", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); + auto neg_x_grad = MakeNode("negative", n->attrs.name + "_mid_neg_x_grad", + {nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{neg_x_grad, 0, 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd); From 492e4cdd19db27c1a930ccb5f1ed6562d044fe8b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 3 Apr 2019 10:38:04 -0700 Subject: [PATCH 02/20] add unit test for second order grad --- .../python/unittest/test_higher_order_grad.py | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 tests/python/unittest/test_higher_order_grad.py diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py new file mode 100644 index 000000000000..696ac9fa8cab --- /dev/null +++ b/tests/python/unittest/test_higher_order_grad.py @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import mxnet as mx +import numpy as np +from mxnet import gluon, nd, autograd +from mxnet.test_utils import assert_almost_equal +from tests.python.unittest.common import with_seed + + +@with_seed() +def test_elemwise_mul(): + x = nd.array([1, 2, 3]) + y = nd.zeros(3) + x.attach_grad() + with autograd.record(): + y = nd.elemwise_mul(x, x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = nd.array([2, 2, 2]) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +@with_seed() +def test_sin(): + x = nd.array([1, 2, 3]) + x.attach_grad() + with autograd.record(): + y = nd.sin(x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = -nd.sin(x) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +@with_seed() +def test_cos(): + x = nd.array([1, 2, 3]) + x.attach_grad() + with autograd.record(): + y = nd.cos(x) + y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] + y_grad.backward() + expect_grad = -nd.cos(x) + assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + + +if __name__ == '__main__': + import nose + nose.runmodule() From 45b334ebb571880388b924eba7629f63b4cd4a9d Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 3 Apr 2019 16:55:35 -0700 Subject: [PATCH 03/20] implement grad for relu and add unit test --- src/imperative/imperative.cc | 5 ++- .../tensor/elemwise_unary_op_basic.cc | 10 ++++- .../python/unittest/test_higher_order_grad.py | 41 +++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 3e5b3987522c..b07e761aa124 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -347,8 +347,9 @@ std::vector Imperative::Backward( x_reqs.push_back(info.grad_req); info.fresh_out_grad = true; } - CHECK_GT(xs.size(), 0) - << "There are no inputs in computation graph that require gradients."; + if (xs.empty()) { + LOG(WARNING) << "There are no inputs in computation graph that require gradients."; + } } Graph g_graph = pass::MXGradient( diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 68654e3a116e..a16d3f2d89eb 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -83,7 +83,15 @@ The storage type of ``relu`` output depends upon the input storage type: - relu(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", ElemwiseGradUseOut{"_backward_relu"}); +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + auto zero_node = MakeNode("zeros_like", n->attrs.name + "_relu_backward", {n->inputs[0]}, nullptr, &n); + auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); + std::vector ret; + ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); + return ret; + }); MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd); diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 696ac9fa8cab..4b6bce7f6a29 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -37,25 +37,50 @@ def test_elemwise_mul(): @with_seed() def test_sin(): + def 
sin(x): + return nd.sin(x) + x = nd.array([1, 2, 3]) - x.attach_grad() - with autograd.record(): - y = nd.sin(x) - y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] - y_grad.backward() expect_grad = -nd.sin(x) - assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) + check_second_order_unary(x, sin, expect_grad) @with_seed() def test_cos(): + def cos(x): + return nd.cos(x) + x = nd.array([1, 2, 3]) + expect_grad = -nd.cos(x) + check_second_order_unary(x, cos, expect_grad) + + +@with_seed() +def test_negative(): + def negative(x): + return nd.negative(x) + + x = nd.array([1, 2, 3]) + expect_grad = nd.zeros_like(x) + check_second_order_unary(x, negative, expect_grad) + + +@with_seed() +def test_relu(): + def relu(x): + return nd.relu(x) + + x = nd.array([1, 2, 3]) + expect_grad = nd.zeros_like(x) + check_second_order_unary(x, relu, expect_grad) + + +def check_second_order_unary(x, op, expect_grad): x.attach_grad() with autograd.record(): - y = nd.cos(x) + y = op(x) y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] y_grad.backward() - expect_grad = -nd.cos(x) assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) From 4dc0907a6cb636966d44018879c14ca4cfcf2a61 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 5 Apr 2019 11:26:41 -0700 Subject: [PATCH 04/20] fix lint --- src/operator/tensor/elemwise_unary_op_basic.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index a16d3f2d89eb..3f794966dc92 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -85,9 +85,12 @@ The storage type of ``relu`` output depends upon the input storage type: )code" ADD_FILELINE) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto zero_node = MakeNode("zeros_like", n->attrs.name + "_relu_backward", {n->inputs[0]}, nullptr, &n); - auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); + auto zero_node = MakeNode("zeros_like", n->attrs.name + "_backward", + {n->inputs[0]}, nullptr, &n); + auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", + {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); + auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", + {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); std::vector ret; ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); return ret; From 30ff1e9925a9bdb94514ab4a1f876c46419f0d27 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:28:02 -0700 Subject: [PATCH 05/20] register FGradient attribute for backward relu --- .../tensor/elemwise_binary_op_basic.cc | 12 +--------- .../tensor/elemwise_unary_op_basic.cc | 24 ++++++++----------- .../python/unittest/test_higher_order_grad.py | 13 ---------- 3 files changed, 11 insertions(+), 38 deletions(-) diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc b/src/operator/tensor/elemwise_binary_op_basic.cc index 2e1f979c805a..c5e30c68de7e 100644 --- a/src/operator/tensor/elemwise_binary_op_basic.cc +++ b/src/operator/tensor/elemwise_binary_op_basic.cc @@ -233,17 +233,7 @@ The storage type of ``elemwise_mul`` output depends on storage types of inputs return std::vector{ResourceRequest::kTempSpace}; }) 
.add_alias("_mul").add_alias("_Mul") -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto lhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_lhs", - {ograds[0], n->inputs[1]}, nullptr, &n); - auto rhs_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward_rhs", - {ograds[0], n->inputs[0]}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{lhs_grad, 0, 0}); - ret.emplace_back(nnvm::NodeEntry{rhs_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_mul"}); NNVM_REGISTER_OP(_backward_mul) .set_num_inputs(3) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index ad835c654959..a096de4df067 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -83,21 +83,17 @@ The storage type of ``relu`` output depends upon the input storage type: - relu(csr) = csr )code" ADD_FILELINE) -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto zero_node = MakeNode("zeros_like", n->attrs.name + "_backward", - {n->inputs[0]}, nullptr, &n); - auto x_grad = MakeNode("_greater", n->attrs.name + "_mid_x_grad", - {n->inputs[0], nnvm::NodeEntry{zero_node, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{x_grad, 0 , 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseOut{"_backward_relu"}); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, - unary_bwd); +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd) +.set_attr("FGradient", + [](const nnvm::NodePtr& n, const std::vector& ograds) { + std::vector ret; + auto in_grad = MakeNode("zeros_like", n->attrs.name + "_backward", + {n->inputs[0]}, nullptr, &n); + ret.emplace_back(in_grad); + return ret; + }); // sigmoid MXNET_OPERATOR_REGISTER_UNARY(sigmoid) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 4b6bce7f6a29..ec94e1e9d376 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -22,19 +22,6 @@ from tests.python.unittest.common import with_seed -@with_seed() -def test_elemwise_mul(): - x = nd.array([1, 2, 3]) - y = nd.zeros(3) - x.attach_grad() - with autograd.record(): - y = nd.elemwise_mul(x, x) - y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0] - y_grad.backward() - expect_grad = nd.array([2, 2, 2]) - assert_almost_equal(expect_grad.asnumpy(), x.grad.asnumpy()) - - @with_seed() def test_sin(): def sin(x): From d9ba3da306c7b541122abae7d85434cfc574a91e Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:33:37 -0700 Subject: [PATCH 06/20] resolve conflict --- tests/python/unittest/test_higher_order_grad.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 3fedd7cdb374..e8dc80c3e5ab 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -15,13 +15,13 @@ # specific language governing permissions and limitations # under the License. 
-<<<<<<< HEAD -import mxnet as mx + +import math import numpy as np +import mxnet as mx from mxnet import gluon, nd, autograd -from mxnet.test_utils import assert_almost_equal -from tests.python.unittest.common import with_seed - +from mxnet.test_utils import assert_almost_equal, random_arrays +from common import with_seed @with_seed() def test_sin(): @@ -63,13 +63,6 @@ def relu(x): check_second_order_unary(x, relu, expect_grad) -def check_second_order_unary(x, op, expect_grad): -======= -import math - -from mxnet import nd, autograd -from mxnet.test_utils import assert_almost_equal, random_arrays -from common import with_seed @with_seed() From 1c93c7d5167e69d7f99b870a70185a2ac501de4c Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Tue, 28 May 2019 13:35:08 -0700 Subject: [PATCH 07/20] remove unused imports --- tests/python/unittest/test_higher_order_grad.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index e8dc80c3e5ab..f05ee21e5713 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -17,12 +17,11 @@ import math -import numpy as np -import mxnet as mx -from mxnet import gluon, nd, autograd +from mxnet import nd, autograd from mxnet.test_utils import assert_almost_equal, random_arrays from common import with_seed + @with_seed() def test_sin(): def sin(x): From de721bc877bc420cb2bbe3d119b7023610e662c7 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 17:05:16 -0700 Subject: [PATCH 08/20] change gradient using set_attr --- .../tensor/elemwise_unary_op_basic.cc | 19 +++++----- .../python/unittest/test_higher_order_grad.py | 36 ++++++++++++------- 2 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index 904561695ce2..ec2ea3707bb9 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,9 +89,14 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - auto in_grad = MakeNode("zeros_like", n->attrs.name + "_backward", - {n->inputs[0]}, nullptr, &n); - ret.emplace_back(in_grad); + // f(x) -> f = relu + // f'(x) = 1 if x > 0 else 0 + // f''(x) = 0 + auto gx = nnvm::NodeEntry{n}; // f'(x) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], gx}, nullptr, &n)); + ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", + {gx}, nullptr, &n)); return ret; }); @@ -660,13 +665,7 @@ The storage type of ``negative`` output depends upon the input storage type: - negative(csr) = csr )code") -.set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto in_grad = MakeNode("negative", n->attrs.name + "_backward", {ograds[0]}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); +.set_attr("FGradient", ElemwiseGradUseNone{"negative"}); // reciprocal MXNET_OPERATOR_REGISTER_UNARY(reciprocal) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index f05ee21e5713..60816bf6487b 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -27,9 +27,12 @@ def test_sin(): def sin(x): 
return nd.sin(x) - x = nd.array([1, 2, 3]) - expect_grad = -nd.sin(x) - check_second_order_unary(x, sin, expect_grad) + def grad_grad_op(x): + return -nd.sin(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, sin, grad_grad_op) @with_seed() @@ -37,9 +40,12 @@ def test_cos(): def cos(x): return nd.cos(x) - x = nd.array([1, 2, 3]) - expect_grad = -nd.cos(x) - check_second_order_unary(x, cos, expect_grad) + def grad_grad_op(x): + return -nd.cos(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, cos, grad_grad_op) @with_seed() @@ -47,9 +53,12 @@ def test_negative(): def negative(x): return nd.negative(x) - x = nd.array([1, 2, 3]) - expect_grad = nd.zeros_like(x) - check_second_order_unary(x, negative, expect_grad) + def grad_grad_op(x): + return nd.zeros_like(x) + + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, negative, grad_grad_op) @with_seed() @@ -57,11 +66,12 @@ def test_relu(): def relu(x): return nd.relu(x) - x = nd.array([1, 2, 3]) - expect_grad = nd.zeros_like(x) - check_second_order_unary(x, relu, expect_grad) - + def grad_grad_op(x): + return nd.zeros_like(x) + arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) + for array in arrays: + check_second_order_unary(array, relu, grad_grad_op) @with_seed() From 0ac094262b740370d667aa79ce55ee68bf1095e5 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 17:13:43 -0700 Subject: [PATCH 09/20] remove higher order grad test for negative(x) --- src/imperative/imperative.cc | 5 ++--- tests/python/unittest/test_higher_order_grad.py | 13 ------------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index cb6e1446f5ba..9c7621975cf5 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -349,9 +349,8 @@ std::vector Imperative::Backward( x_reqs.push_back(info.grad_req); info.fresh_out_grad = true; } - if (xs.empty()) { - LOG(WARNING) << "There are no inputs in computation graph that require gradients."; - } + CHECK_GT(xs.size(), 0) + << "There are no inputs in computation graph that require gradients."; } Graph g_graph = pass::MXGradient( diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 60816bf6487b..3b686e276e7f 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -48,19 +48,6 @@ def grad_grad_op(x): check_second_order_unary(array, cos, grad_grad_op) -@with_seed() -def test_negative(): - def negative(x): - return nd.negative(x) - - def grad_grad_op(x): - return nd.zeros_like(x) - - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: - check_second_order_unary(array, negative, grad_grad_op) - - @with_seed() def test_relu(): def relu(x): From f8e624ef29ffc8a6c475e4754ec66bd2f8ef08fa Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 29 May 2019 22:07:21 -0700 Subject: [PATCH 10/20] fix lint --- src/operator/tensor/elemwise_unary_op_basic.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index ec2ea3707bb9..bd94f81bff26 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ 
b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -92,7 +92,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd f = relu // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 - auto gx = nnvm::NodeEntry{n}; // f'(x) + auto gx = nnvm::NodeEntry{n}; // f'(x) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", {ograds[0], gx}, nullptr, &n)); ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", From 8538980062db57ec4dbe3b97a436b8b34f7921c8 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 09:44:31 -0700 Subject: [PATCH 11/20] reverse indent --- src/imperative/imperative.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc index 9c7621975cf5..d8fba1c169ec 100644 --- a/src/imperative/imperative.cc +++ b/src/imperative/imperative.cc @@ -350,7 +350,7 @@ std::vector Imperative::Backward( info.fresh_out_grad = true; } CHECK_GT(xs.size(), 0) - << "There are no inputs in computation graph that require gradients."; + << "There are no inputs in computation graph that require gradients."; } Graph g_graph = pass::MXGradient( From 1ee38b5a1b6db91a7dfb5c223465aaec79791891 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 09:46:32 -0700 Subject: [PATCH 12/20] remove unused backward operator --- src/operator/tensor/elemwise_unary_op_trig.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 3308d5f13f1e..8a04d023376e 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -54,8 +54,6 @@ The storage type of ``sin`` output depends upon the input storage type: return ret; }); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd); - // cos MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos) MXNET_ADD_SPARSE_OP_ALIAS(cos) @@ -81,8 +79,6 @@ The storage type of ``cos`` output is always dense return ret; }); -MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd); - // tan MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan) .describe(R"code(Computes the element-wise tangent of the input array. 
From c18f317f7eb547163a987b15d5bd481cc2137550 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 10:28:04 -0700 Subject: [PATCH 13/20] refactor backward for sin(x) and cos(x) --- src/operator/tensor/elemwise_unary_op_trig.cc | 61 +++++++++++++------ 1 file changed, 43 insertions(+), 18 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 8a04d023376e..d97b34f6207f 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -44,15 +44,28 @@ The storage type of ``sin`` output depends upon the input storage type: - sin(csr) = csr )code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{ "_backward_sin" }); + +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd) .set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto x_grad = MakeNode("cos", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // f(x) = sin(x) + // f'(x) = cos(x) + // f''(x) = -sin(x) + auto grad_x = nnvm::NodeEntry(n); + auto grad_grad_x_mid = MakeNode("sin", n->attrs.name + "_mid_grad_grad", + {n->inputs[1]}, nullptr, &n); + auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", + {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + std::vector ret; + // for the backward of the _backward_sin node + // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], grad_x}, nullptr, &n)); + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + return ret; + }); // cos MXNET_OPERATOR_REGISTER_UNARY_WITH_SPARSE_DR(cos, cpu, mshadow_op::cos) @@ -67,17 +80,29 @@ The input should be in radians (:math:`2\pi` rad equals 360 degrees). 
The storage type of ``cos`` output is always dense )code" ADD_FILELINE) +.set_attr("FGradient", ElemwiseGradUseIn{"_backward_cos"}); + +MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd) .set_attr("FGradient", - [](const nnvm::NodePtr& n, const std::vector& ograds) { - auto x_grad = MakeNode("sin", n->attrs.name + "_mid_x_grad", {n->inputs[0]}, nullptr, &n); - auto neg_x_grad = MakeNode("negative", n->attrs.name + "_mid_neg_x_grad", - {nnvm::NodeEntry{x_grad, 0, 0}}, nullptr, &n); - auto in_grad = MakeNode("elemwise_mul", n->attrs.name + "_backward", - {ograds[0], nnvm::NodeEntry{neg_x_grad, 0, 0}}, nullptr, &n); - std::vector ret; - ret.emplace_back(nnvm::NodeEntry{in_grad, 0, 0}); - return ret; - }); + [](const nnvm::NodePtr& n, const std::vector& ograds) { + // f(x) = cos(x) + // f'(x) = -sin(x) + // f''(x) = -cos(x) + auto grad_x = nnvm::NodeEntry(n); + auto grad_grad_x_mid = MakeNode("cos", n->attrs.name + "_mid_grad_grad", + {n->inputs[1]}, nullptr, &n); + auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", + {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + std::vector ret; + // for the backward of the _backward_cos node + // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", + {ograds[0], grad_x}, nullptr, &n)); + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", + {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + return ret; + }); + // tan MXNET_OPERATOR_REGISTER_UNARY_WITH_RSP_CSR(tan, cpu, mshadow_op::tan) From 689cfeeaf3972a4d726fde7c58810a68399d6ebe Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Thu, 30 May 2019 13:27:19 -0700 Subject: [PATCH 14/20] change value init to list init --- src/operator/tensor/elemwise_unary_op_trig.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index d97b34f6207f..dd72e4ee58c2 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -52,7 +52,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", @@ -63,7 +63,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_backward_grad_grad", {ograds[0], grad_x}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); return ret; }); @@ -88,7 +88,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", @@ -99,7 +99,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_backward_grad_grad", {ograds[0], grad_x}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry(grad_grad_x)}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); return ret; }); From 0b6c2ef7374718168d45da5ba673c17f5993dc5b Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 31 May 2019 11:48:14 -0700 Subject: [PATCH 15/20] 
change to list initialization --- src/operator/tensor/elemwise_unary_op_trig.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index dd72e4ee58c2..40801b5847cb 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -56,7 +56,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); std::vector ret; // for the backward of the _backward_sin node // first input is the ograd and second input is x (because ElemwiseUseIn) @@ -92,7 +92,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_mid_grad_grad", {n->inputs[1]}, nullptr, &n); auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry(grad_grad_x_mid)}, nullptr, &n); + {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) From 31f671f9de7d1a65470751c9748e5638e62874f5 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Fri, 31 May 2019 14:46:19 -0700 Subject: [PATCH 16/20] generate random shape in test --- .../python/unittest/test_higher_order_grad.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tests/python/unittest/test_higher_order_grad.py b/tests/python/unittest/test_higher_order_grad.py index 3b686e276e7f..77bfa68157aa 100644 --- a/tests/python/unittest/test_higher_order_grad.py +++ b/tests/python/unittest/test_higher_order_grad.py @@ -18,7 +18,7 @@ import math from mxnet import nd, autograd -from mxnet.test_utils import assert_almost_equal, random_arrays +from mxnet.test_utils import assert_almost_equal, random_arrays, rand_shape_nd from common import with_seed @@ -30,8 +30,9 @@ def sin(x): def grad_grad_op(x): return -nd.sin(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, sin, grad_grad_op) @@ -43,8 +44,9 @@ def cos(x): def grad_grad_op(x): return -nd.cos(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, cos, grad_grad_op) @@ -56,8 +58,9 @@ def relu(x): def grad_grad_op(x): return nd.zeros_like(x) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, relu, grad_grad_op) @@ -69,9 +72,9 @@ def log(x): def grad_grad_op(x): return -1/(x**2) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, log, grad_grad_op) @@ -83,9 +86,9 @@ def log2(x): def grad_grad_op(x): return -1/((x**2) * math.log(2)) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array 
= random_arrays(shape) check_second_order_unary(array, log2, grad_grad_op) @@ -97,9 +100,9 @@ def log10(x): def grad_grad_op(x): return -1/((x**2) * math.log(10)) - arrays = random_arrays((2, 2), (2, 3), (4, 5, 2), (3, 1, 4, 5)) - - for array in arrays: + for dim in range(1, 5): + shape = rand_shape_nd(dim) + array = random_arrays(shape) check_second_order_unary(array, log10, grad_grad_op) From 62fcca3c4236195433295fdce6c83c9607ddfc6e Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 15:38:38 -0700 Subject: [PATCH 17/20] fix a bug in second order backward --- .../tensor/elemwise_unary_op_basic.cc | 5 +- src/operator/tensor/elemwise_unary_op_trig.cc | 46 ++++++++++++------- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index bd94f81bff26..e7c5cb6cd1d2 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,7 +89,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - // f(x) -> f = relu + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: y + // f(x) -> relu(x) // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 auto gx = nnvm::NodeEntry{n}; // f'(x) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 40801b5847cb..7ebba7773132 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -49,21 +49,27 @@ The storage type of ``sin`` output depends upon the input storage type: MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: x (ElemwiseUseIn) // f(x) = sin(x) // f'(x) = cos(x) // f''(x) = -sin(x) - auto grad_x = nnvm::NodeEntry{n}; - auto grad_grad_x_mid = MakeNode("sin", n->attrs.name + "_mid_grad_grad", - {n->inputs[1]}, nullptr, &n); - auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); + auto x_grad = MakeNode("cos", n->attrs.name + "_x_grad", + {n->inputs[1]}, nullptr, &n); + auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + + auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", + {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + std::vector ret; - // for the backward of the _backward_sin node - // first input is the ograd and second input is x (because ElemwiseUseIn) + ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], grad_x}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; }); @@ -85,21 +91,29 @@ The storage type of ``cos`` output is always dense MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwd) .set_attr("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { + // ograds[0]: d^2L/dx^2 + // inputs[0]: dL/dy + // inputs[1]: x 
(ElemwiseUseIn) // f(x) = cos(x) // f'(x) = -sin(x) // f''(x) = -cos(x) - auto grad_x = nnvm::NodeEntry{n}; - auto grad_grad_x_mid = MakeNode("cos", n->attrs.name + "_mid_grad_grad", - {n->inputs[1]}, nullptr, &n); - auto grad_grad_x = MakeNode("negative", n->attrs.name + "_backward_grad_grad", - {nnvm::NodeEntry{grad_grad_x_mid}}, nullptr, &n); + auto x_grad = MakeNode("negative", n->attrs.name + "_x_grad", + {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + {nnvm::NodeEntry{MakeNode("cos", n->attrs.name + "_grad_grad_mid", + {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + + auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", + {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], grad_x}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", - {ograds[0], nnvm::NodeEntry{grad_grad_x}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; }); From a0a0e75eb25eaaf83d7cdab18d228d44b804b462 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 16:49:53 -0700 Subject: [PATCH 18/20] fix lint --- src/operator/tensor/elemwise_unary_op_trig.cc | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 7ebba7773132..e4b7b5b2638e 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -58,8 +58,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad", {n->inputs[1]}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", - {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, + nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); @@ -101,8 +103,10 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", - {nnvm::NodeEntry{MakeNode("cos", n->attrs.name + "_grad_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("cos", n->attrs.name + "_grad_grad_mid",{n->inputs[1]}, nullptr, &n) + }}, + nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); From 451c4bd108babf4c6a7a33c86373734f77d9a084 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Mon, 3 Jun 2019 21:30:08 -0700 Subject: [PATCH 19/20] fix lint --- src/operator/tensor/elemwise_unary_op_trig.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 
e4b7b5b2638e..6157c056476b 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -60,8 +60,7 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad_grad", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) - }}, - nullptr, &n); + }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); @@ -100,13 +99,13 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_x_grad", - {nnvm::NodeEntry{MakeNode("sin", n->attrs.name + "_grad_mid", - {n->inputs[1]}, nullptr, &n)}}, nullptr, &n); + {nnvm::NodeEntry{ + MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", {nnvm::NodeEntry{ - MakeNode("cos", n->attrs.name + "_grad_grad_mid",{n->inputs[1]}, nullptr, &n) - }}, - nullptr, &n); + MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); From b9b0c93cb14b6b12a3bbd955a709aa0949236811 Mon Sep 17 00:00:00 2001 From: Lin Yuan Date: Wed, 5 Jun 2019 12:32:44 -0700 Subject: [PATCH 20/20] address reviewer comment and renaming --- src/operator/tensor/elemwise_unary_op_basic.cc | 11 +++++++---- src/operator/tensor/elemwise_unary_op_trig.cc | 16 ++++++++-------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc index e7c5cb6cd1d2..f2b8dd6b1314 100644 --- a/src/operator/tensor/elemwise_unary_op_basic.cc +++ b/src/operator/tensor/elemwise_unary_op_basic.cc @@ -89,17 +89,20 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_relu, unary_bwd("FGradient", [](const nnvm::NodePtr& n, const std::vector& ograds) { std::vector ret; - // ograds[0]: d^2L/dx^2 + // ograds[0]: dL/dxgrad // inputs[0]: dL/dy // inputs[1]: y // f(x) -> relu(x) // f'(x) = 1 if x > 0 else 0 // f''(x) = 0 - auto gx = nnvm::NodeEntry{n}; // f'(x) + auto dydx = MakeNode("_greater", n->attrs.name + "_dydx", + {n->inputs[1], nnvm::NodeEntry{ + MakeNode("zeros_like", n->attrs.name + "tmp", {n->inputs[1]}, nullptr, &n) + }}, nullptr, &n); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], gx}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry(dydx)}, nullptr, &n)); ret.emplace_back(MakeNode("zeros_like", n->attrs.name + "_backward_grad_grad_in", - {gx}, nullptr, &n)); + {n->inputs[1]}, nullptr, &n)); return ret; }); diff --git a/src/operator/tensor/elemwise_unary_op_trig.cc b/src/operator/tensor/elemwise_unary_op_trig.cc index 6157c056476b..b7cf76e4eb2d 100644 --- a/src/operator/tensor/elemwise_unary_op_trig.cc +++ b/src/operator/tensor/elemwise_unary_op_trig.cc @@ -55,20 +55,20 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU_DR(_backward_sin, unary_bwdattrs.name + "_x_grad", + auto dydx = MakeNode("cos", n->attrs.name + "_dydx", {n->inputs[1]}, nullptr, &n); - auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); auto 
grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "backward_grad_grad_mid", - {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n); std::vector ret; ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret; @@ -98,23 +98,23 @@ MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_cos, unary_bwdattrs.name + "_x_grad", + auto dydx = MakeNode("negative", n->attrs.name + "_dydx", {nnvm::NodeEntry{ MakeNode("sin", n->attrs.name + "_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); - auto x_grad_grad = MakeNode("negative", n->attrs.name + "_x_grad_grad", + auto d2ydx2 = MakeNode("negative", n->attrs.name + "_d2ydx2", {nnvm::NodeEntry{ MakeNode("cos", n->attrs.name + "_grad_grad_mid", {n->inputs[1]}, nullptr, &n) }}, nullptr, &n); auto grad_grad_mid = MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_mid", - {n->inputs[0], nnvm::NodeEntry{x_grad_grad}}, nullptr, &n); + {n->inputs[0], nnvm::NodeEntry{d2ydx2}}, nullptr, &n); std::vector ret; // for the backward of the _backward_cos node // first input is the ograd and second input is x (because ElemwiseUseIn) ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad", - {ograds[0], nnvm::NodeEntry{x_grad}}, nullptr, &n)); + {ograds[0], nnvm::NodeEntry{dydx}}, nullptr, &n)); ret.emplace_back(MakeNode("elemwise_mul", n->attrs.name + "_backward_grad_grad_in", {ograds[0], nnvm::NodeEntry{grad_grad_mid}}, nullptr, &n)); return ret;
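
A minimal usage sketch (not part of the patch series itself), assuming an MXNet build that includes the FGradient registrations added above; it mirrors check_second_order_unary from the test file introduced in PATCH 02/20 and exercises the second-order gradient of sin(x):

import mxnet as mx
from mxnet import autograd, nd

x = nd.array([1.0, 2.0, 3.0])
x.attach_grad()
with autograd.record():
    y = nd.sin(x)
    # keep the first-order graph so dy/dx = cos(x) can itself be differentiated
    y_grad = autograd.grad(y, x, create_graph=True, retain_graph=True)[0]
y_grad.backward()
# x.grad now holds d2y/dx2 = -sin(x)
print(x.grad)

With the registrations in this series the same pattern should work for cos(x) and relu(x); without an FGradient on the first-order backward node, the second backward pass is expected to fail as non-differentiable.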