add TensorCopy in reduce_sum_grad

PaddlePaddle · Mar 16, 2021 · 1269cfe · 1269cfe · paddle-bot-old · Mar 16, 2021
1 parent 70b715b
commit 1269cfe
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 1 deletion.
diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_npu.cc
@@ -83,6 +83,11 @@ class ReduceSumGradNPUKernel : public framework::OpKernel<T> {
       Tensor out_grad_tmp(out_grad->type());
       out_grad_tmp.Resize(out_dims);
       out_grad_tmp.mutable_data<T>(ctx.GetPlace());
+      framework::TensorCopy(
+          *out_grad, ctx.GetPlace(),
+          ctx.template device_context<platform::DeviceContext>(),
+          &out_grad_tmp);
+      out_grad_tmp.Resize(out_dims);
 
       auto runner = NpuOpRunner("BroadcastToD", {out_grad_tmp}, {*x_grad},
                                 {{"shape", framework::vectorize(x->dims())}});

diff --git a/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_reduce_sum_op_npu.py
@@ -102,7 +102,9 @@ def _test(self, run_npu=True):
             label = paddle.static.data(
                 name="label", shape=[2, 1], dtype='int64')
 
-            z = paddle.add(a, b)
+            a_1 = fluid.layers.fc(input=a, size=4, num_flatten_dims=2, act=None)
+            b_1 = fluid.layers.fc(input=b, size=4, num_flatten_dims=2, act=None)
+            z = paddle.add(a_1, b_1)
             z_1 = self.set_reduce_sum_function(z)
 
             prediction = fluid.layers.fc(input=z_1, size=2, act='softmax')