support mean,softmax_with_cross_entropy on Baidu Kunlun (#27792)

* support mean,softmax_with_cross_entropy on Baidu Kunlun,test=kunlun * fix unittests error,test=kunlun * delete boost::get,test=kunlun
PaddlePaddle · Oct 13, 2020 · 70c8c31 · 70c8c31
1 parent 1607e87
commit 70c8c31
Show file tree

Hide file tree

Showing 5 changed files with 702 additions and 2 deletions.
diff --git a/paddle/fluid/operators/mean_op_xpu.cc b/paddle/fluid/operators/mean_op_xpu.cc
@@ -0,0 +1,66 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/mean_op.h"
+#ifdef PADDLE_WITH_XPU
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+namespace paddle {
+namespace operators {
+
+// Forward kernel of the `mean` op for XPU devices.
+//
+// Reads input "X", allocates output "Out" on the execution place and passes
+// both buffers, together with the element count of X, to xpu::mean.
+// The buffers are accessed as float regardless of T.
+template <typename DeviceContext, typename T>
+class MeanXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto& xpu_dev_ctx = context.template device_context<DeviceContext>();
+
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+    // Allocate the output before taking its data pointer below.
+    out->mutable_data<T>(context.GetPlace());
+
+    const float* src = x->data<float>();
+    float* dst = out->data<float>();
+    int status = xpu::mean(xpu_dev_ctx.x_context(), src, dst, x->numel());
+    PADDLE_ENFORCE_EQ(status, xpu::Error_t::SUCCESS,
+                      platform::errors::InvalidArgument("XPU kernel error!"));
+  }
+};
+// Backward kernel of the `mean` op for XPU devices.
+//
+// Takes the gradient of "Out" (which must hold exactly one element),
+// allocates the gradient of "X" on the execution place and passes both
+// buffers, together with the element count of the X gradient, to
+// xpu::mean_grad. The buffers are accessed as float regardless of T.
+template <typename DeviceContext, typename T>
+class MeanGradXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto out_grad = context.Input<Tensor>(framework::GradVarName("Out"));
+    PADDLE_ENFORCE_EQ(
+        out_grad->numel(), 1,
+        platform::errors::InvalidArgument("Mean Gradient should be scalar"));
+
+    auto x_grad = context.Output<Tensor>(framework::GradVarName("X"));
+    // Allocate the input gradient before taking its data pointer below.
+    x_grad->mutable_data<T>(context.GetPlace());
+
+    auto& xpu_dev_ctx = context.template device_context<DeviceContext>();
+    const float* dy = out_grad->data<float>();
+    float* dx = x_grad->data<float>();
+    int status =
+        xpu::mean_grad(xpu_dev_ctx.x_context(), dx, dy, x_grad->numel());
+    PADDLE_ENFORCE_EQ(status, xpu::Error_t::SUCCESS,
+                      platform::errors::InvalidArgument("XPU kernel error!"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+// Register the XPU kernels for `mean` and `mean_grad`. Only the float
+// instantiation is registered for each op.
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(
+    mean, ops::MeanXPUKernel<paddle::platform::XPUDeviceContext, float>);
+REGISTER_OP_XPU_KERNEL(
+    mean_grad,
+    ops::MeanGradXPUKernel<paddle::platform::XPUDeviceContext, float>);
+#endif
diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc
@@ -0,0 +1,96 @@
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/operators/softmax_with_cross_entropy_op.h"
+#ifdef PADDLE_WITH_XPU
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace paddle {
+namespace operators {
+
+// Forward kernel of the `softmax_with_cross_entropy` op for XPU devices.
+//
+// Inputs:  "Logits", "Label" (read as int64 values).
+// Outputs: "Softmax", "Loss".
+// Attrs:   "axis" (must resolve to the last dimension), "ignore_index",
+//          "soft_label" (must be false; soft labels are rejected).
+//
+// Logits are treated as an n x d matrix with n = SizeToAxis(axis, dims) and
+// d = SizeFromAxis(axis, dims). Tensor buffers are accessed as float
+// regardless of T.
+//
+// xpu::cross_entropy_forward is given int labels, so the int64 labels are
+// copied to the host, narrowed to int and uploaded into a temporary device
+// buffer. Every temporary is owned by an RAII object so nothing leaks when
+// a PADDLE_ENFORCE check or a memory copy throws part-way through.
+template <typename T>
+class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    PADDLE_ENFORCE_EQ(
+        platform::is_xpu_place(context.GetPlace()), true,
+        platform::errors::InvalidArgument("This kernel only runs on XPU."));
+    const Tensor* logits = context.Input<Tensor>("Logits");
+    const Tensor* labels = context.Input<Tensor>("Label");
+    Tensor* softmax = context.Output<Tensor>("Softmax");
+    Tensor* loss = context.Output<Tensor>("Loss");
+    const int rank = logits->dims().size();
+    const int axis = CanonicalAxis(context.Attr<int>("axis"), rank);
+    PADDLE_ENFORCE_EQ(axis, rank - 1, platform::errors::InvalidArgument(
+                                          "axis should == rank - 1"));
+    softmax->mutable_data<T>(context.GetPlace());
+    loss->mutable_data<T>(context.GetPlace());
+    const int n = SizeToAxis(axis, logits->dims());
+    const int d = SizeFromAxis(axis, logits->dims());
+
+    // softmax
+    auto& dev_ctx =
+        context.template device_context<platform::XPUDeviceContext>();
+    int r = xpu::softmax2d_forward(dev_ctx.x_context(), logits->data<float>(),
+                                   softmax->data<float>(), n, d);
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::InvalidArgument("XPU kernel error!"));
+
+    // cross_entropy
+    auto ignore_index = context.Attr<int>("ignore_index");
+    const bool soft_label = context.Attr<bool>("soft_label");
+    if (soft_label) {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "XPU only support soft_label == false for now!"));
+    }
+
+    const platform::XPUPlace xpu_place =
+        BOOST_GET_CONST(platform::XPUPlace, context.GetPlace());
+    const int64_t* p_labels = labels->data<int64_t>();
+
+    // Host staging buffers; released automatically on every exit path.
+    std::vector<int64_t> labels_int64_host(n);
+    std::vector<int> labels_int32_host(n);
+
+    // Device buffer for the narrowed labels. Ownership moves to a
+    // unique_ptr right after a successful allocation so that a later
+    // failure cannot leak it.
+    int* raw_device_labels = nullptr;
+    PADDLE_ENFORCE_EQ(
+        xpu_malloc(reinterpret_cast<void**>(&raw_device_labels),
+                   n * sizeof(int)),
+        XPU_SUCCESS, platform::errors::InvalidArgument("XPU kernel error!"));
+    std::unique_ptr<int, XPUFree> labels_int32_device(raw_device_labels);
+
+    // Wait for outstanding device work before reading the labels back.
+    dev_ctx.Wait();
+    memory::Copy(platform::CPUPlace(), labels_int64_host.data(), xpu_place,
+                 p_labels, n * sizeof(int64_t));
+    for (int i = 0; i < n; ++i) {
+      // Explicit narrowing: the XPU routine is handed int labels.
+      labels_int32_host[i] = static_cast<int>(labels_int64_host[i]);
+    }
+    memory::Copy(xpu_place, labels_int32_device.get(), platform::CPUPlace(),
+                 labels_int32_host.data(), n * sizeof(int));
+
+    r = xpu::cross_entropy_forward(
+        dev_ctx.x_context(), n, d, softmax->data<float>(),
+        labels_int32_device.get(), loss->data<float>(), nullptr,
+        ignore_index);
+    PADDLE_ENFORCE_EQ(r, xpu::Error_t::SUCCESS,
+                      platform::errors::InvalidArgument("XPU kernel error!"));
+    // Wait again before the temporary device buffer is released at scope
+    // exit.
+    dev_ctx.Wait();
+  }
+
+ private:
+  // Deleter that returns a buffer obtained from xpu_malloc via xpu_free.
+  struct XPUFree {
+    void operator()(int* ptr) const { xpu_free(ptr); }
+  };
+};
+}  // namespace operators
+}  // namespace paddle
+
+// Register the XPU kernel for `softmax_with_cross_entropy`. Only the float
+// instantiation is registered.
+namespace ops = paddle::operators;
+REGISTER_OP_XPU_KERNEL(softmax_with_cross_entropy,
+                       ops::SoftmaxWithCrossEntropyXPUKernel<float>);
+#endif
diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py
@@ -26,6 +26,7 @@
 import collections
 from collections import defaultdict
 
+import paddle
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.backward import append_backward
@@ -1133,8 +1134,10 @@ def find_actual(target_name, fetch_list):
             )
         # Check inplace for given op, its grad op, its grad_grad op, etc.
         # No effect on original OpTest
-        self.check_inplace_output_with_place(
-            place, no_check_set=no_check_set, inplace_atol=inplace_atol)
+        # Currently not support ParallelExecutor on XPUPlace.
+        if not paddle.is_compiled_with_xpu():
+            self.check_inplace_output_with_place(
+                place, no_check_set=no_check_set, inplace_atol=inplace_atol)
 
         if check_dygraph:
             return outs, dygraph_outs, fetch_list

diff --git a/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_mean_op_xpu.py
@@ -0,0 +1,144 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import sys
+sys.path.append("..")
+from op_test import OpTest
+import paddle
+import paddle.fluid.core as core
+import paddle.fluid as fluid
+from paddle.fluid import Program, program_guard
+
+np.random.seed(10)
+
+
+class TestMeanOp(OpTest):
+    def setUp(self):
+        self.op_type = "mean"
+        self.dtype = np.float64
+        self.init_dtype_type()
+        self.inputs = {'X': np.random.random((10, 10)).astype(self.dtype)}
+        self.outputs = {'Out': np.mean(self.inputs["X"])}
+
+    def init_dtype_type(self):
+        pass
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_checkout_grad(self):
+        self.check_grad(['X'], 'Out')
+
+
+class TestMeanOpError(unittest.TestCase):
+    def test_errors(self):
+        with program_guard(Program(), Program()):
+            # The input type of mean_op must be Variable.
+            input1 = 12
+            self.assertRaises(TypeError, fluid.layers.mean, input1)
+            # The input dtype of mean_op must be float16, float32, float64.
+            input2 = fluid.layers.data(
+                name='input2', shape=[12, 10], dtype="int32")
+            self.assertRaises(TypeError, fluid.layers.mean, input2)
+            input3 = fluid.layers.data(
+                name='input3', shape=[4], dtype="float16")
+            fluid.layers.softmax(input3)
+
+
+class TestXPUMeanOp(TestMeanOp):
+    def init_dtype_type(self):
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_output_with_place(place)
+
+    def test_checkout_grad(self):
+        if paddle.is_compiled_with_xpu():
+            paddle.enable_static()
+            place = paddle.XPUPlace(0)
+            self.check_grad_with_place(place, ['X'], 'Out')
+
+
+class TestMeanAPI(unittest.TestCase):
+    # test paddle.tensor.stat.mean
+
+    def setUp(self):
+        self.x_shape = [2, 3, 4, 5]
+        self.x = np.random.uniform(-1, 1, self.x_shape).astype(np.float32)
+        self.place = paddle.XPUPlace(0)
+
+    def test_api_static(self):
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.data('X', self.x_shape)
+            out1 = paddle.mean(x)
+            out2 = paddle.tensor.mean(x)
+            out3 = paddle.tensor.stat.mean(x)
+            axis = np.arange(len(self.x_shape)).tolist()
+            out4 = paddle.mean(x, axis)
+            out5 = paddle.mean(x, tuple(axis))
+
+            exe = paddle.static.Executor(self.place)
+            res = exe.run(feed={'X': self.x},
+                          fetch_list=[out1, out2, out3, out4, out5])
+        out_ref = np.mean(self.x)
+        for out in res:
+            self.assertEqual(np.allclose(out, out_ref, rtol=1e-04), True)
+
+    def test_api_dygraph(self):
+        paddle.disable_static(self.place)
+
+        def test_case(x, axis=None, keepdim=False):
+            x_tensor = paddle.to_tensor(x)
+            out = paddle.mean(x_tensor, axis, keepdim)
+            if isinstance(axis, list):
+                axis = tuple(axis)
+                if len(axis) == 0:
+                    axis = None
+            out_ref = np.mean(x, axis, keepdims=keepdim)
+            self.assertEqual(
+                np.allclose(
+                    out.numpy(), out_ref, rtol=1e-04), True)
+
+        test_case(self.x)
+        test_case(self.x, [])
+        test_case(self.x, -1)
+        test_case(self.x, keepdim=True)
+        test_case(self.x, 2, keepdim=True)
+        test_case(self.x, [0, 2])
+        test_case(self.x, (0, 2))
+        test_case(self.x, [0, 1, 2, 3])
+        paddle.enable_static()
+
+    def test_errors(self):
+        paddle.disable_static()
+        x = np.random.uniform(-1, 1, [10, 12]).astype('float32')
+        x = paddle.to_tensor(x)
+        self.assertRaises(Exception, paddle.mean, x, -3)
+        self.assertRaises(Exception, paddle.mean, x, 2)
+        paddle.enable_static()
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.data('X', [10, 12], 'int32')
+            self.assertRaises(TypeError, paddle.mean, x)
+
+
+# Run every test case in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()