PaddlePaddle · Aganlengzi · Jul 15, 2022 · Jul 14, 2022 · Jul 14, 2022 · Jul 14, 2022
diff --git a/python/paddle/fluid/tests/unittests/ipu/custom_ops/README.md b/python/paddle/fluid/tests/unittests/ipu/custom_ops/README.md
@@ -0,0 +1,71 @@
+# Add custom op for Paddle on IPU
+
+## Add custom op in Paddle
+
+reference
+
+https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html
+
+## Write custom op for PopART
+
+reference
+
+https://docs.graphcore.ai/projects/popart-user-guide/en/latest/custom_ops.html
+
+## Register custom op for Paddle on IPU
+
+这里采用即时编译(JIT Compile) 的方法使用 custom op.
+
+### 实现 custom op
+
+根据上面的两个文档, 首先添加 custom op 的实现.
+
+`leaky_relu_cpu.cc` 包含了 Paddle 中 custom op 的定义和 cpu 实现, 这里的实现和标准的 Paddle 添加 custom op 的方式是完全一致的. 这里的 cpu 实现不是必须的, 但可以用来检验 ipu 实现的正确性.
+
+`leaky_relu_ipu.cc` 包含了 PopART 中 custom op 的定义和 ipu 实现, 同样的, 这里的实现和标准的 PopART 添加 custom op 的方式是完全一致的.
+
+### 载入 custom op
+
+分别在 Paddle 和 PopART 中实现 custom op 的定义后, 使用 `paddle.utils.cpp_extension.load` 编译源文件并把对应的动态库加载到当前进程中.
+
+```python
+
+cur_dir = os.path.dirname(os.path.realpath(__file__))
+custom_ops = load(
+    name="custom_jit_ops",
+    sources=[
+        f"{cur_dir}/leaky_relu_cpu.cc",
+        f"{cur_dir}/leaky_relu_ipu.cc",
+    ],
+    # 编译 leaky_relu_ipu.cc 时需要添加此参数
+    extra_cxx_cflags=['-DONNX_NAMESPACE=onnx'])
+
+```
+
+由于 Paddle 中 op 的定义和 PopART 中存在一些差异, 需要手动映射 custom op
+
+```python
+
+# paddle_op is the custom op type in Paddle
+# popart_op, domain and version together identify the custom op in PopART
+ipu_strategy = paddle.static.IpuStrategy()
+ipu_strategy.add_custom_op(
+    paddle_op="custom_leaky_relu",
+    popart_op="LeakyRelu",
+    domain='custom.ops',
+    version=1)
+
+```
+
+### 使用 custom op
+
+```python
+
+x = paddle.static.data(
+    name=self.feed_list[0],
+    shape=self.feed_shape[0],
+    dtype=self.feed_dtype[0])
+# custom op
+out = custom_ops.custom_leaky_relu(x, **self.attrs)
+
+```
diff --git a/python/paddle/fluid/tests/unittests/ipu/custom_ops/leaky_relu_cpu.cc b/python/paddle/fluid/tests/unittests/ipu/custom_ops/leaky_relu_cpu.cc
@@ -0,0 +1,111 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/extension.h"
+
+// Passes `x.place() == paddle::PlaceType::kCPU` to PD_CHECK, together with a
+// message that names the argument expression through stringification (#x).
+// The parameter is parenthesized so that any expression, not only a plain
+// identifier, expands correctly.
+#define CHECK_INPUT(x)                             \
+  PD_CHECK((x).place() == paddle::PlaceType::kCPU, \
+           #x " must be a CPU Tensor.")
+
+// Element-wise LeakyReLU forward pass over raw buffers.
+//
+// Computes out_data[i] = x_data[i] > 0 ? x_data[i] : alpha * x_data[i] for
+// every i in [0, x_numel).
+//
+//   x_data    input buffer, read for x_numel elements
+//   out_data  output buffer, written for x_numel elements
+//   x_numel   number of elements to process; nothing is touched when it is 0
+//   alpha     slope applied to non-positive inputs (converted to data_t)
+template <typename data_t>
+void leaky_relu_cpu_forward_kernel(const data_t* x_data,
+                                   data_t* out_data,
+                                   int64_t x_numel,
+                                   float alpha) {
+  const data_t zero = static_cast<data_t>(0.);
+  const data_t slope = static_cast<data_t>(alpha);
+  // The counter is int64_t to match x_numel; an int counter would overflow
+  // for buffers holding more than INT_MAX elements.
+  for (int64_t i = 0; i < x_numel; ++i) {
+    const data_t value = x_data[i];
+    out_data[i] = value > zero ? value : slope * value;
+  }
+}
+
+// Element-wise LeakyReLU backward pass over raw buffers.
+//
+// Computes grad_x_data[i] = grad_out_data[i] * (out_data[i] > 0 ? 1 : alpha)
+// for every i in [0, out_numel). The branch mirrors
+// leaky_relu_cpu_forward_kernel, so an element that is exactly zero takes the
+// alpha slope.
+//
+//   grad_out_data  upstream gradient, read for out_numel elements
+//   out_data       forward output, read for out_numel elements
+//   grad_x_data    gradient w.r.t. the input, written for out_numel elements
+//   out_numel      number of elements to process
+//   alpha          slope of the non-positive branch (converted to data_t)
+//
+// NOTE(review): the sign is read from the forward output rather than from the
+// input; the two agree only when alpha >= 0 - confirm callers never pass a
+// negative alpha.
+template <typename data_t>
+void leaky_relu_cpu_backward_kernel(const data_t* grad_out_data,
+                                    const data_t* out_data,
+                                    data_t* grad_x_data,
+                                    int64_t out_numel,
+                                    float alpha) {
+  const data_t zero = static_cast<data_t>(0);
+  const data_t slope = static_cast<data_t>(alpha);
+  // The counter is int64_t to match out_numel and avoid int overflow.
+  for (int64_t i = 0; i < out_numel; ++i) {
+    const data_t upstream = grad_out_data[i];
+    // Chain rule: scale the upstream gradient by the local slope (1 or alpha).
+    grad_x_data[i] = out_data[i] > zero ? upstream : slope * upstream;
+  }
+}
+
+// Forward entry point of the custom LeakyReLU op on CPU.
+//
+// Runs CHECK_INPUT on `x`, creates a CPU tensor with the same shape as `x`,
+// and fills it by calling leaky_relu_cpu_forward_kernel over x.size()
+// elements; the element type is chosen by PD_DISPATCH_FLOATING_TYPES from
+// x.type(). Returns a one-element vector holding that tensor.
+std::vector<paddle::Tensor> LeakyReluCPUForward(const paddle::Tensor& x,
+                                                float alpha) {
+  CHECK_INPUT(x);
+
+  paddle::Tensor result(paddle::PlaceType::kCPU, x.shape());
+
+  PD_DISPATCH_FLOATING_TYPES(x.type(), "relu_cpu_forward_kernel", ([&] {
+                               const data_t* src = x.data<data_t>();
+                               data_t* dst =
+                                   result.mutable_data<data_t>(x.place());
+                               leaky_relu_cpu_forward_kernel<data_t>(
+                                   src, dst, x.size(), alpha);
+                             }));
+
+  return {result};
+}
+
+// Backward entry point of the custom LeakyReLU op on CPU.
+//
+// Runs CHECK_INPUT on `x`, `out` and `grad_out`, creates a CPU tensor with
+// the same shape as `x`, and fills it by calling
+// leaky_relu_cpu_backward_kernel over out.size() elements; the element type
+// is chosen by PD_DISPATCH_FLOATING_TYPES from out.type(). Returns a
+// one-element vector holding the gradient with respect to `x`.
+std::vector<paddle::Tensor> LeakyReluCPUBackward(const paddle::Tensor& x,
+                                                 const paddle::Tensor& out,
+                                                 const paddle::Tensor& grad_out,
+                                                 float alpha) {
+  CHECK_INPUT(x);
+  CHECK_INPUT(out);
+  CHECK_INPUT(grad_out);
+
+  paddle::Tensor x_grad(paddle::PlaceType::kCPU, x.shape());
+
+  PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward_kernel", ([&] {
+                               const data_t* upstream =
+                                   grad_out.data<data_t>();
+                               const data_t* forward_out = out.data<data_t>();
+                               data_t* dst =
+                                   x_grad.mutable_data<data_t>(x.place());
+                               leaky_relu_cpu_backward_kernel<data_t>(
+                                   upstream,
+                                   forward_out,
+                                   dst,
+                                   out.size(),
+                                   alpha);
+                             }));
+
+  return {x_grad};
+}
+
+// Shape inference: returns a one-element list containing a copy of
+// `x_shape`, i.e. the single output has the same shape as the input.
+std::vector<std::vector<int64_t>> LeakyReluInferShape(
+    std::vector<int64_t> x_shape) {
+  std::vector<std::vector<int64_t>> out_shapes;
+  out_shapes.push_back(x_shape);
+  return out_shapes;
+}
+
+// Dtype inference: returns a one-element list containing `x_dtype`, i.e. the
+// single output has the same data type as the input.
+std::vector<paddle::DataType> LeakyReluInferDtype(paddle::DataType x_dtype) {
+  std::vector<paddle::DataType> out_dtypes;
+  out_dtypes.push_back(x_dtype);
+  return out_dtypes;
+}
+
+// Registers the forward op `custom_leaky_relu`: one input "X", one output
+// "Out" and a float attribute "alpha". LeakyReluCPUForward is the kernel;
+// LeakyReluInferShape and LeakyReluInferDtype supply the output shape and
+// data type.
+PD_BUILD_OP(custom_leaky_relu)
+    .Inputs({"X"})
+    .Outputs({"Out"})
+    .Attrs({"alpha: float"})
+    .SetKernelFn(PD_KERNEL(LeakyReluCPUForward))
+    .SetInferShapeFn(PD_INFER_SHAPE(LeakyReluInferShape))
+    .SetInferDtypeFn(PD_INFER_DTYPE(LeakyReluInferDtype));
+
+// Registers the gradient op for `custom_leaky_relu`: inputs are "X", "Out"
+// and the gradient of "Out"; the output is the gradient of "X". It takes the
+// same float attribute "alpha" and uses LeakyReluCPUBackward as the kernel.
+PD_BUILD_GRAD_OP(custom_leaky_relu)
+    .Inputs({"X", "Out", paddle::Grad("Out")})
+    .Outputs({paddle::Grad("X")})
+    .Attrs({"alpha: float"})
+    .SetKernelFn(PD_KERNEL(LeakyReluCPUBackward));