[PTen]Elementwise_div Kernel Refactor #37418

Merged · 2 commits · Nov 23, 2021
25 changes: 0 additions & 25 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -22,31 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename T>
struct SameDimsElemwiseDiv<
platform::CPUDeviceContext, T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
void operator()(const framework::ExecutionContext &ctx,
const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z) {
auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
blas.VDIV(x->numel(), x->data<T>(), y->data<T>(), z->data<T>());
}
};

// use the default div function for int32/int64 types because of division-by-zero
// checking.
template <typename T>
struct SameDimsElemwiseDiv<
platform::CPUDeviceContext, T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
void operator()(const framework::ExecutionContext &ctx,
const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z) {
default_elementwise_div<platform::CPUDeviceContext, T>(ctx, x, y, z);
}
};

class ElementwiseDivOpMaker : public ElementwiseOpMaker {
protected:
std::string GetName() const override { return "Div"; }
16 changes: 0 additions & 16 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cu
@@ -23,22 +23,6 @@ namespace plat = paddle::platform;
namespace paddle {
namespace operators {

template <typename T>
class ElementwiseDivKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
std::vector<const framework::Tensor*> ins;
std::vector<framework::Tensor*> outs;
const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>();

int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, DivFunctor<T>());
}
};

template <typename T>
static __global__ void SimpleElemwiseDivGradCUDAKernel(const T* x, const T* y,
const T* out,
27 changes: 13 additions & 14 deletions paddle/fluid/operators/elementwise/elementwise_div_op.h
@@ -23,6 +23,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"

#include "paddle/fluid/framework/pten_utils.h"

// only headers in the paddle/pten/include dirs may be included here
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/math.h"
namespace paddle {
namespace operators {

@@ -42,13 +48,6 @@ void default_elementwise_div(const framework::ExecutionContext& ctx,
}
}

template <typename DeviceContext, typename T, class Enable = void>
struct SameDimsElemwiseDiv {
void operator()(const framework::ExecutionContext& ctx,
const framework::Tensor* x, const framework::Tensor* y,
framework::Tensor* z);
};

template <typename DeviceContext, typename T>
class ElementwiseDivKernel : public framework::OpKernel<T> {
public:
@@ -58,13 +57,13 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
auto* z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace());

auto dims_equal = x->dims() == y->dims();
if (dims_equal) {
SameDimsElemwiseDiv<DeviceContext, T> same_dims_div;
same_dims_div(ctx, x, y, z);
} else {
default_elementwise_div<DeviceContext, T>(ctx, x, y, z);
}
auto& dev_ctx = ctx.device_context<DeviceContext>();
int axis = ctx.Attr<int>("axis");
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
pten::ElementwiseDiv<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
}
};

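The new `Compute` above no longer branches between a same-dims fast path and the broadcast path inside fluid; it wraps the fluid tensors as pten `DenseTensor`s and forwards everything to `pten::ElementwiseDiv`. A minimal stand-alone sketch of that adapter pattern — all types below are hypothetical stand-ins, not Paddle API:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for a pten::DenseTensor "view" over fluid-owned memory.
struct DenseTensorView {
  float* data;
  std::size_t numel;
};

// Stand-in for the device-agnostic pten kernel.
void PtenElementwiseDiv(const DenseTensorView& x, const DenseTensorView& y,
                        DenseTensorView* out) {
  assert(x.numel == y.numel && x.numel == out->numel);
  for (std::size_t i = 0; i < x.numel; ++i) {
    out->data[i] = x.data[i] / y.data[i];
  }
}

// Stand-in for the fluid-side Compute(): wrap without copying, then dispatch.
void Compute(std::vector<float>& x, std::vector<float>& y,
             std::vector<float>& z) {
  DenseTensorView px{x.data(), x.size()};  // role of MakePtenDenseTensor(*x)
  DenseTensorView py{y.data(), y.size()};
  DenseTensorView pz{z.data(), z.size()};
  PtenElementwiseDiv(px, py, &pz);         // role of pten::ElementwiseDiv<T>
}
```

The key property is that no data is copied: the pten kernel runs directly on the buffers the fluid tensors own.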
6 changes: 6 additions & 0 deletions paddle/fluid/operators/elementwise/elementwise_op.h
@@ -154,6 +154,12 @@ class ElementwiseOp : public framework::OperatorWithKernel {
{"axis"}, {"Out"});
}
}
if (Type() == "elementwise_div") {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
return framework::KernelSignature("elementwise_div", {"X", "Y"},
{"axis"}, {"Out"});
}
}
return framework::KernelSignature("None", {"X"}, {}, {"Out"});
}
};
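The signature registered above can be read as a plain name-based mapping from the fluid op's inputs, attributes, and outputs to the pten kernel's arguments. A hypothetical sketch of what it encodes (not the real `framework::KernelSignature` type):

```cpp
#include <string>
#include <vector>

// Hypothetical model of a kernel signature: the pten kernel name plus the
// fluid input, attribute, and output names that feed it, in order.
struct KernelSignatureSketch {
  std::string kernel_name;
  std::vector<std::string> inputs;
  std::vector<std::string> attrs;
  std::vector<std::string> outputs;
};

// Mirrors KernelSignature("elementwise_div", {"X", "Y"}, {"axis"}, {"Out"}).
const KernelSignatureSketch kDivSignature{
    "elementwise_div", {"X", "Y"}, {"axis"}, {"Out"}};
```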
2 changes: 2 additions & 0 deletions paddle/pten/api/include/math.h
@@ -26,5 +26,7 @@ PD_DLL_DECL Tensor mean(const Tensor& x);
PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);

PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);

PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
} // namespace experimental
} // namespace paddle
35 changes: 35 additions & 0 deletions paddle/pten/api/lib/math.cc
@@ -130,6 +130,41 @@ PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {

return out;
}

PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_div", kernel_key);

// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);

// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);

// 4. InferShape
auto out_meta = ElementwiseInferShape(dense_x->meta(), dense_y->meta(), -1);

// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);

// 6. Call kernel
kernel(&kernel_context);

return out;
}
} // namespace experimental
} // namespace paddle

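The numbered steps in `divide()` above are the generic dispatch recipe of the new API layer: select a kernel by key, set up the device context, pack inputs and attributes, infer the output meta, allocate the output, and call. A compressed, hypothetical model of that flow — stand-in types only, not the real `pten::KernelFactory`:

```cpp
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for paddle::experimental::Tensor
using Kernel = std::function<void(const Tensor&, const Tensor&, int, Tensor*)>;

// Stand-in for the kernel registry keyed by (name, backend, layout, dtype);
// the key is collapsed to just the name here.
std::map<std::string, Kernel>& Registry() {
  static std::map<std::string, Kernel> registry;
  return registry;
}

Tensor divide(const Tensor& x, const Tensor& y) {
  // 1.-2. select the kernel and its device context by key
  auto it = Registry().find("elementwise_div");
  if (it == Registry().end()) throw std::runtime_error("kernel not found");
  // 3. pack inputs and attributes (the real code emplaces them into a
  //    KernelContext; axis defaults to -1)
  // 4.-5. infer the output shape and allocate the output
  Tensor out(x.size());
  // 6. call the kernel
  it->second(x, y, /*axis=*/-1, &out);
  return out;
}
```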
21 changes: 17 additions & 4 deletions paddle/pten/include/math.h
@@ -75,10 +75,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
}

template <typename T, typename ContextT>
DenseTensor ElementwiseAdd(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
DenseTensor Add(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferShape(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
@@ -102,4 +102,17 @@ DenseTensor Subtract(const ContextT& dev_ctx,
return dense_out;
}

template <typename T, typename ContextT>
DenseTensor Divide(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferShape(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
} // namespace pten
34 changes: 34 additions & 0 deletions paddle/pten/kernels/cpu/math.cc
@@ -114,6 +114,30 @@ void ElementwiseSub(const CPUContext& dev_ctx,
}
}

template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
Contributor commented: Wouldn't it be better to keep the naming of the mid-level kernel API consistent with the low-level kernel API? This can be unified in a follow-up change.

Contributor (author) replied: OK, I'll change it in the next PR.

const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
out->mutable_data<T>();
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<general::SameDimsDivFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
} else {
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
ElementwiseCompute<general::DivFunctor<T>, T>(
dev_ctx, x, y, axis, general::DivFunctor<T>(), out);
} else {
ElementwiseCompute<general::InverseDivFunctor<T>, T>(
dev_ctx, x, y, axis, general::InverseDivFunctor<T>(), out);
}
}
}

} // namespace pten

// TODO(chenweihang): replace by better impl
@@ -174,3 +198,13 @@ PT_REGISTER_KERNEL("elementwise_sub",
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL("elementwise_div",
CPU,
ANY,
pten::ElementwiseDiv,
float,
double,
int,
int64_t,
complex64,
complex128) {}
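In the else-branch of the CPU `ElementwiseDiv` above, `ElementwiseCompute` is always handed the higher-rank tensor first; when `y` outranks `x` the operands are swapped, and `InverseDivFunctor` undoes the swap by computing `b / a`. A stand-alone illustration of why that works (hypothetical code, outside Paddle):

```cpp
#include <cstdio>

template <typename T>
struct DivFunctor {
  T operator()(T a, T b) const { return a / b; }
};

template <typename T>
struct InverseDivFunctor {
  // The broadcast helper received (y, x) instead of (x, y), so divide the
  // second operand by the first to preserve x / y semantics.
  T operator()(T a, T b) const { return b / a; }
};

int main() {
  double x = 1.0, y = 4.0;
  // Normal operand order:
  std::printf("%g\n", DivFunctor<double>()(x, y));         // prints 0.25
  // Swapped operand order, corrected by the inverse functor:
  std::printf("%g\n", InverseDivFunctor<double>()(y, x));  // prints 0.25
}
```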
6 changes: 6 additions & 0 deletions paddle/pten/kernels/cpu/math.h
@@ -60,4 +60,10 @@ void ElementwiseSub(const CPUContext& dev_ctx,
int axis,
DenseTensor* out);

template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
} // namespace pten
28 changes: 28 additions & 0 deletions paddle/pten/kernels/cuda/math.cu
@@ -158,6 +158,23 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
dev_ctx, inputs, &outputs, axis, general::SubFunctor<T>());
}

template <typename T>
void ElementwiseDiv(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
std::vector<const DenseTensor*> inputs;
std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x);
inputs.emplace_back(&y);
// allocate memory for out
out->mutable_data<T>();
outputs.emplace_back(out);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
dev_ctx, inputs, &outputs, axis, general::DivFunctor<T>());
}

} // namespace pten

// TODO(chenweihang): replace by better impl
@@ -217,3 +234,14 @@ PT_REGISTER_KERNEL("elementwise_sub",
float16,
complex64,
complex128) {}
PT_REGISTER_KERNEL("elementwise_div",
CUDA,
ANY,
pten::ElementwiseDiv,
float,
double,
int,
int64_t,
float16,
complex64,
complex128) {}
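`LaunchElementwiseCudaKernel` is Paddle's vectorized launcher; conceptually, the binary case reduces to a functor applied in a grid-stride loop. A minimal CUDA sketch of that concept — not the actual launcher:

```cuda
template <typename T>
struct DivFunctor {
  __device__ T operator()(T a, T b) const { return a / b; }
};

// Grid-stride binary elementwise kernel, parameterized by a functor.
template <typename T, typename Functor>
__global__ void BinaryElementwise(const T* x, const T* y, T* out, int n,
                                  Functor func) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += gridDim.x * blockDim.x) {
    out[i] = func(x[i], y[i]);
  }
}

// Example launch (x, y, out are device pointers of length n):
//   BinaryElementwise<<<(n + 255) / 256, 256>>>(x, y, out, n,
//                                               DivFunctor<float>{});
```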
7 changes: 7 additions & 0 deletions paddle/pten/kernels/cuda/math.h
@@ -62,6 +62,13 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
int axis,
DenseTensor* out);

template <typename T>
void ElementwiseDiv(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

} // namespace pten

#endif
9 changes: 9 additions & 0 deletions paddle/pten/kernels/functions/blas/elementwise.h
@@ -38,5 +38,14 @@ void ElementwiseSub(const DevCtx& dev_ctx,
blas.VSUB(x.numel(), x.data<T>(), y.data<T>(), out->mutable_data<T>());
}

template <typename DevCtx, typename T>
void ElementwiseDiv(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VDIV(x.numel(), x.data<T>(), y.data<T>(), out->mutable_data<T>());
}

} // namespace blas
} // namespace pten
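`blas.VDIV` maps to MKL's `vdDiv`/`vsDiv` when MKL is available; its reference semantics are plain elementwise division, equivalent to this scalar loop:

```cpp
// Scalar reference for VDIV: out[i] = x[i] / y[i] for i in [0, n).
template <typename T>
void VDivReference(int n, const T* x, const T* y, T* out) {
  for (int i = 0; i < n; ++i) out[i] = x[i] / y[i];
}
```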
60 changes: 60 additions & 0 deletions paddle/pten/kernels/functions/general/elementwise_functor.h
@@ -114,5 +114,65 @@ struct InverseSubFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
};

// Divide
template <typename DevCtx, typename T, class Enable = void>
struct SameDimsDivFunctor {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z);
};

template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
DevCtx,
T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z) {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"When SameDimsDivFunctor is used, the template argument T must be a "
"floating-point type."));
}
};

template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
DevCtx,
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z) {
blas::ElementwiseDiv<DevCtx, T>(dev_ctx, x, y, z);
}
};

#define DIV_ERROR_INFO \
"InvalidArgumentError: Integer division by zero encountered in " \
"(floor) divide. Please check the input value."

template <typename T, typename Enable = void>
struct DivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
// For int32/int64, we need to check for division by zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b;
}
};

template <typename T, typename Enable = void>
struct InverseDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
};

} // namespace general
} // namespace pten
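The `enable_if` split above gives integral types a checked division, while floating-point types divide directly (IEEE-754 defines division by zero). A compilable stand-alone reduction of the same dispatch — names here are illustrative, not Paddle's:

```cpp
#include <stdexcept>
#include <type_traits>

// Primary template: floating-point (and other) types divide directly.
template <typename T, typename Enable = void>
struct DivFunctorSketch {
  T operator()(T a, T b) const { return a / b; }
};

// Specialization for integral types: reject zero divisors before dividing.
template <typename T>
struct DivFunctorSketch<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  T operator()(T a, T b) const {
    if (b == 0) throw std::invalid_argument("integer division by zero");
    return a / b;
  }
};

// DivFunctorSketch<int>()(6, 0)    -> throws std::invalid_argument
// DivFunctorSketch<float>()(6, 0)  -> +inf under IEEE-754
```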