[PTen]Elementwise_div Kernel Refactor #37418

Merged · 2 commits · Nov 23, 2021
25 changes: 0 additions & 25 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cc
@@ -22,31 +22,6 @@ limitations under the License. */
namespace paddle {
namespace operators {

template <typename T>
struct SameDimsElemwiseDiv<
platform::CPUDeviceContext, T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
void operator()(const framework::ExecutionContext &ctx,
const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z) {
auto blas = math::GetBlas<platform::CPUDeviceContext, T>(ctx);
blas.VDIV(x->numel(), x->data<T>(), y->data<T>(), z->data<T>());
}
};

// use the default div function for int32/int64 types because of division-by-zero
// checking.
template <typename T>
struct SameDimsElemwiseDiv<
platform::CPUDeviceContext, T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
void operator()(const framework::ExecutionContext &ctx,
const framework::Tensor *x, const framework::Tensor *y,
framework::Tensor *z) {
default_elementwise_div<platform::CPUDeviceContext, T>(ctx, x, y, z);
}
};

class ElementwiseDivOpMaker : public ElementwiseOpMaker {
protected:
std::string GetName() const override { return "Div"; }
16 changes: 0 additions & 16 deletions paddle/fluid/operators/elementwise/elementwise_div_op.cu
@@ -23,22 +23,6 @@ namespace plat = paddle::platform;
namespace paddle {
namespace operators {

template <typename T>
class ElementwiseDivKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
std::vector<const framework::Tensor*> ins;
std::vector<framework::Tensor*> outs;
const auto& cuda_ctx =
ctx.template device_context<platform::CUDADeviceContext>();

int axis = PackTensorsIntoVector<T>(ctx, &ins, &outs);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
cuda_ctx, ins, &outs, axis, DivFunctor<T>());
}
};

template <typename T>
static __global__ void SimpleElemwiseDivGradCUDAKernel(const T* x, const T* y,
const T* out,
27 changes: 13 additions & 14 deletions paddle/fluid/operators/elementwise/elementwise_div_op.h
@@ -23,6 +23,12 @@ limitations under the License. */
#include "paddle/fluid/operators/math/blas.h"
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"

#include "paddle/fluid/framework/pten_utils.h"

// only headers in the paddle/pten/include dirs may be included here
#include "paddle/pten/api/lib/utils/tensor_utils.h"
#include "paddle/pten/include/core.h"
#include "paddle/pten/include/math.h"
namespace paddle {
namespace operators {

@@ -42,13 +48,6 @@ void default_elementwise_div(const framework::ExecutionContext& ctx,
}
}

template <typename DeviceContext, typename T, class Enable = void>
struct SameDimsElemwiseDiv {
void operator()(const framework::ExecutionContext& ctx,
const framework::Tensor* x, const framework::Tensor* y,
framework::Tensor* z);
};

template <typename DeviceContext, typename T>
class ElementwiseDivKernel : public framework::OpKernel<T> {
public:
@@ -58,13 +57,13 @@ class ElementwiseDivKernel : public framework::OpKernel<T> {
auto* z = ctx.Output<framework::LoDTensor>("Out");
z->mutable_data<T>(ctx.GetPlace());

auto dims_equal = x->dims() == y->dims();
if (dims_equal) {
SameDimsElemwiseDiv<DeviceContext, T> same_dims_div;
same_dims_div(ctx, x, y, z);
} else {
default_elementwise_div<DeviceContext, T>(ctx, x, y, z);
}
auto& dev_ctx = ctx.device_context<DeviceContext>();
int axis = ctx.Attr<int>("axis");
auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
pten::ElementwiseDiv<T>(dev_ctx, *pt_x.get(), *pt_y.get(), axis,
pt_z.get());
}
};

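The new `Compute` above no longer branches between a same-dims fast path and the broadcast path inside fluid; it wraps the fluid tensors as pten `DenseTensor`s and forwards everything to `pten::ElementwiseDiv`. A minimal stand-alone sketch of that adapter pattern — all types below are hypothetical stand-ins, not Paddle API:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

// Stand-in for a pten::DenseTensor "view" over fluid-owned memory.
struct DenseTensorView {
  float* data;
  std::size_t numel;
};

// Stand-in for the device-agnostic pten kernel.
void PtenElementwiseDiv(const DenseTensorView& x, const DenseTensorView& y,
                        DenseTensorView* out) {
  assert(x.numel == y.numel && x.numel == out->numel);
  for (std::size_t i = 0; i < x.numel; ++i) {
    out->data[i] = x.data[i] / y.data[i];
  }
}

// Stand-in for the fluid-side Compute(): wrap without copying, then dispatch.
void Compute(std::vector<float>& x, std::vector<float>& y,
             std::vector<float>& z) {
  DenseTensorView px{x.data(), x.size()};  // role of MakePtenDenseTensor(*x)
  DenseTensorView py{y.data(), y.size()};
  DenseTensorView pz{z.data(), z.size()};
  PtenElementwiseDiv(px, py, &pz);         // role of pten::ElementwiseDiv<T>
}
```

The key property is that no data is copied: the pten kernel runs directly on the buffers the fluid tensors own.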
6 changes: 6 additions & 0 deletions paddle/fluid/operators/elementwise/elementwise_op.h
@@ -154,6 +154,12 @@ class ElementwiseOp : public framework::OperatorWithKernel {
{"axis"}, {"Out"});
}
}
if (Type() == "elementwise_div") {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
return framework::KernelSignature("elementwise_div", {"X", "Y"},
{"axis"}, {"Out"});
}
}
return framework::KernelSignature("None", {"X"}, {}, {"Out"});
}
};
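The signature registered above can be read as a plain name-based mapping from the fluid op's inputs, attributes, and outputs to the pten kernel's arguments. A hypothetical sketch of what it encodes (not the real `framework::KernelSignature` type):

```cpp
#include <string>
#include <vector>

// Hypothetical model of a kernel signature: the pten kernel name plus the
// fluid input, attribute, and output names that feed it, in order.
struct KernelSignatureSketch {
  std::string kernel_name;
  std::vector<std::string> inputs;
  std::vector<std::string> attrs;
  std::vector<std::string> outputs;
};

// Mirrors KernelSignature("elementwise_div", {"X", "Y"}, {"axis"}, {"Out"}).
const KernelSignatureSketch kDivSignature{
    "elementwise_div", {"X", "Y"}, {"axis"}, {"Out"}};
```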
2 changes: 2 additions & 0 deletions paddle/pten/api/include/math.h
@@ -26,5 +26,7 @@ PD_DLL_DECL Tensor mean(const Tensor& x);
PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);

PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);

PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
} // namespace experimental
} // namespace paddle
35 changes: 35 additions & 0 deletions paddle/pten/api/lib/math.cc
@@ -130,6 +130,41 @@ PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y) {

return out;
}

PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y) {
// 1. Get kernel signature and kernel
auto kernel_key_set = ParseKernelKeyByInputArgs(x);
auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
"elementwise_div", kernel_key);

// 2. Get Device Context
auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
auto kernel_context = pten::KernelContext(dev_ctx);

// 3. Auto data transform
auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
kernel_context.EmplaceBackInput(dense_x);
auto dense_y = std::dynamic_pointer_cast<pten::DenseTensor>(y.impl());
kernel_context.EmplaceBackInput(dense_y);
kernel_context.EmplaceBackAttr(-1);

// 4. InferShape
auto out_meta = ElementwiseInferShape(dense_x->meta(), dense_y->meta(), -1);

// 5. Prepare outputs
Tensor out;
const auto allocator = std::make_shared<DefaultAllocator>(
pten::TransToFluidPlace(kernel_key.backend()));
auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
kernel_context.EmplaceBackOutput(dense_out);
out.set_impl(dense_out);

// 6. Call kernel
kernel(&kernel_context);

return out;
}
} // namespace experimental
} // namespace paddle

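The numbered steps in `divide()` above are the generic dispatch recipe of the new API layer: select a kernel by key, set up the device context, pack inputs and attributes, infer the output meta, allocate the output, and call. A compressed, hypothetical model of that flow — stand-in types only, not the real `pten::KernelFactory`:

```cpp
#include <functional>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for paddle::experimental::Tensor
using Kernel = std::function<void(const Tensor&, const Tensor&, int, Tensor*)>;

// Stand-in for the kernel registry keyed by (name, backend, layout, dtype);
// the key is collapsed to just the name here.
std::map<std::string, Kernel>& Registry() {
  static std::map<std::string, Kernel> registry;
  return registry;
}

Tensor divide(const Tensor& x, const Tensor& y) {
  // 1.-2. select the kernel and its device context by key
  auto it = Registry().find("elementwise_div");
  if (it == Registry().end()) throw std::runtime_error("kernel not found");
  // 3. pack inputs and attributes (the real code emplaces them into a
  //    KernelContext; axis defaults to -1)
  // 4.-5. infer the output shape and allocate the output
  Tensor out(x.size());
  // 6. call the kernel
  it->second(x, y, /*axis=*/-1, &out);
  return out;
}
```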
21 changes: 17 additions & 4 deletions paddle/pten/include/math.h
@@ -75,10 +75,10 @@ DenseTensor Scale(const ContextT& dev_ctx,
}

template <typename T, typename ContextT>
DenseTensor ElementwiseAdd(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
DenseTensor Add(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferShape(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
@@ -102,4 +102,17 @@ DenseTensor Subtract(const ContextT& dev_ctx,
return dense_out;
}

template <typename T, typename ContextT>
DenseTensor Divide(const ContextT& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis) {
auto out_meta = ElementwiseInferShape(x.meta(), y.meta(), axis);
const auto allocator =
std::make_shared<paddle::experimental::DefaultAllocator>(
dev_ctx.GetPlace());
pten::DenseTensor dense_out(allocator, out_meta);
ElementwiseDiv<T>(dev_ctx, x, y, axis, &dense_out);
return dense_out;
}
} // namespace pten
34 changes: 34 additions & 0 deletions paddle/pten/kernels/cpu/math.cc
@@ -114,6 +114,30 @@ void ElementwiseSub(const CPUContext& dev_ctx,
}
}

template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
Contributor commented: Wouldn't it be better to keep the naming of the mid-level kernel API consistent with the low-level kernel API? This can be unified in a follow-up change.

Contributor (author) replied: OK, I'll change it in the next PR.

const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
// allocate memory for out
out->mutable_data<T>();
if (x.dims() == y.dims() && std::is_floating_point<T>::value) {
SameDimsElementwiseCompute<general::SameDimsDivFunctor<CPUContext, T>>()(
dev_ctx, x, y, out);
} else {
auto x_dims = x.dims();
auto y_dims = y.dims();
if (x_dims.size() >= y_dims.size()) {
ElementwiseCompute<general::DivFunctor<T>, T>(
dev_ctx, x, y, axis, general::DivFunctor<T>(), out);
} else {
ElementwiseCompute<general::InverseDivFunctor<T>, T>(
dev_ctx, x, y, axis, general::InverseDivFunctor<T>(), out);
}
}
}

} // namespace pten

// TODO(chenweihang): replace by better impl
@@ -174,3 +198,13 @@ PT_REGISTER_KERNEL("elementwise_sub",
int64_t,
complex64,
complex128) {}
PT_REGISTER_KERNEL("elementwise_div",
CPU,
ANY,
pten::ElementwiseDiv,
float,
double,
int,
int64_t,
complex64,
complex128) {}
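In the else-branch of the CPU `ElementwiseDiv` above, `ElementwiseCompute` is always handed the higher-rank tensor first; when `y` outranks `x` the operands are swapped, and `InverseDivFunctor` undoes the swap by computing `b / a`. A stand-alone illustration of why that works (hypothetical code, outside Paddle):

```cpp
#include <cstdio>

template <typename T>
struct DivFunctor {
  T operator()(T a, T b) const { return a / b; }
};

template <typename T>
struct InverseDivFunctor {
  // The broadcast helper received (y, x) instead of (x, y), so divide the
  // second operand by the first to preserve x / y semantics.
  T operator()(T a, T b) const { return b / a; }
};

int main() {
  double x = 1.0, y = 4.0;
  // Normal operand order:
  std::printf("%g\n", DivFunctor<double>()(x, y));         // prints 0.25
  // Swapped operand order, corrected by the inverse functor:
  std::printf("%g\n", InverseDivFunctor<double>()(y, x));  // prints 0.25
}
```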
6 changes: 6 additions & 0 deletions paddle/pten/kernels/cpu/math.h
@@ -60,4 +60,10 @@ void ElementwiseSub(const CPUContext& dev_ctx,
int axis,
DenseTensor* out);

template <typename T>
void ElementwiseDiv(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);
} // namespace pten
28 changes: 28 additions & 0 deletions paddle/pten/kernels/cuda/math.cu
@@ -158,6 +158,23 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
dev_ctx, inputs, &outputs, axis, general::SubFunctor<T>());
}

template <typename T>
void ElementwiseDiv(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out) {
std::vector<const DenseTensor*> inputs;
std::vector<DenseTensor*> outputs;
inputs.emplace_back(&x);
inputs.emplace_back(&y);
// allocate memory for out
out->mutable_data<T>();
outputs.emplace_back(out);
LaunchElementwiseCudaKernel<ElementwiseType::kBinary, T, T>(
dev_ctx, inputs, &outputs, axis, general::DivFunctor<T>());
}

} // namespace pten

// TODO(chenweihang): replace by better impl
@@ -217,3 +234,14 @@ PT_REGISTER_KERNEL("elementwise_sub",
float16,
complex64,
complex128) {}
PT_REGISTER_KERNEL("elementwise_div",
CUDA,
ANY,
pten::ElementwiseDiv,
float,
double,
int,
int64_t,
float16,
complex64,
complex128) {}
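`LaunchElementwiseCudaKernel` is Paddle's vectorized launcher; conceptually, the binary case reduces to a functor applied in a grid-stride loop. A minimal CUDA sketch of that concept — not the actual launcher:

```cuda
template <typename T>
struct DivFunctor {
  __device__ T operator()(T a, T b) const { return a / b; }
};

// Grid-stride binary elementwise kernel, parameterized by a functor.
template <typename T, typename Functor>
__global__ void BinaryElementwise(const T* x, const T* y, T* out, int n,
                                  Functor func) {
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
       i += gridDim.x * blockDim.x) {
    out[i] = func(x[i], y[i]);
  }
}

// Example launch (x, y, out are device pointers of length n):
//   BinaryElementwise<<<(n + 255) / 256, 256>>>(x, y, out, n,
//                                               DivFunctor<float>{});
```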
7 changes: 7 additions & 0 deletions paddle/pten/kernels/cuda/math.h
@@ -62,6 +62,13 @@ void ElementwiseSub(const CUDAContext& dev_ctx,
int axis,
DenseTensor* out);

template <typename T>
void ElementwiseDiv(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
int axis,
DenseTensor* out);

} // namespace pten

#endif
9 changes: 9 additions & 0 deletions paddle/pten/kernels/functions/blas/elementwise.h
@@ -38,5 +38,14 @@ void ElementwiseSub(const DevCtx& dev_ctx,
blas.VSUB(x.numel(), x.data<T>(), y.data<T>(), out->mutable_data<T>());
}

template <typename DevCtx, typename T>
void ElementwiseDiv(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* out) {
auto blas = paddle::operators::math::GetBlas<DevCtx, T>(dev_ctx);
blas.VDIV(x.numel(), x.data<T>(), y.data<T>(), out->mutable_data<T>());
}

} // namespace blas
} // namespace pten
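`blas.VDIV` maps to MKL's `vdDiv`/`vsDiv` when MKL is available; its reference semantics are plain elementwise division, equivalent to this scalar loop:

```cpp
// Scalar reference for VDIV: out[i] = x[i] / y[i] for i in [0, n).
template <typename T>
void VDivReference(int n, const T* x, const T* y, T* out) {
  for (int i = 0; i < n; ++i) out[i] = x[i] / y[i];
}
```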
60 changes: 60 additions & 0 deletions paddle/pten/kernels/functions/general/elementwise_functor.h
@@ -114,5 +114,65 @@ struct InverseSubFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b - a; }
};

// Divide
template <typename DevCtx, typename T, class Enable = void>
struct SameDimsDivFunctor {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z);
};

template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
DevCtx,
T,
typename std::enable_if<!std::is_floating_point<T>::value>::type> {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z) {
PADDLE_THROW(paddle::platform::errors::InvalidArgument(
"When SameDimsDivFunctor is used, the template argument T must be a "
"floating-point type."));
}
};

template <typename DevCtx, typename T>
struct SameDimsDivFunctor<
DevCtx,
T,
typename std::enable_if<std::is_floating_point<T>::value>::type> {
void operator()(const DevCtx& dev_ctx,
const DenseTensor& x,
const DenseTensor& y,
DenseTensor* z) {
blas::ElementwiseDiv<DevCtx, T>(dev_ctx, x, y, z);
}
};

#define DIV_ERROR_INFO \
"InvalidArgumentError: Integer division by zero encountered in " \
"(floor) divide. Please check the input value."

template <typename T, typename Enable = void>
struct DivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return a / b; }
};

template <typename T>
struct DivFunctor<T,
typename std::enable_if<std::is_integral<T>::value>::type> {
inline HOSTDEVICE T operator()(const T& a, const T& b) const {
// For int32/int64, we need to check for division by zero.
PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO);
return a / b;
}
};

template <typename T, typename Enable = void>
struct InverseDivFunctor {
inline HOSTDEVICE T operator()(const T& a, const T& b) const { return b / a; }
};

} // namespace general
} // namespace pten
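The `enable_if` split above gives integral types a checked division, while floating-point types divide directly (IEEE-754 defines division by zero). A compilable stand-alone reduction of the same dispatch — names here are illustrative, not Paddle's:

```cpp
#include <stdexcept>
#include <type_traits>

// Primary template: floating-point (and other) types divide directly.
template <typename T, typename Enable = void>
struct DivFunctorSketch {
  T operator()(T a, T b) const { return a / b; }
};

// Specialization for integral types: reject zero divisors before dividing.
template <typename T>
struct DivFunctorSketch<
    T, typename std::enable_if<std::is_integral<T>::value>::type> {
  T operator()(T a, T b) const {
    if (b == 0) throw std::invalid_argument("integer division by zero");
    return a / b;
  }
};

// DivFunctorSketch<int>()(6, 0)    -> throws std::invalid_argument
// DivFunctorSketch<float>()(6, 0)  -> +inf under IEEE-754
```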