Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

modify d2d copy to xpu::copy in xpu kernel, test=kunlun #48710

Merged
merged 1 commit into from
Dec 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion paddle/fluid/operators/reader/buffered_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,13 @@ void BufferedReader::ReadAsync(size_t i) {
// TODO(zhanghuan) for now hardware not support xpu_memcpy_async, maybe
// KL3
if ((platform::is_xpu_place(cpu_place))) {
memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
platform::XPUStreamSync(stream_.get());
char *tmp = new char[size];
PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
tmp, cpu_ptr, size, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
PADDLE_ENFORCE_XPU_SUCCESS(xpu_memcpy(
xpu_ptr, tmp, size, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
delete[] tmp;
} else {
memory::Copy(place_, xpu_ptr, cpu_place, cpu_ptr, size);
}
Expand Down
19 changes: 6 additions & 13 deletions paddle/phi/backends/xpu/xpu_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,19 +169,12 @@ void MemcpySyncD2D(void* dst,
const phi::XPUContext& dev_ctx) {
int dev_id = GetXPUCurrentDeviceId();
if (dst_place.device == dev_id && src_place.device == dev_id) {
dev_ctx.Wait();
char* tmp = new char[count];
PADDLE_ENFORCE_XPU_SUCCESS(
xpu_memcpy(tmp, src, count, XPUMemcpyKind::XPU_DEVICE_TO_HOST));
PADDLE_ENFORCE_XPU_SUCCESS(
xpu_memcpy(dst, tmp, count, XPUMemcpyKind::XPU_HOST_TO_DEVICE));
delete[] tmp;
// PADDLE_ENFORCE_XDNN_SUCCESS(
// baidu::xpu::api::copy(dev_ctx.x_context(),
// static_cast<const int8_t*>(src),
// static_cast<int8_t*>(dst),
// count),
// "copy ");
PADDLE_ENFORCE_XDNN_SUCCESS(
baidu::xpu::api::copy(dev_ctx.x_context(),
static_cast<const int8_t*>(src),
static_cast<int8_t*>(dst),
count),
"copy ");
} else {
PADDLE_ENFORCE_XPU_SUCCESS(
xpu_memcpy_peer(dst_place.device, dst, src_place.device, src, count));
Expand Down
22 changes: 22 additions & 0 deletions paddle/phi/kernels/reshape_grad_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include "paddle/phi/backends/all_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/core/tensor_utils.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#endif

namespace phi {

Expand All @@ -29,6 +32,25 @@ void ReshapeGradKernel(const Context& dev_ctx,
x_grad->Resize(x_dims);
}

#ifdef PADDLE_WITH_XPU
// XPU specialization: reshape is a metadata-only op, so its gradient is an
// identity mapping. The backward pass therefore reduces to a raw
// device-to-device byte copy of out_grad into x_grad, done with the XDNN
// copy primitive so the data never leaves the device.
template <>
void ReshapeGradKernel<phi::XPUContext>(const XPUContext& dev_ctx,
                                        const DenseTensor& out_grad,
                                        DenseTensor* x_grad) {
  // Remember the gradient's shape up front; Alloc/copy work on flat bytes
  // and the shape is restored at the end.
  const auto grad_dims = x_grad->dims();
  dev_ctx.Alloc(x_grad, out_grad.dtype());
  const auto num_bytes =
      out_grad.numel() * paddle::experimental::SizeOf(out_grad.dtype());
  const auto* from = reinterpret_cast<const int8_t*>(out_grad.data());
  auto* to = reinterpret_cast<int8_t*>(x_grad->data());
  int r = xpu::copy(dev_ctx.x_context(), from, to, num_bytes);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
  x_grad->Resize(grad_dims);
}
#endif

template <typename Context>
void ReshapeDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out_grad,
Expand Down
30 changes: 30 additions & 0 deletions paddle/phi/kernels/reshape_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#include "paddle/phi/core/tensor_utils.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/funcs/common_shape.h"
#ifdef PADDLE_WITH_XPU
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#endif

namespace phi {

Expand All @@ -42,6 +45,33 @@ void ReshapeKernel(const Context& dev_ctx,
out->ResetLoD(x.lod());
}

#ifdef PADDLE_WITH_XPU
// XPU specialization of reshape: infer the output meta from the requested
// shape, then materialize the result as a flat byte copy via the XDNN copy
// primitive. When x and out already share the same allocation (in-place
// reshape) no copy is needed — only the allocation bookkeeping is refreshed.
template <>
void ReshapeKernel<phi::XPUContext>(const XPUContext& dev_ctx,
                                    const DenseTensor& x,
                                    const IntArray& shape,
                                    DenseTensor* out) {
  MetaTensor meta_out(out);
  InferMetaFromVecValue(x, shape.GetData(), &meta_out);
  // NOTE: the Holder() comparison must happen before Alloc, since Alloc may
  // replace out's underlying buffer.
  const bool shares_buffer = x.initialized() && x.Holder() == out->Holder();
  if (shares_buffer) {
    dev_ctx.Alloc(out, x.dtype());
    return;
  }
  dev_ctx.Alloc(out, x.dtype());
  // Preserve the inferred shape across the flat copy and re-apply it after.
  const auto out_dims = out->dims();
  const auto num_bytes = x.numel() * paddle::experimental::SizeOf(x.dtype());
  const auto* from = reinterpret_cast<const int8_t*>(x.data());
  auto* to = reinterpret_cast<int8_t*>(out->data());
  int r = xpu::copy(dev_ctx.x_context(), from, to, num_bytes);
  PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
  out->Resize(out_dims);
  out->ResetLoD(x.lod());
}
#endif

template <typename Context>
void ReshapeWithXShape(const Context& dev_ctx,
const DenseTensor& x,
Expand Down
12 changes: 5 additions & 7 deletions paddle/phi/kernels/xpu/gather_nd_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ void GatherNdKernel(const Context &ctx,
if (x.numel() == 0) return;

if (index.numel() == 0) {
phi::Copy(ctx, x, phi::XPUPlace(), true, out);
out->Resize(x.dims());
ctx.template Alloc<T>(out);
int r = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
return;
}

Expand Down Expand Up @@ -69,12 +72,7 @@ void GatherNdKernel(const Context &ctx,
x_vec,
index_shape);
}
PADDLE_ENFORCE_EQ(
ret,
XPU_SUCCESS,
phi::errors::External("XPU gather_nd kernel return wrong value[%d %s]",
ret,
XPUAPIErrorMsg[ret]));
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gather_nd");
}

} // namespace phi
Expand Down
20 changes: 10 additions & 10 deletions paddle/phi/kernels/xpu/generate_proposals_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -372,16 +372,16 @@ void GenerateProposalsKernel(const Context& dev_ctx,
DenseTensor& proposals = tensor_pair.first;
DenseTensor& nscores = tensor_pair.second;

paddle::memory::Copy(place,
rpn_rois->data<T>() + num_proposals * 4,
place,
proposals.data<T>(),
sizeof(T) * proposals.numel());
paddle::memory::Copy(place,
rpn_roi_probs->data<T>() + num_proposals,
place,
nscores.data<T>(),
sizeof(T) * scores.numel());
r = xpu::copy(dev_ctx.x_context(),
proposals.data<T>(),
rpn_rois->data<T>() + num_proposals * 4,
proposals.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
r = xpu::copy(dev_ctx.x_context(),
nscores.data<T>(),
rpn_roi_probs->data<T>() + num_proposals,
nscores.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");

if (dev_ctx.x_context()->xpu_stream) {
dev_ctx.Wait();
Expand Down
5 changes: 4 additions & 1 deletion paddle/phi/kernels/xpu/scatter_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ void ScatterKernel(const Context &ctx,
const DenseTensor &updates,
bool overwrite,
DenseTensor *out) {
phi::Copy(ctx, x, ctx.GetPlace(), false, out);
out->Resize(x.dims());
ctx.template Alloc<T>(out);
int ret = xpu::copy(ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
// Apply ScatterUpdate: Out[index] = Updates[:]
const auto &index_type = index.dtype();
bool index_type_match =
Expand Down
6 changes: 5 additions & 1 deletion paddle/phi/kernels/xpu/tile_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,11 @@ void TileKernel(const Context& dev_ctx,

std::vector<int64_t> temp(repeat_times.size(), 1);
if (repeat_times == temp) {
phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, out);
out->Resize(x.dims());
dev_ctx.template Alloc<T>(out);
int r =
xpu::copy(dev_ctx.x_context(), x.data<T>(), out->data<T>(), x.numel());
PADDLE_ENFORCE_XDNN_SUCCESS(r, "copy");
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@
"c_embedding_float32", # unittests of collective ops do not using xpu testing framework
"c_sync_comm_stream_float32",
"c_sync_calc_stream_float32",
"reshape2_bool",
"reshape2_grad_bool",
]
xpu_test_device_op_white_list = []
xpu_test_device_op_type_white_list = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class TestReshapeOp(XPUOpTest):
def setUp(self):
self.init_data()
self.op_type = "reshape2"
self.dtype = self.in_type
self.init_test_input()
self.init_test_output()
self.init_attrs()
Expand Down