Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CustomDevice] register Copy for custom device #44200

Merged
merged 7 commits into from
Jul 22, 2022
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 35 additions & 4 deletions paddle/phi/core/tensor_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,9 @@ void Copy(const Context& dev_ctx,
paddle::memory::Copy(
dst_cuda_pinned_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
#endif
}
#ifdef PADDLE_WITH_XPU
else if (paddle::platform::is_xpu_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
} else if (paddle::platform::is_xpu_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
} else if (paddle::platform::is_cpu_place(src_place) &&
paddle::platform::is_xpu_place(dst_place)) {
Expand All @@ -216,11 +215,36 @@ void Copy(const Context& dev_ctx,
return;
}
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size);
#endif
#ifdef PADDLE_WITH_CUSTOM_DEVICE
} else if (paddle::platform::is_custom_place(src_place) && // NOLINT
paddle::platform::is_cpu_place(dst_place)) {
auto stream =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这里需要考虑同步异步吗,还是说都是异步copy? (Translation: Does this need to distinguish synchronous vs. asynchronous behavior here, or are all of these copies asynchronous?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

多谢，已修改 (Translation: Thanks — this has been fixed.)

reinterpret_cast<const paddle::platform::CustomDeviceContext&>(dev_ctx)
.stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
} else if (paddle::platform::is_cpu_place(src_place) && // NOLINT
paddle::platform::is_custom_place(dst_place)) {
auto stream =
reinterpret_cast<const paddle::platform::CustomDeviceContext&>(dev_ctx)
.stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
} else if (paddle::platform::is_custom_place(src_place) && // NOLINT
paddle::platform::is_custom_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
auto stream =
reinterpret_cast<const paddle::platform::CustomDeviceContext&>(dev_ctx)
.stream();
paddle::memory::Copy(dst_place, dst_ptr, src_place, src_ptr, size, stream);
#endif
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"Copy from %s to %s is not supported.", src_place, dst_place));
}
#endif
}

template <typename Context>
Expand Down Expand Up @@ -363,4 +387,11 @@ template void Copy(const XPUContext& dev_ctx,
DenseTensor* dst);
#endif

#ifdef PADDLE_WITH_CUSTOM_DEVICE
// Explicit instantiation of the generic Copy<Context> template for
// plugin-registered custom devices, mirroring the XPU/other-context
// instantiations above so custom-device builds link against this symbol.
template void Copy(const CustomContext& dev_ctx,
const DenseTensor& src,
Place dst_place,
bool blocking,
DenseTensor* dst);
#endif
} // namespace phi
27 changes: 27 additions & 0 deletions python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def test_custom_device(self):
self._test_custom_device_dataloader()
self._test_custom_device_mnist()
self._test_eager_backward_api()
self._test_eager_copy_to()
self._test_custom_device_dataloader()
self._test_custom_device_mnist()

Expand Down Expand Up @@ -133,6 +134,32 @@ def _test_eager_backward_api(self):

self.assertTrue(x_tensor.grad.place.is_custom_place())

def _test_eager_copy_to(self):
    """Exercise Tensor._copy_to for every CPU/custom-device direction.

    Covers cpu->custom, custom->custom, custom->cpu, and the
    copy-onto-itself case, checking data equality and resulting place
    after each blocking copy.
    """
    import paddle
    src = np.random.random([2, 2]).astype("float32")
    # cpu -> custom
    host_tensor = paddle.to_tensor(src,
                                   dtype='float32',
                                   place=paddle.CPUPlace())
    dev_tensor = host_tensor._copy_to(
        paddle.CustomPlace('custom_cpu', 0), True)
    self.assertTrue(np.array_equal(dev_tensor, src))
    self.assertTrue(dev_tensor.place.is_custom_place())
    # custom -> custom
    dev_tensor_2 = dev_tensor._copy_to(
        paddle.CustomPlace('custom_cpu', 0), True)
    self.assertTrue(np.array_equal(dev_tensor_2, src))
    self.assertTrue(dev_tensor_2.place.is_custom_place())
    # custom -> cpu
    roundtrip_tensor = dev_tensor._copy_to(paddle.CPUPlace(), True)
    self.assertTrue(np.array_equal(roundtrip_tensor, src))
    self.assertTrue(roundtrip_tensor.place.is_cpu_place())
    # custom -> custom self
    dev_tensor_2 = dev_tensor_2._copy_to(
        paddle.CustomPlace('custom_cpu', 0), True)
    self.assertTrue(np.array_equal(dev_tensor_2, src))
    self.assertTrue(dev_tensor_2.place.is_custom_place())

def tearDown(self):
    # Drop the plugin search path so later tests start from a clean
    # environment (raises KeyError if the setUp never exported it).
    os.environ.pop('CUSTOM_DEVICE_ROOT')

Expand Down