Skip to content

Commit

Permalink
[HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is invalid (#35394)
Browse files Browse the repository at this point in the history

* [HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is invalid

* [HIP] fix op not support AMD GPU bug, the flag PADDLE_WITH_ROCM is invalid

* [HIP] fix op not support AMD GPU bug
  • Loading branch information
xymyeah authored Sep 7, 2021
1 parent 266fcbe commit 28b6407
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 5 deletions.
2 changes: 1 addition & 1 deletion paddle/fluid/operators/memcpy_d2h_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
ops::MemcpyD2HKernel, plat::float16,
ops::MemcpyD2HKernel);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_d2h, float, ops::MemcpyD2HKernel, double,
ops::MemcpyD2HKernel, int, ops::MemcpyD2HKernel,
int64_t, ops::MemcpyD2HKernel, bool,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/memcpy_h2d_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
ops::MemcpyH2DKernel, plat::float16,
ops::MemcpyH2DKernel);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy_h2d, float, ops::MemcpyH2DKernel, double,
ops::MemcpyH2DKernel, int, ops::MemcpyH2DKernel,
int64_t, ops::MemcpyH2DKernel, bool,
Expand Down
2 changes: 1 addition & 1 deletion paddle/fluid/operators/memcpy_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
ops::MemcpyKernel, plat::float16,
ops::MemcpyKernel);

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_ROCM)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR(memcpy, float, ops::MemcpyKernel, double,
ops::MemcpyKernel, int, ops::MemcpyKernel,
int64_t, ops::MemcpyKernel, bool,
Expand Down
57 changes: 55 additions & 2 deletions python/paddle/fluid/tests/unittests/test_memcpy_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_prog(self):
})
return main_program, gpu_var, pinned_var

def test_gpu_cpoy_to_pinned(self):
def test_gpu_copy_to_pinned(self):
main_program, gpu_var, pinned_var = self.get_prog()
main_program.global_block().append_op(
type='memcpy',
Expand All @@ -79,7 +79,7 @@ def test_gpu_cpoy_to_pinned(self):
self.assertTrue(np.allclose(gpu_, pinned_))
self.assertTrue(np.allclose(pinned_, np.ones((10, 10))))

def test_pinned_cpoy_gpu(self):
def test_pinned_copy_gpu(self):
main_program, gpu_var, pinned_var = self.get_prog()
main_program.global_block().append_op(
type='memcpy',
Expand All @@ -94,6 +94,59 @@ def test_pinned_cpoy_gpu(self):
self.assertTrue(np.allclose(gpu_, pinned_))
self.assertTrue(np.allclose(gpu_, np.zeros((10, 10))))

def test_hip_copy_bool_value(self):
if core.is_compiled_with_rocm():
paddle.enable_static()
main_program = Program()
with program_guard(main_program):
pinned_var_name = "tensor@Pinned"
gpu_var_name = "tensor@GPU"
pinned_var = main_program.global_block().create_var(
name=pinned_var_name,
shape=[1],
dtype='bool',
persistable=False,
stop_gradient=True)
gpu_var = main_program.global_block().create_var(
name=gpu_var_name,
shape=[1],
dtype='bool',
persistable=False,
stop_gradient=True)
main_program.global_block().append_op(
type="fill_constant",
outputs={"Out": gpu_var_name},
attrs={
"shape": [1],
"dtype": gpu_var.dtype,
"value": False,
"place_type": 1
})
main_program.global_block().append_op(
type="fill_constant",
outputs={"Out": pinned_var_name},
attrs={
"shape": [1],
"dtype": gpu_var.dtype,
"value": True,
"place_type": 2
})

main_program.global_block().append_op(
type='memcpy',
inputs={'X': pinned_var},
outputs={'Out': gpu_var},
attrs={'dst_place_type': 1})
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
gpu_, pinned_ = exe.run(main_program,
feed={},
fetch_list=[gpu_var.name, pinned_var.name])
expect_value = np.array([1]).astype('bool')
self.assertTrue(np.array_equal(gpu_, expect_value))
else:
pass


class TestMemcpyOPError(unittest.TestCase):
def get_prog(self):
Expand Down

0 comments on commit 28b6407

Please sign in to comment.