diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc
index a296607bcb9db9..d616ecf147dd7d 100644
--- a/paddle/phi/backends/xpu/xpu2_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu2_op_list.cc
@@ -193,6 +193,7 @@ XPUOpMap& get_kl2_ops() {
                    phi::DataType::BOOL,
                    phi::DataType::INT8,
                    phi::DataType::UINT8,
+                   phi::DataType::INT16,
                    phi::DataType::INT64,
                    phi::DataType::INT32})},
     {"check_finite_and_unscale",
diff --git a/paddle/phi/backends/xpu/xpu3_op_list.cc b/paddle/phi/backends/xpu/xpu3_op_list.cc
index 88e042994d5a6e..a22111207f92c5 100644
--- a/paddle/phi/backends/xpu/xpu3_op_list.cc
+++ b/paddle/phi/backends/xpu/xpu3_op_list.cc
@@ -96,6 +96,7 @@ XPUOpMap& get_kl3_ops() {
                    phi::DataType::BOOL})},
     {"assign_value",
      XPUKernelSet({phi::DataType::FLOAT32,
+                   phi::DataType::FLOAT64,
                    phi::DataType::INT32,
                    phi::DataType::INT64,
                    phi::DataType::FLOAT16,
@@ -215,6 +216,7 @@ XPUOpMap& get_kl3_ops() {
                    phi::DataType::BOOL,
                    phi::DataType::INT8,
                    phi::DataType::UINT8,
+                   phi::DataType::INT16,
                    phi::DataType::INT64,
                    phi::DataType::INT32})},
     {"check_finite_and_unscale",
diff --git a/paddle/phi/kernels/cpu/flatten2_grad_kernel.cc b/paddle/phi/kernels/cpu/flatten2_grad_kernel.cc
index ddb2f6140cb58f..74bab2ae02a6f7 100644
--- a/paddle/phi/kernels/cpu/flatten2_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/flatten2_grad_kernel.cc
@@ -25,5 +25,6 @@ PD_REGISTER_KERNEL(flatten2_grad,
                    uint8_t,
                    int,
                    int8_t,
+                   int16_t,
                    int64_t,
                    bool) {}
diff --git a/paddle/phi/kernels/cpu/flatten2_kernel.cc b/paddle/phi/kernels/cpu/flatten2_kernel.cc
index 358dfaa57c304f..2b2b6e696ff7c3 100644
--- a/paddle/phi/kernels/cpu/flatten2_kernel.cc
+++ b/paddle/phi/kernels/cpu/flatten2_kernel.cc
@@ -25,5 +25,6 @@ PD_REGISTER_KERNEL(flatten2,
                    uint8_t,
                    int,
                    int8_t,
+                   int16_t,
                    int64_t,
                    bool) {}
diff --git a/paddle/phi/kernels/cpu/pad_grad_kernel.cc b/paddle/phi/kernels/cpu/pad_grad_kernel.cc
index af1db8173f971c..7cea0820f97b4a 100644
--- a/paddle/phi/kernels/cpu/pad_grad_kernel.cc
+++ b/paddle/phi/kernels/cpu/pad_grad_kernel.cc
@@ -24,6 +24,7 @@ PD_REGISTER_KERNEL(pad_grad,
                    phi::PadGradKernel,
                    float,
                    double,
+                   int16_t,
                    int,
                    int64_t,
                    phi::dtype::complex<float>,
diff --git a/paddle/phi/kernels/cpu/pad_kernel.cc b/paddle/phi/kernels/cpu/pad_kernel.cc
index ed0cb2f64442f4..474ba2ce29ad11 100644
--- a/paddle/phi/kernels/cpu/pad_kernel.cc
+++ b/paddle/phi/kernels/cpu/pad_kernel.cc
@@ -24,6 +24,7 @@ PD_REGISTER_KERNEL(pad,
                    phi::PadKernel,
                    float,
                    double,
+                   int16_t,
                    int,
                    int64_t,
                    phi::dtype::complex<float>,
diff --git a/paddle/phi/kernels/flatten_grad_kernel.cc b/paddle/phi/kernels/flatten_grad_kernel.cc
index 25e6a2f3666510..f12194165a8e06 100644
--- a/paddle/phi/kernels/flatten_grad_kernel.cc
+++ b/paddle/phi/kernels/flatten_grad_kernel.cc
@@ -45,6 +45,7 @@ PD_REGISTER_KERNEL(flatten_grad,
                    double,
                    uint8_t,
                    int8_t,
+                   int16_t,
                    int,
                    int64_t,
                    bool) {}
diff --git a/paddle/phi/kernels/reshape_kernel.cc b/paddle/phi/kernels/reshape_kernel.cc
index d5142fdef75685..2a168f938142ef 100644
--- a/paddle/phi/kernels/reshape_kernel.cc
+++ b/paddle/phi/kernels/reshape_kernel.cc
@@ -19,9 +19,6 @@
 #include "paddle/phi/core/tensor_utils.h"
 #include "paddle/phi/infermeta/unary.h"
 #include "paddle/phi/kernels/funcs/common_shape.h"
-#ifdef PADDLE_WITH_XPU
-#include "paddle/phi/backends/xpu/enforce_xpu.h"
-#endif

 namespace phi {

@@ -46,34 +43,6 @@ void ReshapeKernel(const Context& dev_ctx,
   out->ResetLoD(x.lod());
 }

-#ifdef PADDLE_WITH_XPU
-template <>
-void ReshapeKernel(const XPUContext& dev_ctx,
-                   const DenseTensor& x,
-                   const IntArray& shape,
-                   DenseTensor* out) {
-  MetaTensor meta_out(out);
-  InferMetaFromVecValue(x, shape.GetData(), &meta_out);
-
-  if (x.initialized() && x.Holder() == out->Holder()) {
-    dev_ctx.Alloc(out, x.dtype());
-    return;
-  }
-  dev_ctx.Alloc(out, x.dtype());
-  auto dims = out->dims();
-  auto* src_ptr = x.data();
-  auto* dst_ptr = out->data();
-  auto size = x.numel() * phi::SizeOf(x.dtype());
-  int ret = xpu::copy(dev_ctx.x_context(),
-                      reinterpret_cast<const int8_t*>(src_ptr),
-                      reinterpret_cast<int8_t*>(dst_ptr),
-                      size);
-  PADDLE_ENFORCE_XDNN_SUCCESS(ret, "copy");
-  out->Resize(dims);
-  out->ResetLoD(x.lod());
-}
-#endif
-
 template <typename Context>
 void ReshapeWithXShapeKernel(const Context& dev_ctx,
                              const DenseTensor& x,
diff --git a/paddle/phi/kernels/xpu/cast_kernel.cc b/paddle/phi/kernels/xpu/cast_kernel.cc
index 3096040472a0cd..a5e3e9290568b4 100644
--- a/paddle/phi/kernels/xpu/cast_kernel.cc
+++ b/paddle/phi/kernels/xpu/cast_kernel.cc
@@ -124,6 +124,7 @@ PD_REGISTER_KERNEL(cast,
                    XPU,
                    ALL_LAYOUT,
                    phi::CastKernel,
+                   int16_t,
                    int32_t,
                    float,
                    phi::dtype::float16,
diff --git a/test/dygraph_to_static/test_tensor_to.py b/test/dygraph_to_static/test_tensor_to.py
index 9b73ac99844d8b..c50c30e959cad6 100644
--- a/test/dygraph_to_static/test_tensor_to.py
+++ b/test/dygraph_to_static/test_tensor_to.py
@@ -25,6 +25,7 @@
 )

 import paddle
+from paddle import base

 # NOTE: only test in PIR mode

@@ -38,19 +39,20 @@
     "int32",
     "int64",
     "uint8",
-    "complex64",
-    "complex128",
     "bool",
-]
+] + ([] if base.core.is_compiled_with_xpu() else ["complex64", "complex128"])

 _cpu_place = "Place(cpu)"
 _gpu_place = "Place(gpu:0)"
+_xpu_place = "Place(xpu:0)"


 def place_res():
     def res():
         if paddle.is_compiled_with_cuda():
             return _gpu_place
+        elif paddle.is_compiled_with_xpu():
+            return _xpu_place
         else:
             return _cpu_place

@@ -125,6 +127,8 @@ def test_tensor_to_dtype(self):

     def test_tensor_to_device(self):
         if paddle.is_compiled_with_cuda():
             x = paddle.to_tensor([1, 2, 3], place="gpu")
+        elif paddle.is_compiled_with_xpu():
+            x = paddle.to_tensor([1, 2, 3], place="xpu")
         else:
             x = paddle.to_tensor([1, 2, 3])
@@ -136,6 +140,8 @@ def test_tensor_to_device(self):

     def test_tensor_to_device2(self):
         if paddle.is_compiled_with_cuda():
             x = paddle.to_tensor([1, 2, 3], place="gpu")
+        elif paddle.is_compiled_with_xpu():
+            x = paddle.to_tensor([1, 2, 3], place="xpu")
         else:
             x = paddle.to_tensor([1, 2, 3])
@@ -150,6 +156,8 @@ def test_tensor_to_device_dtype(self):
         places = ["cpu"]
         if paddle.is_compiled_with_cuda():
             places.append("gpu")
+        if paddle.is_compiled_with_xpu():
+            places.append("xpu")
         for dtype in _valid_dtypes:
             for place in places:
                 tensor_x = paddle.jit.to_static(to_device_dtype)(
@@ -158,6 +166,8 @@
                 place_x_str = str(tensor_x.place)
                 if "gpu" == place:
                     self.assertEqual(place_x_str, _gpu_place)
+                elif "xpu" == place:
+                    self.assertEqual(place_x_str, _xpu_place)
                 else:
                     self.assertEqual(place_x_str, _cpu_place)
                 type_x_str = str(tensor_x.dtype)
diff --git a/test/dygraph_to_static/test_to_tensor.py b/test/dygraph_to_static/test_to_tensor.py
index b36a09c4a12493..44ba50744852ee 100644
--- a/test/dygraph_to_static/test_to_tensor.py
+++ b/test/dygraph_to_static/test_to_tensor.py
@@ -40,6 +40,8 @@ def case1(x):
 def case2(x):
     if core.is_compiled_with_cuda():
         place = paddle.CUDAPlace(0)
+    elif core.is_compiled_with_xpu():
+        place = paddle.XPUPlace(0)
     else:
         place = paddle.CPUPlace()
     a = paddle.to_tensor(
@@ -53,6 +55,8 @@ def case3(x):
     paddle.set_default_dtype("float64")
     if core.is_compiled_with_cuda():
         place = paddle.CUDAPlace(0)
+    elif core.is_compiled_with_xpu():
+        place = paddle.XPUPlace(0)
     else:
         place = paddle.CPUPlace()
     a = paddle.to_tensor([1.0, 2.0, 3.0], place=place)
@@ -64,6 +68,8 @@ def case4(x):
     paddle.set_default_dtype("float64")
     if core.is_compiled_with_cuda():
         place = paddle.CUDAPlace(0)
+    elif core.is_compiled_with_xpu():
+        place = paddle.XPUPlace(0)
     else:
         place = paddle.CPUPlace()
     a = paddle.to_tensor([1], place=place)
@@ -182,6 +188,8 @@ def test_static(self):
         with paddle.static.program_guard(main_prog, startup_prog):
             if core.is_compiled_with_cuda():
                 place = paddle.CUDAPlace(0)
+            elif core.is_compiled_with_xpu():
+                place = paddle.XPUPlace(0)
             else:
                 place = paddle.CPUPlace()

diff --git a/test/legacy_test/test_Tensor_to.py b/test/legacy_test/test_Tensor_to.py
index 9821fac8616218..65aa691ed90992 100644
--- a/test/legacy_test/test_Tensor_to.py
+++ b/test/legacy_test/test_Tensor_to.py
@@ -31,10 +31,12 @@ def test_Tensor_to_dtype(self):
             "int32",
             "int64",
             "uint8",
-            "complex64",
-            "complex128",
             "bool",
-        ]
+        ] + (
+            []
+            if base.core.is_compiled_with_xpu()
+            else ["complex64", "complex128"]
+        )
         for dtype in valid_dtypes:
             tensorx = tensorx.to(dtype)
             typex_str = str(tensorx.dtype)
@@ -46,11 +48,14 @@ def test_Tensor_to_device(self):
         if base.core.is_compiled_with_cuda():
             places.append("gpu:0")
             places.append("gpu")
+        if base.core.is_compiled_with_xpu():
+            places.append("xpu:0")
+            places.append("xpu")

         for place in places:
             tensorx = tensorx.to(place)
             placex_str = str(tensorx.place)
-            if place == "gpu":
+            if place == "gpu" or place == "xpu":
                 self.assertTrue(placex_str, "Place(" + place + ":0)")
             else:
                 self.assertTrue(placex_str, "Place(" + place + ")")
@@ -68,6 +73,9 @@ def test_Tensor_to_device_dtype(self):
         if base.core.is_compiled_with_cuda():
             places.append("gpu:0")
             places.append("gpu")
+        if base.core.is_compiled_with_xpu():
+            places.append("xpu:0")
+            places.append("xpu")
         valid_dtypes = [
             "bfloat16",
             "float16",
@@ -78,15 +86,17 @@
             "int32",
             "int64",
             "uint8",
-            "complex64",
-            "complex128",
             "bool",
-        ]
+        ] + (
+            []
+            if base.core.is_compiled_with_xpu()
+            else ["complex64", "complex128"]
+        )
         for dtype in valid_dtypes:
             for place in places:
                 tensorx = tensorx.to(place, dtype)
                 placex_str = str(tensorx.place)
-                if place == "gpu":
+                if place == "gpu" or place == "xpu":
                     self.assertTrue(placex_str, "Place(" + place + ":0)")
                 else:
                     self.assertTrue(placex_str, "Place(" + place + ")")
diff --git a/test/legacy_test/test_dygraph_multi_forward.py b/test/legacy_test/test_dygraph_multi_forward.py
index 599160f5b39cb2..edbccb08d36c62 100644
--- a/test/legacy_test/test_dygraph_multi_forward.py
+++ b/test/legacy_test/test_dygraph_multi_forward.py
@@ -188,11 +188,12 @@ def test_mnist_forward_float32(self):
                 paddle.framework.random._manual_program_seed(SEED)
             else:
                 paddle.framework.random._manual_program_seed(SEED)
-            exe = base.Executor(
-                base.CPUPlace()
-                if not core.is_compiled_with_cuda()
-                else base.CUDAPlace(0)
-            )
+            if core.is_compiled_with_cuda():
+                exe = base.Executor(base.CUDAPlace(0))
+            elif core.is_compiled_with_xpu():
+                exe = base.Executor(base.XPUPlace(0))
+            else:
+                exe = base.Executor(base.CPUPlace())

             mnist = MNIST()
             sgd = paddle.optimizer.SGD(learning_rate=1e-3)
diff --git a/test/legacy_test/test_random_seed.py b/test/legacy_test/test_random_seed.py
index 8fbaf9a3d6942e..2af2bfff71551b 100644
--- a/test/legacy_test/test_random_seed.py
+++ b/test/legacy_test/test_random_seed.py
@@ -51,7 +51,7 @@ def test_generator_uniform_random_dygraph(self):
         x2_np = x2.numpy()
         x3_np = x3.numpy()

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             np.testing.assert_allclose(x1_np, x2_np, rtol=1e-05)
             np.testing.assert_allclose(x_np, x3_np, rtol=1e-05)

@@ -84,7 +84,10 @@ def test_generator_uniform_random_static(self):
             out2_res1 = np.array(out2[0])
             out2_res2 = np.array(out2[1])

-            if not core.is_compiled_with_cuda():
+            if (
+                not core.is_compiled_with_cuda()
+                and not core.is_compiled_with_xpu()
+            ):
                 np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05)
                 np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05)
                 self.assertTrue(not np.allclose(out1_res2, out1_res1))
@@ -104,7 +107,7 @@ def test_gen_dropout_dygraph(self):
         y_np = y.numpy()
         y1_np = y1.numpy()

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             print(">>>>>>> dropout dygraph >>>>>>>")
             np.testing.assert_allclose(y_np, y1_np, rtol=1e-05)

@@ -129,7 +132,7 @@ def test_gen_dropout_static(self):
         out1_np = np.array(out1[0])
         out2_np = np.array(out2[0])

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             print(">>>>>>> dropout static >>>>>>>")
             np.testing.assert_allclose(out1_np, out2_np, rtol=1e-05)

@@ -150,7 +153,7 @@ def test_generator_gaussian_random_dygraph(self):
         x2_np = x2.numpy()
         x3_np = x3.numpy()

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             print(">>>>>>> gaussian random dygraph >>>>>>>")
             np.testing.assert_allclose(x1_np, x2_np, rtol=1e-05)
             np.testing.assert_allclose(x_np, x3_np, rtol=1e-05)
@@ -184,7 +187,10 @@ def test_generator_gaussian_random_static(self):
             out2_res1 = np.array(out2[0])
             out2_res2 = np.array(out2[1])

-            if not core.is_compiled_with_cuda():
+            if (
+                not core.is_compiled_with_cuda()
+                and not core.is_compiled_with_xpu()
+            ):
                 print(">>>>>>> gaussian random static >>>>>>>")
                 np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05)
                 np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05)
@@ -207,7 +213,7 @@ def test_generator_randint_dygraph(self):
         x2_np = x2.numpy()
         x3_np = x3.numpy()

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             print(">>>>>>> randint dygraph >>>>>>>")
             np.testing.assert_allclose(x1_np, x2_np, rtol=1e-05)
             np.testing.assert_allclose(x_np, x3_np, rtol=1e-05)
@@ -241,7 +247,10 @@ def test_generator_uniform_random_static_1(self):
             out2_res1 = np.array(out2[0])
             out2_res2 = np.array(out2[1])

-            if not core.is_compiled_with_cuda():
+            if (
+                not core.is_compiled_with_cuda()
+                and not core.is_compiled_with_xpu()
+            ):
                 np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05)
                 np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05)
                 self.assertTrue(not np.allclose(out1_res2, out1_res1))
@@ -262,7 +271,7 @@ def test_generator_randint_dygraph_1(self):
         x1_np = x1.numpy()
         x2_np = x2.numpy()
         x3_np = x3.numpy()
-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             np.testing.assert_allclose(x1_np, x2_np, rtol=1e-05)
             np.testing.assert_allclose(x_np, x3_np, rtol=1e-05)

@@ -295,7 +304,10 @@ def test_generator_ranint_static(self):
             out2_res1 = np.array(out2[0])
             out2_res2 = np.array(out2[1])

-            if not core.is_compiled_with_cuda():
+            if (
+                not core.is_compiled_with_cuda()
+                and not core.is_compiled_with_xpu()
+            ):
                 print(">>>>>>> randint static >>>>>>>")
                 np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05)
                 np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05)
@@ -319,7 +331,7 @@ def test_generator_randperm_dygraph(self):
         x2_np = x2.numpy()
         x3_np = x3.numpy()

-        if not core.is_compiled_with_cuda():
+        if not core.is_compiled_with_cuda() and not core.is_compiled_with_xpu():
             print(">>>>>>> randperm dygraph >>>>>>>")
             np.testing.assert_allclose(x1_np, x2_np, rtol=1e-05)
             np.testing.assert_allclose(x_np, x3_np, rtol=1e-05)
@@ -353,7 +365,10 @@ def test_generator_randperm_static(self):
             out2_res1 = np.array(out2[0])
             out2_res2 = np.array(out2[1])

-            if not core.is_compiled_with_cuda():
+            if (
+                not core.is_compiled_with_cuda()
+                and not core.is_compiled_with_xpu()
+            ):
                 print(">>>>>>> randperm static >>>>>>>")
                 np.testing.assert_allclose(out1_res1, out2_res1, rtol=1e-05)
                 np.testing.assert_allclose(out1_res2, out2_res2, rtol=1e-05)
diff --git a/test/xpu/test_cast_op_xpu.py b/test/xpu/test_cast_op_xpu.py
index 76c310cf4a83fd..e9b66c9a84aa61 100644
--- a/test/xpu/test_cast_op_xpu.py
+++ b/test/xpu/test_cast_op_xpu.py
@@ -29,6 +29,7 @@
 from paddle.base import Program, core, program_guard

 typeid_dict = {
+    'int16': int(core.VarDesc.VarType.INT16),
     'int32': int(core.VarDesc.VarType.INT32),
     'int64': int(core.VarDesc.VarType.INT64),
     'float32': int(core.VarDesc.VarType.FP32),
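
Illustrative sketch (not part of the patch above): a minimal Python snippet showing the behavior these kernel registrations enable, assuming a Paddle build with XPU support. The "xpu" place string, paddle.is_compiled_with_xpu(), and Tensor.to(place, dtype) follow the usage in the tests touched by this diff; the snippet falls back to CPU when XPU is unavailable.

import paddle

# Pick the XPU device when this build has one; otherwise stay on CPU.
place = "xpu" if paddle.is_compiled_with_xpu() else "cpu"
x = paddle.to_tensor([1, 2, 3], dtype="int32", place=place)

# Exercises the int16 cast path that this change registers for the XPU backend.
y = x.astype("int16")
print(y.dtype)

# Tensor.to with an explicit device and dtype, as covered by test_Tensor_to.py.
z = x.to(place, "int16")
print(z.place, z.dtype)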