From a1d05222f4baa3dffa4c8f6eec531f4745e86b9e Mon Sep 17 00:00:00 2001 From: loneranger <836253168@qq.com> Date: Tue, 14 Mar 2023 21:29:28 +0800 Subject: [PATCH 01/22] add fp16 and bf16 support for bernoulli --- paddle/phi/kernels/gpu/bernoulli_kernel.cu | 11 +++++++-- .../tests/unittests/test_bernoulli_op.py | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu index edcf29e2d88d3..7847a9ce371d1 100644 --- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu +++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu @@ -26,6 +26,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" @@ -85,5 +86,11 @@ void BernoulliKernel(const Context& ctx, } // namespace phi -PD_REGISTER_KERNEL( - bernoulli, GPU, ALL_LAYOUT, phi::BernoulliKernel, float, double) {} +PD_REGISTER_KERNEL(bernoulli, + GPU, + ALL_LAYOUT, + phi::BernoulliKernel, + float, + double, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py index 33a450310d1ef..3e26a3e32bf95 100644 --- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py +++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py @@ -18,6 +18,7 @@ from eager_op_test import OpTest import paddle +import paddle.fluid.core as core def output_hist(out): @@ -98,5 +99,27 @@ def test_fixed_random_number(self): paddle.enable_static() +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the float16", +) +class TestBernoulliFP16OP(OpTest): + def setUp(self): + self.op_type = "bernoulli" + self.python_api = paddle.bernoulli + self.dtype = np.float16 + self.__class__.op_type = self.op_type + x = np.random.uniform(size=(1000, 784)).astype(np.float32) + out = np.zeros((1000, 784)).astype(np.float32) + self.inputs = {"X": x.astype(self.dtype)} + self.attrs = {} + self.outputs = {"Out": out} + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-3, check_eager=False) + + if __name__ == "__main__": unittest.main() From f6455e78b5678cf7958e1fcd8efb4cc5c069e48a Mon Sep 17 00:00:00 2001 From: loneranger <836253168@qq.com> Date: Tue, 14 Mar 2023 21:43:43 +0800 Subject: [PATCH 02/22] add fp16 and bf16 support for trunc --- paddle/phi/kernels/gpu/trunc_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/trunc_kernel.cu | 13 +++- paddle/phi/kernels/trunc_grad_kernel.h | 1 + paddle/phi/kernels/trunc_kernel.h | 1 + .../fluid/tests/unittests/test_trunc_op.py | 65 ++++++++++++++++++- 5 files changed, 81 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu index 8a88383e6e4f0..a3f7ae556951c 100644 --- a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu @@ -17,6 +17,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -52,4 +53,6 @@ 
PD_REGISTER_KERNEL(trunc_grad, float, double, int, - int64_t) {} + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu index dfc4f6589e9cf..f93ab9fbd3528 100644 --- a/paddle/phi/kernels/gpu/trunc_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_kernel.cu @@ -17,6 +17,7 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" +#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -78,5 +79,13 @@ void TruncKernel(const Context& dev_ctx, } // namespace phi -PD_REGISTER_KERNEL( - trunc, GPU, ALL_LAYOUT, phi::TruncKernel, float, double, int, int64_t) {} +PD_REGISTER_KERNEL(trunc, + GPU, + ALL_LAYOUT, + phi::TruncKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/trunc_grad_kernel.h b/paddle/phi/kernels/trunc_grad_kernel.h index f3f8032d3a23c..ad20fd59fc37f 100644 --- a/paddle/phi/kernels/trunc_grad_kernel.h +++ b/paddle/phi/kernels/trunc_grad_kernel.h @@ -15,6 +15,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/device_context.h" namespace phi { diff --git a/paddle/phi/kernels/trunc_kernel.h b/paddle/phi/kernels/trunc_kernel.h index d9a7ea6339348..13acab7582dba 100644 --- a/paddle/phi/kernels/trunc_kernel.h +++ b/paddle/phi/kernels/trunc_kernel.h @@ -15,6 +15,7 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/device_context.h" namespace phi { diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py index db45b36b56302..181dcb68c0f78 100644 --- a/python/paddle/fluid/tests/unittests/test_trunc_op.py +++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py @@ -15,9 +15,10 @@ import unittest import numpy as np -from op_test import OpTest +from op_test import OpTest, convert_float_to_uint16 import paddle +import paddle.fluid.core as core paddle.enable_static() @@ -90,5 +91,67 @@ def test_errors(self): self.assertRaises(TypeError, paddle.trunc, x) +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_float16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the float16", +) +class TestTruncFP16OP(OpTest): + def setUp(self): + self.op_type = "trunc" + self.python_api = paddle.trunc + self.init_dtype_type() + self.__class__.op_type = self.op_type + np.random.seed(2021) + x = np.random.random((20, 20)).astype(np.float32) + out = np.trunc(x) + self.inputs = {'X': x.astype(self.dtype)} + self.outputs = {'Out': out} + + def init_dtype_type(self): + self.dtype = np.float16 + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-3, check_eager=False) + + def test_check_grad(self): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=1e-2, check_eager=True + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda() + or not core.is_bfloat16_supported(core.CUDAPlace(0)), + "core is not complied with CUDA and not support the bfloat16", +) +class TestTruncBF16(OpTest): + def setUp(self): + self.op_type = "trunc" + self.python_api = paddle.trunc + self.init_dtype_type() + self.__class__.op_type = self.op_type + np.random.seed(2021) + x = np.random.random((20, 20)).astype(np.float32) + out = np.trunc(x) + self.inputs = 
{'X': convert_float_to_uint16(x)} + self.outputs = {'Out': convert_float_to_uint16(out)} + + def init_dtype_type(self): + self.dtype = np.uint16 + + def test_check_output(self): + place = core.CUDAPlace(0) + self.check_output_with_place(place, atol=1e-3, check_eager=False) + + def test_check_grad(self): + place = core.CUDAPlace(0) + self.check_grad_with_place( + place, ['X'], 'Out', max_relative_error=1e-2, check_eager=True + ) + + if __name__ == "__main__": unittest.main() From 99f58543197a5f015438ecb519ff1610289b974f Mon Sep 17 00:00:00 2001 From: longranger2 <836253168@qq.com> Date: Wed, 22 Mar 2023 19:22:02 +0800 Subject: [PATCH 03/22] fix bug --- paddle/phi/kernels/gpu/bernoulli_kernel.cu | 1 - paddle/phi/kernels/gpu/trunc_grad_kernel.cu | 1 - paddle/phi/kernels/gpu/trunc_kernel.cu | 1 - paddle/phi/kernels/trunc_grad_kernel.h | 1 - paddle/phi/kernels/trunc_kernel.h | 1 - .../tests/unittests/test_bernoulli_op.py | 29 ++++---------- .../fluid/tests/unittests/test_trunc_op.py | 39 +++---------------- 7 files changed, 13 insertions(+), 60 deletions(-) diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu index 7847a9ce371d1..3905378694e17 100644 --- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu +++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu @@ -26,7 +26,6 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" diff --git a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu index a3f7ae556951c..40e1404cd900d 100644 --- a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu @@ -17,7 +17,6 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu index f93ab9fbd3528..09f9c804a1d1b 100644 --- a/paddle/phi/kernels/gpu/trunc_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_kernel.cu @@ -17,7 +17,6 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_primitives.h" -#include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/trunc_grad_kernel.h b/paddle/phi/kernels/trunc_grad_kernel.h index ad20fd59fc37f..f3f8032d3a23c 100644 --- a/paddle/phi/kernels/trunc_grad_kernel.h +++ b/paddle/phi/kernels/trunc_grad_kernel.h @@ -15,7 +15,6 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/device_context.h" namespace phi { diff --git a/paddle/phi/kernels/trunc_kernel.h b/paddle/phi/kernels/trunc_kernel.h index 13acab7582dba..d9a7ea6339348 100644 --- a/paddle/phi/kernels/trunc_kernel.h +++ b/paddle/phi/kernels/trunc_kernel.h @@ -15,7 +15,6 @@ #pragma once #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/device_context.h" namespace phi { diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py index 3e26a3e32bf95..c1c734d389571 100644 --- 
a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py +++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py @@ -18,7 +18,6 @@ from eager_op_test import OpTest import paddle -import paddle.fluid.core as core def output_hist(out): @@ -32,10 +31,15 @@ def output_hist(out): class TestBernoulliOp(OpTest): def setUp(self): self.op_type = "bernoulli" - self.inputs = {"X": np.random.uniform(size=(1000, 784))} + self.inputs = { + "X": np.random.uniform(size=(1000, 784)).astype(self.dtype) + } self.attrs = {} self.outputs = {"Out": np.zeros((1000, 784)).astype("float32")} + def init_dtype(self): + self.dtype = np.float32 + def test_check_output(self): self.check_output_customized(self.verify_output) @@ -99,26 +103,9 @@ def test_fixed_random_number(self): paddle.enable_static() -@unittest.skipIf( - not core.is_compiled_with_cuda() - or not core.is_float16_supported(core.CUDAPlace(0)), - "core is not complied with CUDA and not support the float16", -) -class TestBernoulliFP16OP(OpTest): - def setUp(self): - self.op_type = "bernoulli" - self.python_api = paddle.bernoulli +class TestBernoulliFP16OP(TestBernoulliOp): + def init_dtype(self): self.dtype = np.float16 - self.__class__.op_type = self.op_type - x = np.random.uniform(size=(1000, 784)).astype(np.float32) - out = np.zeros((1000, 784)).astype(np.float32) - self.inputs = {"X": x.astype(self.dtype)} - self.attrs = {} - self.outputs = {"Out": out} - - def test_check_output(self): - place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=1e-3, check_eager=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py index 525aecdffaff3..4e882de91dfb0 100644 --- a/python/paddle/fluid/tests/unittests/test_trunc_op.py +++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py @@ -91,50 +91,23 @@ def test_errors(self): self.assertRaises(TypeError, paddle.trunc, x) -@unittest.skipIf( - not core.is_compiled_with_cuda() - or not core.is_float16_supported(core.CUDAPlace(0)), - "core is not complied with CUDA and not support the float16", -) -class TestTruncFP16OP(OpTest): - def setUp(self): - self.op_type = "trunc" - self.python_api = paddle.trunc - self.init_dtype_type() - self.__class__.op_type = self.op_type - np.random.seed(2021) - x = np.random.random((20, 20)).astype(np.float32) - out = np.trunc(x) - self.inputs = {'X': x.astype(self.dtype)} - self.outputs = {'Out': out} - +class TestTruncFP16OP(TestTruncOp): def init_dtype_type(self): self.dtype = np.float16 - def test_check_output(self): - place = core.CUDAPlace(0) - self.check_output_with_place(place, atol=1e-3, check_eager=False) - - def test_check_grad(self): - place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', max_relative_error=1e-2, check_eager=True - ) - @unittest.skipIf( not core.is_compiled_with_cuda() or not core.is_bfloat16_supported(core.CUDAPlace(0)), "core is not complied with CUDA and not support the bfloat16", ) -class TestTruncBF16(OpTest): +class TestTruncBF16OP(OpTest): def setUp(self): self.op_type = "trunc" self.python_api = paddle.trunc self.init_dtype_type() - self.__class__.op_type = self.op_type np.random.seed(2021) - x = np.random.random((20, 20)).astype(np.float32) + x = np.random.random((20, 20)).astype(np.float64) out = np.trunc(x) self.inputs = {'X': convert_float_to_uint16(x)} self.outputs = {'Out': convert_float_to_uint16(out)} @@ -144,13 +117,11 @@ def init_dtype_type(self): def test_check_output(self): place = 
core.CUDAPlace(0)
-        self.check_output_with_place(place, atol=1e-3, check_eager=False)
+        self.check_output_with_place(place)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(
-            place, ['X'], 'Out', max_relative_error=1e-2, check_eager=True
-        )
+        self.check_grad_with_place(place, ['X'], 'Out')
 
 
 if __name__ == "__main__":

From dce175459ca26bc81cd83b95a67bd790e33f0f4b Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Mon, 3 Apr 2023 20:28:04 +0800
Subject: [PATCH 04/22] fix bug

---
 .../tests/unittests/test_bernoulli_op.py      | 30 +++++++++++++++++--
 .../fluid/tests/unittests/test_trunc_op.py    |  2 +-
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index c1c734d389571..fe2a4491582fa 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -15,9 +15,10 @@
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest
+from eager_op_test import OpTest, convert_float_to_uint16
 
 import paddle
+from paddle.fluid import core
 
 
 def output_hist(out):
@@ -103,10 +104,35 @@ def test_fixed_random_number(self):
         paddle.enable_static()
 
 
-class TestBernoulliFP16OP(TestBernoulliOp):
+class TestBernoulliFP16Op(TestBernoulliOp):
     def init_dtype(self):
         self.dtype = np.float16
 
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda()
+    or not core.is_bfloat16_supported(core.CUDAPlace(0)),
+    "core is not complied with CUDA and not support the bfloat16",
+)
+class TestBernoulliBF16Op(OpTest):
+    def setUp(self):
+        self.python_api = paddle.bernoulli
+        self.op_type = "bernoulli"
+        self.dtype = np.uint16
+        self.init_test_case()
+
+        self.inputs = {'X': convert_float_to_uint16(self.x)}
+        self.attrs = {}
+        self.outputs = {'Out': convert_float_to_uint16(self.out)}
+
+    def test_check_output(self):
+        place = core.CUDAPlace(0)
+        self.check_output_with_place(place)
+
+    def init_test_case(self):
+        self.x = np.random.uniform(size=(1000, 784)).astype("float32")
+        self.out = np.zeros((1000, 784)).astype("float32")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py
index e10ae279ac589..2f34c5577b5e5 100644
--- a/python/paddle/fluid/tests/unittests/test_trunc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py
@@ -18,7 +18,7 @@
 from eager_op_test import OpTest, convert_float_to_uint16
 
 import paddle
-import paddle.fluid.core as core
+from paddle.fluid import core
 
 paddle.enable_static()
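
Note on PATCH 04: OpTest has no native bfloat16 dtype, so TestBernoulliBF16Op
feeds the operator uint16 arrays that hold raw bfloat16 bits, produced by
eager_op_test's convert_float_to_uint16. A minimal NumPy sketch of that
encoding (a truncating float32 -> bfloat16 conversion is assumed here; the
real helper may also round):

    import numpy as np

    def float_to_bf16_bits(x):
        # bfloat16 is the upper half of a float32: 1 sign bit, 8 exponent
        # bits, 7 mantissa bits. Dropping the low 16 bits truncates.
        x = np.asarray(x, dtype=np.float32)
        return (x.view(np.uint32) >> 16).astype(np.uint16)

    def bf16_bits_to_float(bits):
        # Re-expand the 16 stored bits into a float32 whose low mantissa
        # bits are zero.
        bits = np.asarray(bits, dtype=np.uint16)
        return (bits.astype(np.uint32) << 16).view(np.float32)
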
From b1771ebd4039f1b23fa80566f921aac527d40462 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Sat, 22 Apr 2023 05:48:31 +0000
Subject: [PATCH 05/22] fix bug

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 31 +++++++++++++++++++---
 paddle/phi/kernels/gpu/trunc_kernel.cu     |  8 +++++-
 2 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index 3905378694e17..e0a9d3750d3fc 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -32,6 +32,29 @@
 
 namespace phi {
 
+template <typename T>
+__device__ T convert_to_T(float rand_value, T x_value);
+
+template <>
+__device__ phi::dtype::float16 convert_to_T(float rand_value, phi::dtype::float16 x_value) {
+  return static_cast<phi::dtype::float16>(rand_value <= static_cast<float>(x_value));
+}
+
+template <>
+__device__ phi::dtype::bfloat16 convert_to_T(float rand_value, phi::dtype::bfloat16 x_value) {
+  return static_cast<phi::dtype::bfloat16>(rand_value <= static_cast<float>(x_value));
+}
+
+template <>
+__device__ float convert_to_T(float rand_value, float x_value) {
+  return static_cast<float>(rand_value <= x_value);
+}
+
+template <>
+__device__ double convert_to_T(float rand_value, double x_value) {
+  return static_cast<double>(rand_value <= x_value);
+}
+
 // 'curand_uniform4/hiprand_uniform4' generate 4 random number each time
 template <typename T>
 __global__ void bernoulli_cuda_kernel(
@@ -55,7 +78,7 @@ __global__ void bernoulli_cuda_kernel(
     for (size_t j = 0; j < 4; j++) {
       size_t idx = i + j;
       if (idx < size) {
-        out_data[idx] = static_cast<T>((&rand.x)[j] <= x_data[idx]);
+        out_data[idx] = convert_to_T((&rand.x)[j], x_data[idx]);
       }
     }
   }
@@ -89,7 +112,7 @@ PD_REGISTER_KERNEL(bernoulli,
                    GPU,
                    ALL_LAYOUT,
                    phi::BernoulliKernel,
-                   float,
-                   double,
                    phi::dtype::float16,
-                   phi::dtype::bfloat16) {}
+                   phi::dtype::bfloat16,
+                   float,
+                   double) {}
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index 09f9c804a1d1b..449fe9bf400c8 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -27,7 +27,13 @@ template <typename T>
 class TruncFunctor {
  public:
   __device__ TruncFunctor(const T x) : x_(x) {}
-  __device__ T operator()() { return trunc(x_); }
+  __device__ T operator()() {
+    if constexpr (std::is_same<T, phi::dtype::float16>::value || std::is_same<T, phi::dtype::bfloat16>::value) {
+      return static_cast<T>(trunc(static_cast<float>(x_)));
+    } else {
+      return trunc(x_);
+    }
+  }

From 528e5b88881250c8ea623e767a18ac9609520775 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Sat, 22 Apr 2023 14:02:33 +0800
Subject: [PATCH 06/22] fix PR-CI-Codestyle-Check

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 12 ++++++++----
 paddle/phi/kernels/gpu/trunc_kernel.cu     |  3 ++-
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index e0a9d3750d3fc..f86e128db3e2a 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -36,13 +36,17 @@ template <typename T>
 __device__ T convert_to_T(float rand_value, T x_value);
 
 template <>
-__device__ phi::dtype::float16 convert_to_T(float rand_value, phi::dtype::float16 x_value) {
-  return static_cast<phi::dtype::float16>(rand_value <= static_cast<float>(x_value));
+__device__ phi::dtype::float16 convert_to_T(
+    float rand_value, phi::dtype::float16 x_value) {
+  return static_cast<phi::dtype::float16>(rand_value <=
+                                          static_cast<float>(x_value));
 }
 
 template <>
-__device__ phi::dtype::bfloat16 convert_to_T(float rand_value, phi::dtype::bfloat16 x_value) {
-  return static_cast<phi::dtype::bfloat16>(rand_value <= static_cast<float>(x_value));
+__device__ phi::dtype::bfloat16 convert_to_T(
+    float rand_value, phi::dtype::bfloat16 x_value) {
+  return static_cast<phi::dtype::bfloat16>(rand_value <=
+                                           static_cast<float>(x_value));
 }
 
 template <>
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index 449fe9bf400c8..c56344f4c7887 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -28,7 +28,8 @@ class TruncFunctor {
  public:
   __device__ TruncFunctor(const T x) : x_(x) {}
   __device__ T operator()() {
-    if constexpr (std::is_same<T, phi::dtype::float16>::value || std::is_same<T, phi::dtype::bfloat16>::value) {
+    if constexpr (std::is_same<T, phi::dtype::float16>::value ||
+                  std::is_same<T, phi::dtype::bfloat16>::value) {
       return static_cast<T>(trunc(static_cast<float>(x_)));
     } else {
       return trunc(x_);
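
Note on PATCHES 05-06: convert_to_T performs the comparison in float32 and
only narrows the 0/1 result, so the half-precision input is never compared
against curand's float output directly in reduced precision. A rough NumPy
model of the kernel's per-element logic (the function name and seeding are
illustrative, not Paddle API):

    import numpy as np

    def bernoulli_reference(p, out_dtype=np.float16, seed=0):
        # One uniform float32 draw per element, compared against the
        # probability in float32; only the 0/1 result is cast down.
        rng = np.random.default_rng(seed)
        rand = rng.random(p.shape, dtype=np.float32)
        return (rand <= p.astype(np.float32)).astype(out_dtype)
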
Date: Sat, 22 Apr 2023 16:56:35 +0800 Subject: [PATCH 07/22] fix bug of trunc_kernel.cu --- paddle/phi/kernels/gpu/trunc_kernel.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu index c56344f4c7887..103eecfced886 100644 --- a/paddle/phi/kernels/gpu/trunc_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_kernel.cu @@ -26,7 +26,7 @@ using phi::PADDLE_CUDA_NUM_THREADS; template class TruncFunctor { public: - __device__ TruncFunctor(const T x) : x_(x) {} + __device__ TruncFunctor(T x) : x_(x) {} __device__ T operator()() { if constexpr (std::is_same::value || std::is_same::value) { @@ -37,7 +37,7 @@ class TruncFunctor { } public: - const T x_; + T x_; }; template <> From 8b8361dd8b41792607aad2ebaa8f22fc507294b9 Mon Sep 17 00:00:00 2001 From: longranger2 <836253168@qq.com> Date: Sat, 22 Apr 2023 17:07:32 +0800 Subject: [PATCH 08/22] fix bug of trunc_kernel.cu --- paddle/phi/kernels/gpu/trunc_kernel.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu index 103eecfced886..b3713c69b8aa9 100644 --- a/paddle/phi/kernels/gpu/trunc_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_kernel.cu @@ -28,8 +28,8 @@ class TruncFunctor { public: __device__ TruncFunctor(T x) : x_(x) {} __device__ T operator()() { - if constexpr (std::is_same::value || - std::is_same::value) { + if (phi::is_same::value || + phi::is_same::value) { return static_cast(trunc(static_cast(x_))); } else { return trunc(x_); From 099d3bb3681276b0cd47256c21da8ab0c234bbf3 Mon Sep 17 00:00:00 2001 From: longranger2 <836253168@qq.com> Date: Sat, 22 Apr 2023 17:32:27 +0800 Subject: [PATCH 09/22] fix bug of trunc_kernel.cu --- paddle/phi/kernels/gpu/trunc_kernel.cu | 34 ++++++++++++++++++++------ 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu index b3713c69b8aa9..5b014f84a5270 100644 --- a/paddle/phi/kernels/gpu/trunc_kernel.cu +++ b/paddle/phi/kernels/gpu/trunc_kernel.cu @@ -23,18 +23,36 @@ namespace phi { using phi::PADDLE_CUDA_NUM_THREADS; +template +__device__ T device_trunc(T x); + +template <> +__device__ float device_trunc(float x) { + return truncf(x); +} + +template <> +__device__ double device_trunc(double x) { + return trunc(x); +} + +template <> +__device__ phi::dtype::float16 device_trunc( + phi::dtype::float16 x) { + return static_cast(truncf(static_cast(x))); +} + +template <> +__device__ phi::dtype::bfloat16 device_trunc( + phi::dtype::bfloat16 x) { + return static_cast(truncf(static_cast(x))); +} + template class TruncFunctor { public: __device__ TruncFunctor(T x) : x_(x) {} - __device__ T operator()() { - if (phi::is_same::value || - phi::is_same::value) { - return static_cast(trunc(static_cast(x_))); - } else { - return trunc(x_); - } - } + __device__ T operator()() { return device_trunc(x_); } public: T x_; From 22dbf8d1384e8fea0976fdaf9b64e8dcbbb96943 Mon Sep 17 00:00:00 2001 From: longranger2 <836253168@qq.com> Date: Wed, 3 May 2023 15:05:39 +0800 Subject: [PATCH 10/22] fix bug of trunc and bernoulli --- .../paddle/fluid/tests/unittests/test_bernoulli_op.py | 6 +++++- python/paddle/fluid/tests/unittests/test_trunc_op.py | 11 ++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py index 
From b20ac1a28ee46c238c5814cbb97749c924962bb6 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 3 May 2023 15:05:39 +0800
Subject: [PATCH 10/22] fix bug of trunc and bernoulli

---
 .../paddle/fluid/tests/unittests/test_bernoulli_op.py |  6 +++++-
 python/paddle/fluid/tests/unittests/test_trunc_op.py  | 11 ++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index e955f81338f6f..7ff356a74cc70 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -127,12 +127,16 @@ def setUp(self):
 
     def test_check_output(self):
         place = core.CUDAPlace(0)
-        self.check_output_with_place(place)
+        self.check_output_with_place_customized(self.verify_output, place)
 
     def init_test_case(self):
         self.x = np.random.uniform(size=(1000, 784)).astype("float32")
         self.out = np.zeros((1000, 784)).astype("float32")
 
+    def verify_output(self, outs):
+        hist, prob = output_hist(np.array(outs[0]))
+        np.testing.assert_allclose(hist, prob, rtol=0, atol=0.01)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_trunc_op.py b/python/paddle/fluid/tests/unittests/test_trunc_op.py
index 2f34c5577b5e5..404403fca0453 100644
--- a/python/paddle/fluid/tests/unittests/test_trunc_op.py
+++ b/python/paddle/fluid/tests/unittests/test_trunc_op.py
@@ -103,25 +103,22 @@ def init_dtype_type(self):
 )
 class TestTruncBF16OP(OpTest):
     def setUp(self):
-        self.op_type = "trunc"
         self.python_api = paddle.trunc
-        self.init_dtype_type()
+        self.op_type = "trunc"
+        self.dtype = np.uint16
         np.random.seed(2021)
-        x = np.random.random((20, 20)).astype(np.float64)
+        x = np.random.random((20, 20)).astype("float32")
         out = np.trunc(x)
         self.inputs = {'X': convert_float_to_uint16(x)}
         self.outputs = {'Out': convert_float_to_uint16(out)}
 
-    def init_dtype_type(self):
-        self.dtype = np.uint16
-
     def test_check_output(self):
         place = core.CUDAPlace(0)
         self.check_output_with_place(place)
 
     def test_check_grad(self):
         place = core.CUDAPlace(0)
-        self.check_grad_with_place(place, ['X'], 'Out')
+        self.check_grad_with_place(place, ['X'], 'Out', numeric_grad_delta=1e-5)
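
Note on PATCH 10: an element-wise comparison against a fixed expected array is
meaningless for a random op, so the BF16 bernoulli test switches to
check_output_with_place_customized and validates sample statistics instead.
The intuition behind the histogram check, sketched with illustrative
tolerances:

    import numpy as np

    # With probabilities drawn uniformly from [0, 1), the expected fraction
    # of ones over the 1000 x 784 Bernoulli draws is E[p] = 0.5, and with
    # ~780k samples the observed mean should land well within 0.01 of it.
    p = np.random.uniform(size=(1000, 784)).astype(np.float32)
    samples = (np.random.uniform(size=p.shape) <= p).astype(np.float32)
    assert abs(samples.mean() - 0.5) < 0.01
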
From 9db702f753c6fc9a80e32490e3c0719a68e0d2e1 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Tue, 9 May 2023 16:22:15 +0000
Subject: [PATCH 11/22] fix bug

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 29 +----------------
 paddle/phi/kernels/gpu/trunc_kernel.cu     | 31 ++-----------------
 .../tests/unittests/test_bernoulli_op.py   |  2 +-
 3 files changed, 5 insertions(+), 57 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index f86e128db3e2a..a2055597e7a64 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -32,33 +32,6 @@
 
 namespace phi {
 
-template <typename T>
-__device__ T convert_to_T(float rand_value, T x_value);
-
-template <>
-__device__ phi::dtype::float16 convert_to_T(
-    float rand_value, phi::dtype::float16 x_value) {
-  return static_cast<phi::dtype::float16>(rand_value <=
-                                          static_cast<float>(x_value));
-}
-
-template <>
-__device__ phi::dtype::bfloat16 convert_to_T(
-    float rand_value, phi::dtype::bfloat16 x_value) {
-  return static_cast<phi::dtype::bfloat16>(rand_value <=
-                                           static_cast<float>(x_value));
-}
-
-template <>
-__device__ float convert_to_T(float rand_value, float x_value) {
-  return static_cast<float>(rand_value <= x_value);
-}
-
-template <>
-__device__ double convert_to_T(float rand_value, double x_value) {
-  return static_cast<double>(rand_value <= x_value);
-}
-
 // 'curand_uniform4/hiprand_uniform4' generate 4 random number each time
 template <typename T>
 __global__ void bernoulli_cuda_kernel(
@@ -82,7 +55,7 @@ __global__ void bernoulli_cuda_kernel(
     for (size_t j = 0; j < 4; j++) {
       size_t idx = i + j;
      if (idx < size) {
-        out_data[idx] = convert_to_T((&rand.x)[j], x_data[idx]);
+        out_data[idx] = convert_to_T((&rand.x)[j], static_cast<MPType>(x_data[idx]));
       }
     }
   }
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index 5b014f84a5270..c0004d5ba9fda 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -23,39 +23,14 @@ namespace phi {
 
 using phi::PADDLE_CUDA_NUM_THREADS;
 
-template <typename T>
-__device__ T device_trunc(T x);
-
-template <>
-__device__ float device_trunc(float x) {
-  return truncf(x);
-}
-
-template <>
-__device__ double device_trunc(double x) {
-  return trunc(x);
-}
-
-template <>
-__device__ phi::dtype::float16 device_trunc(
-    phi::dtype::float16 x) {
-  return static_cast<phi::dtype::float16>(truncf(static_cast<float>(x)));
-}
-
-template <>
-__device__ phi::dtype::bfloat16 device_trunc(
-    phi::dtype::bfloat16 x) {
-  return static_cast<phi::dtype::bfloat16>(truncf(static_cast<float>(x)));
-}
-
 template <typename T>
 class TruncFunctor {
  public:
-  __device__ TruncFunctor(T x) : x_(x) {}
-  __device__ T operator()() { return device_trunc(x_); }
+  __device__ TruncFunctor(const T x) : x_(x) {}
+  __device__ T operator()() { return trunc(static_cast<MPType>(x_)); }
 
  public:
-  T x_;
+  const T x_;
 };
 
 template <>
diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index 7ff356a74cc70..8f0871585ce98 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -36,7 +36,7 @@ def setUp(self):
             "X": np.random.uniform(size=(1000, 784)).astype(self.dtype)
         }
         self.attrs = {}
-        self.outputs = {"Out": np.zeros((1000, 784)).astype("float32")}
+        self.outputs = {"Out": np.zeros((1000, 784))}

From 38d7bc1e61b9dff87321384b7a69a11261dde9c5 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Tue, 9 May 2023 16:40:00 +0000
Subject: [PATCH 12/22] fix bug

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index a2055597e7a64..38e2a6801dd54 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -29,6 +29,7 @@
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
+#include "paddle/phi/common/amp_type_traits.h"
 
 namespace phi {
 
@@ -52,10 +53,11 @@ __global__ void bernoulli_cuda_kernel(
     funcs::uniform_distribution<float> dist;
     float4 rand = dist(&state);
 #pragma unroll
+    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
     for (size_t j = 0; j < 4; j++) {
       size_t idx = i + j;
      if (idx < size) {
-        out_data[idx] = convert_to_T((&rand.x)[j], static_cast<MPType>(x_data[idx]));
+        out_data[idx] = static_cast<T>((&rand.x)[j], static_cast<MPType>(x_data[idx]));
       }
     }
   }
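
Note on PATCH 12: static_cast<T>((&rand.x)[j] <= static_cast<MPType>(...)) is
the intent, but the line as committed uses a comma where the <= belongs, so
the cast's parenthesized expression is a C++ comma expression: the random
number is evaluated and discarded, and the kernel stores the cast probability
rather than a 0/1 sample. The comparison is only restored in PATCH 22. In
NumPy terms, the intended result versus what this revision effectively
computes:

    import numpy as np

    p = np.random.uniform(size=8).astype(np.float32)     # probabilities
    rand = np.random.uniform(size=8).astype(np.float32)  # uniform draws

    intended = (rand <= p).astype(np.float16)   # Bernoulli 0/1 sample
    effective = p.astype(np.float16)            # comma operator drops rand
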
From f4ce7735ce29f7dca1b5c26760011cb76c2c05fa Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 10 May 2023 03:12:15 +0000
Subject: [PATCH 13/22] fix bug of MPType

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 3 ++-
 paddle/phi/kernels/gpu/trunc_kernel.cu     | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index 38e2a6801dd54..b1d0f7608aa7c 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -57,7 +57,8 @@ __global__ void bernoulli_cuda_kernel(
     for (size_t j = 0; j < 4; j++) {
       size_t idx = i + j;
       if (idx < size) {
-        out_data[idx] = static_cast<T>((&rand.x)[j], static_cast<MPType>(x_data[idx]));
+        out_data[idx] =
+            static_cast<T>((&rand.x)[j], static_cast<MPType>(x_data[idx]));
       }
     }
   }
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index c0004d5ba9fda..4db1622de4efc 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -22,11 +22,12 @@ namespace phi {
 
 using phi::PADDLE_CUDA_NUM_THREADS;
+using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
 
 template <typename T>
 class TruncFunctor {
  public:
-  __device__ TruncFunctor(const T x) : x_(x) {}
+  __device__ TruncFunctor(const T x) : x_(x) {}
   __device__ T operator()() { return trunc(static_cast<MPType>(x_)); }
 
  public:
   const T x_;

From bd62029b96fc99ca6ae1196741ca5a204458545b Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 10 May 2023 03:18:41 +0000
Subject: [PATCH 14/22] fix check_variable_and_dtype

---
 python/paddle/tensor/random.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index 9629796494055..1db1ae522b43b 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -76,7 +76,7 @@ def bernoulli(x, name=None):
     if in_dygraph_mode():
         return _C_ops.bernoulli(x)
     else:
-        check_variable_and_dtype(x, "x", ["float32", "float64"], "bernoulli")
+        check_variable_and_dtype(x, "x", ["float16", "float32", "float64"], "bernoulli")
 
         helper = LayerHelper("randint", **locals())
         out = helper.create_variable_for_type_inference(

From 3782bd1c9efabc04fef25a6f83e7b6aeb3ddc4d8 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 10 May 2023 14:54:43 +0800
Subject: [PATCH 15/22] fix bug of MPType

---
 paddle/phi/kernels/gpu/trunc_kernel.cu | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index 4db1622de4efc..a87ba55a310b3 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -17,6 +17,7 @@
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_primitives.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/kernel_registry.h"
 
 namespace phi {

From 22dbf8d1384e8fea0976fdaf9b64e8dcbbb96943 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 10 May 2023 17:42:59 +0800
Subject: [PATCH 16/22] fix bug of undefined T

---
 paddle/phi/kernels/gpu/trunc_kernel.cu | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index a87ba55a310b3..4724edf506382 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -23,13 +23,15 @@ namespace phi {
 
 using phi::PADDLE_CUDA_NUM_THREADS;
-using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
 
 template <typename T>
 class TruncFunctor {
  public:
   __device__ TruncFunctor(const T x) : x_(x) {}
-  __device__ T operator()() { return trunc(static_cast<MPType>(x_)); }
+  __device__ T operator()() {
+    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
+    return trunc(static_cast<MPType>(x_));
+  }
 
  public:
   const T x_;
From 7def562807d3b46705553de3dd9391a027288942 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Thu, 11 May 2023 22:03:43 +0800
Subject: [PATCH 17/22] fix bug

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu | 2 +-
 paddle/phi/kernels/gpu/trunc_kernel.cu     | 2 +-
 python/paddle/tensor/random.py             | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index b1d0f7608aa7c..18cd7e796ef11 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -26,10 +26,10 @@
 
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/backends/gpu/gpu_launch_config.h"
+#include "paddle/phi/common/amp_type_traits.h"
 #include "paddle/phi/core/dense_tensor.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/funcs/distribution_helper.h"
-#include "paddle/phi/common/amp_type_traits.h"
 
 namespace phi {
 
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index 4724edf506382..bdbdb80a97f5c 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -30,7 +30,7 @@ class TruncFunctor {
   __device__ TruncFunctor(const T x) : x_(x) {}
   __device__ T operator()() {
     using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
-    return trunc(static_cast<MPType>(x_));
+    return static_cast<T>(trunc(static_cast<MPType>(x_)));
   }
 
  public:
diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py
index 1db1ae522b43b..98b123b96e411 100644
--- a/python/paddle/tensor/random.py
+++ b/python/paddle/tensor/random.py
@@ -76,7 +76,9 @@ def bernoulli(x, name=None):
     if in_dygraph_mode():
         return _C_ops.bernoulli(x)
     else:
-        check_variable_and_dtype(x, "x", ["float16", "float32", "float64"], "bernoulli")
+        check_variable_and_dtype(
+            x, "x", ["float16", "float32", "float64"], "bernoulli"
+        )
 
         helper = LayerHelper("randint", **locals())
         out = helper.create_variable_for_type_inference(

From 3e9063aa522eee0a768050891b8a9e5ac1ffab40 Mon Sep 17 00:00:00 2001
From: LoneRanger <836253168@qq.com>
Date: Fri, 12 May 2023 17:30:56 +0800
Subject: [PATCH 18/22] Update test_bernoulli_op.py

---
 python/paddle/fluid/tests/unittests/test_bernoulli_op.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index 8f0871585ce98..6d7a73ba49d1a 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -135,7 +135,7 @@ def init_test_case(self):
 
     def verify_output(self, outs):
         hist, prob = output_hist(np.array(outs[0]))
-        np.testing.assert_allclose(hist, prob, rtol=0, atol=0.01)
+        np.testing.assert_allclose(hist, prob)
 
 
 if __name__ == "__main__":

From 13a2c74a8c3f295cb2f1a52950b0470442f70cf3 Mon Sep 17 00:00:00 2001
From: LoneRanger <836253168@qq.com>
Date: Mon, 15 May 2023 14:55:43 +0800
Subject: [PATCH 19/22] Update test_bernoulli_op.py

---
 python/paddle/fluid/tests/unittests/test_bernoulli_op.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index 6d7a73ba49d1a..05d4c75824465 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -36,7 +36,7 @@ def setUp(self):
             "X": np.random.uniform(size=(1000, 784)).astype(self.dtype)
         }
         self.attrs = {}
-        self.outputs = {"Out": np.zeros((1000, 784))}
+        self.outputs = {"Out": np.zeros((1000, 784)).astype(self.dtype}
 
     def init_dtype(self):
         self.dtype = np.float32

From 3c4e333f5dcfe470a72262223f0995bc2f40f31f Mon Sep 17 00:00:00 2001
From: LoneRanger <836253168@qq.com>
Date: Mon, 15 May 2023 22:30:57 +0800
Subject: [PATCH 20/22] Update test_bernoulli_op.py

---
 python/paddle/fluid/tests/unittests/test_bernoulli_op.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index 05d4c75824465..7b3fabdf1484f 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -132,7 +132,9 @@ def test_check_output(self):
     def init_test_case(self):
         self.x = np.random.uniform(size=(1000, 784)).astype("float32")
         self.out = np.zeros((1000, 784)).astype("float32")
-
+        self.x = convert_uint16_to_float(convert_float_to_uint16(self.x))
+        self.out = convert_uint16_to_float(convert_float_to_uint16(self.out))
+    
     def verify_output(self, outs):
         hist, prob = output_hist(np.array(outs[0]))
         np.testing.assert_allclose(hist, prob)
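
Note on PATCH 20: convert_float_to_uint16 quantizes the float32 inputs to the
nearest representable bfloat16, so the NumPy reference must be built from the
already-quantized values, or the check would compare against probabilities the
kernel never actually received. The round trip, sketched with the truncating
encoding from the PATCH 04 note:

    import numpy as np

    def bf16_round_trip(x):
        # float32 -> bfloat16 bits -> float32, i.e. what the test now applies
        # to self.x and self.out before computing the reference outputs.
        bits = (np.asarray(x, np.float32).view(np.uint32) >> 16).astype(np.uint16)
        return (bits.astype(np.uint32) << 16).view(np.float32)

    x = bf16_round_trip(np.random.uniform(size=(1000, 784)).astype("float32"))
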
From e7ad7f237110421a01203bd09354de97444016e4 Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Tue, 16 May 2023 10:06:41 +0800
Subject: [PATCH 21/22] fix bug of import

---
 .../paddle/fluid/tests/unittests/test_bernoulli_op.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
index 7b3fabdf1484f..b4ae430d5ead6 100644
--- a/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
+++ b/python/paddle/fluid/tests/unittests/test_bernoulli_op.py
@@ -15,7 +15,11 @@
 import unittest
 
 import numpy as np
-from eager_op_test import OpTest, convert_float_to_uint16
+from eager_op_test import (
+    OpTest,
+    convert_float_to_uint16,
+    convert_uint16_to_float,
+)
 
 import paddle
 from paddle.fluid import core
@@ -40,7 +44,7 @@ def setUp(self):
             "X": np.random.uniform(size=(1000, 784)).astype(self.dtype)
         }
         self.attrs = {}
-        self.outputs = {"Out": np.zeros((1000, 784)).astype(self.dtype}
+        self.outputs = {"Out": np.zeros((1000, 784)).astype(self.dtype)}
 
     def init_dtype(self):
         self.dtype = np.float32
@@ -134,7 +138,7 @@ def init_test_case(self):
         self.out = np.zeros((1000, 784)).astype("float32")
         self.x = convert_uint16_to_float(convert_float_to_uint16(self.x))
         self.out = convert_uint16_to_float(convert_float_to_uint16(self.out))
-    
+
     def verify_output(self, outs):
From f922dd818902f0f19741064806251a1b2fd482fd Mon Sep 17 00:00:00 2001
From: longranger2 <836253168@qq.com>
Date: Wed, 31 May 2023 16:07:14 +0800
Subject: [PATCH 22/22] remove the trunc

---
 paddle/phi/kernels/gpu/bernoulli_kernel.cu  |  2 +-
 paddle/phi/kernels/gpu/trunc_grad_kernel.cu |  4 +---
 paddle/phi/kernels/gpu/trunc_kernel.cu      | 17 +++--------------
 3 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/paddle/phi/kernels/gpu/bernoulli_kernel.cu b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
index 18cd7e796ef11..be41dcb524947 100644
--- a/paddle/phi/kernels/gpu/bernoulli_kernel.cu
+++ b/paddle/phi/kernels/gpu/bernoulli_kernel.cu
@@ -58,7 +58,7 @@ __global__ void bernoulli_cuda_kernel(
       size_t idx = i + j;
       if (idx < size) {
         out_data[idx] =
-            static_cast<T>((&rand.x)[j], static_cast<MPType>(x_data[idx]));
+            static_cast<T>((&rand.x)[j] <= static_cast<MPType>(x_data[idx]));
       }
     }
   }
diff --git a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu
index 40e1404cd900d..8a88383e6e4f0 100644
--- a/paddle/phi/kernels/gpu/trunc_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_grad_kernel.cu
@@ -52,6 +52,4 @@ PD_REGISTER_KERNEL(trunc_grad,
                    float,
                    double,
                    int,
-                   int64_t,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16) {}
+                   int64_t) {}
diff --git a/paddle/phi/kernels/gpu/trunc_kernel.cu b/paddle/phi/kernels/gpu/trunc_kernel.cu
index bdbdb80a97f5c..4c5876c2ba924 100644
--- a/paddle/phi/kernels/gpu/trunc_kernel.cu
+++ b/paddle/phi/kernels/gpu/trunc_kernel.cu
@@ -28,10 +28,7 @@ template <typename T>
 class TruncFunctor {
  public:
   __device__ TruncFunctor(const T x) : x_(x) {}
-  __device__ T operator()() {
-    using MPType = typename phi::dtype::MPTypeTrait<T>::Type;
-    return static_cast<T>(trunc(static_cast<MPType>(x_)));
-  }
+  __device__ T operator()() { return trunc(x_); }
 
  public:
   const T x_;
@@ -82,13 +79,5 @@ void TruncKernel(const Context& dev_ctx,
 
 }  // namespace phi
 
-PD_REGISTER_KERNEL(trunc,
-                   GPU,
-                   ALL_LAYOUT,
-                   phi::TruncKernel,
-                   float,
-                   double,
-                   int,
-                   int64_t,
-                   phi::dtype::float16,
-                   phi::dtype::bfloat16) {}
+PD_REGISTER_KERNEL(
+    trunc, GPU, ALL_LAYOUT, phi::TruncKernel, float, double, int, int64_t) {}
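
Note on the final state (PATCH 22): the fp16/bf16 work on trunc and trunc_grad
is backed out entirely, leaving only bernoulli registered for the new dtypes,
and the kernel's comparison operator is restored. The resulting per-element
behaviour, modelled in NumPy (MPType is float32 for float16/bfloat16 and T
itself for float/double):

    import numpy as np

    x = np.random.uniform(size=(1000, 784)).astype(np.float16)  # probabilities, dtype T
    rand = np.random.uniform(size=x.shape).astype(np.float32)   # curand_uniform4 output
    out = (rand <= x.astype(np.float32)).astype(np.float16)     # static_cast<T>(rand <= MPType(x))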