From 7b702bcad76887858c0658b523edb89c27bb8830 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Wed, 21 Jun 2023 23:53:06 -0700 Subject: [PATCH 1/2] floor_divide now rounds signed integers toward negative infinity - Resolves #1247 --- .../elementwise_functions/floor_divide.hpp | 17 ++++++++++--- dpctl/tests/elementwise/test_floor_divide.py | 25 +++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index b6fe105bf8..2bed776f2a 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -56,14 +56,25 @@ struct FloorDivideFunctor using supports_sg_loadstore = std::negation, tu_ns::is_complex>>; // TRUE - using supports_vec = std::negation< - std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation, + tu_ns::is_complex, + std::conjunction, std::is_signed>, + std::conjunction, std::is_signed>>>; + // no vec overload for signed integers to avoid loop resT operator()(const argT1 &in1, const argT2 &in2) { auto tmp = in1 / in2; if constexpr (std::is_integral_v) { - return tmp; + if constexpr (std::is_unsigned_v) { + return tmp; + } + else { + auto rem = in1 % in2; + auto corr = (rem != 0 && ((rem < 0) != (in2 < 0))); + return (tmp - corr); + } } else { return sycl::floor(tmp); diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py index def39a6570..f9be677c06 100644 --- a/dpctl/tests/elementwise/test_floor_divide.py +++ b/dpctl/tests/elementwise/test_floor_divide.py @@ -186,3 +186,28 @@ def __sycl_usm_array_interface__(self): c = Canary() with pytest.raises(ValueError): dpt.floor_divide(a, c) + + +def test_floor_divide_gh_1247(): + get_queue_or_skip() + + x = dpt.ones(1, dtype="i4") + res = dpt.floor_divide(x, -2) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype) + ) + + x = dpt.full(1, -1, dtype="i4") + res = dpt.floor_divide(x, 2) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype) + ) + + x = dpt.arange(-5, 6, 1, dtype="i4") + np.testing.assert_array_equal( + dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3) + ) + np.testing.assert_array_equal( + dpt.asnumpy(dpt.floor_divide(x, -3)), + np.floor_divide(dpt.asnumpy(x), -3), + ) From 3f436b27ca3a1e779a2f466cb9e9895c0c8c9372 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 23 Jun 2023 14:23:51 -0700 Subject: [PATCH 2/2] Integer division by 0 in floor_divide now handled properly - Also fully enables sycl::vec overload for floor_divide - Added a test for integer division by 0 behavior --- .../elementwise_functions/floor_divide.hpp | 68 ++++++++++++------- dpctl/tests/elementwise/test_floor_divide.py | 24 ++++++- 2 files changed, 67 insertions(+), 25 deletions(-) diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index 2bed776f2a..43e7e8ae90 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -53,27 +53,27 @@ template struct FloorDivideFunctor { - using supports_sg_loadstore = - std::negation, - tu_ns::is_complex>>; // TRUE - using supports_vec = std::negation, - tu_ns::is_complex, - std::conjunction, std::is_signed>, - std::conjunction, std::is_signed>>>; - // no vec overload for signed integers to avoid loop + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; + using supports_vec = std::negation< + std::disjunction, tu_ns::is_complex>>; resT operator()(const argT1 &in1, const argT2 &in2) { auto tmp = in1 / in2; if constexpr (std::is_integral_v) { if constexpr (std::is_unsigned_v) { - return tmp; + return (in2 == argT2(0)) ? resT(0) : tmp; } else { - auto rem = in1 % in2; - auto corr = (rem != 0 && ((rem < 0) != (in2 < 0))); - return (tmp - corr); + if (in2 == argT2(0)) { + return resT(0); + } + else { + auto rem = in1 % in2; + auto corr = (rem != 0 && ((rem < 0) != (in2 < 0))); + return (tmp - corr); + } } } else { @@ -86,17 +86,37 @@ struct FloorDivideFunctor const sycl::vec &in2) { auto tmp = in1 / in2; - if constexpr (std::is_same_v && - std::is_integral_v) - { - return tmp; - } - else if constexpr (std::is_integral_v) { - using dpctl::tensor::type_utils::vec_cast; - return vec_cast( - tmp); + using tmpT = typename decltype(tmp)::element_type; + if constexpr (std::is_integral_v) { + if constexpr (std::is_signed_v) { + auto rem_tmp = in1 % in2; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + tmp[i] = tmpT(0); + } + else { + tmpT corr = (rem_tmp[i] != 0 && + ((rem_tmp[i] < 0) != (in2[i] < 0))); + tmp[i] -= corr; + } + } + } + else { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + tmp[i] = tmpT(0); + } + } + } + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + return vec_cast(tmp); + } } else { sycl::vec res = sycl::floor(tmp); diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py index f9be677c06..61b77afccd 100644 --- a/dpctl/tests/elementwise/test_floor_divide.py +++ b/dpctl/tests/elementwise/test_floor_divide.py @@ -203,7 +203,8 @@ def test_floor_divide_gh_1247(): dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype) ) - x = dpt.arange(-5, 6, 1, dtype="i4") + # attempt to invoke sycl::vec overload using a larger array + x = dpt.arange(-64, 65, 1, dtype="i4") np.testing.assert_array_equal( dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3) ) @@ -211,3 +212,24 @@ def test_floor_divide_gh_1247(): dpt.asnumpy(dpt.floor_divide(x, -3)), np.floor_divide(dpt.asnumpy(x), -3), ) + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes[1:9]) +def test_floor_divide_integer_zero(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.arange(10, dtype=dtype, sycl_queue=q) + y = dpt.zeros_like(x, sycl_queue=q) + res = dpt.floor_divide(x, y) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype) + ) + + # attempt to invoke sycl::vec overload using a larger array + x = dpt.arange(129, dtype=dtype, sycl_queue=q) + y = dpt.zeros_like(x, sycl_queue=q) + res = dpt.floor_divide(x, y) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype) + )