diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp index b6fe105bf8..43e7e8ae90 100644 --- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp +++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp @@ -53,9 +53,8 @@ template struct FloorDivideFunctor { - using supports_sg_loadstore = - std::negation, - tu_ns::is_complex>>; // TRUE + using supports_sg_loadstore = std::negation< + std::disjunction, tu_ns::is_complex>>; using supports_vec = std::negation< std::disjunction, tu_ns::is_complex>>; @@ -63,7 +62,19 @@ struct FloorDivideFunctor { auto tmp = in1 / in2; if constexpr (std::is_integral_v) { - return tmp; + if constexpr (std::is_unsigned_v) { + return (in2 == argT2(0)) ? resT(0) : tmp; + } + else { + if (in2 == argT2(0)) { + return resT(0); + } + else { + auto rem = in1 % in2; + auto corr = (rem != 0 && ((rem < 0) != (in2 < 0))); + return (tmp - corr); + } + } } else { return sycl::floor(tmp); @@ -75,17 +86,37 @@ struct FloorDivideFunctor const sycl::vec &in2) { auto tmp = in1 / in2; - if constexpr (std::is_same_v && - std::is_integral_v) - { - return tmp; - } - else if constexpr (std::is_integral_v) { - using dpctl::tensor::type_utils::vec_cast; - return vec_cast( - tmp); + using tmpT = typename decltype(tmp)::element_type; + if constexpr (std::is_integral_v) { + if constexpr (std::is_signed_v) { + auto rem_tmp = in1 % in2; +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + tmp[i] = tmpT(0); + } + else { + tmpT corr = (rem_tmp[i] != 0 && + ((rem_tmp[i] < 0) != (in2[i] < 0))); + tmp[i] -= corr; + } + } + } + else { +#pragma unroll + for (int i = 0; i < vec_sz; ++i) { + if (in2[i] == argT2(0)) { + tmp[i] = tmpT(0); + } + } + } + if constexpr (std::is_same_v) { + return tmp; + } + else { + using dpctl::tensor::type_utils::vec_cast; + return vec_cast(tmp); + } } else { sycl::vec res = sycl::floor(tmp); diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py index def39a6570..61b77afccd 100644 --- a/dpctl/tests/elementwise/test_floor_divide.py +++ b/dpctl/tests/elementwise/test_floor_divide.py @@ -186,3 +186,50 @@ def __sycl_usm_array_interface__(self): c = Canary() with pytest.raises(ValueError): dpt.floor_divide(a, c) + + +def test_floor_divide_gh_1247(): + get_queue_or_skip() + + x = dpt.ones(1, dtype="i4") + res = dpt.floor_divide(x, -2) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype) + ) + + x = dpt.full(1, -1, dtype="i4") + res = dpt.floor_divide(x, 2) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype) + ) + + # attempt to invoke sycl::vec overload using a larger array + x = dpt.arange(-64, 65, 1, dtype="i4") + np.testing.assert_array_equal( + dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3) + ) + np.testing.assert_array_equal( + dpt.asnumpy(dpt.floor_divide(x, -3)), + np.floor_divide(dpt.asnumpy(x), -3), + ) + + +@pytest.mark.parametrize("dtype", _no_complex_dtypes[1:9]) +def test_floor_divide_integer_zero(dtype): + q = get_queue_or_skip() + skip_if_dtype_not_supported(dtype, q) + + x = dpt.arange(10, dtype=dtype, sycl_queue=q) + y = dpt.zeros_like(x, sycl_queue=q) + res = dpt.floor_divide(x, y) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype) + ) + + # attempt to invoke sycl::vec overload using a larger array + x = dpt.arange(129, dtype=dtype, sycl_queue=q) + y = dpt.zeros_like(x, sycl_queue=q) + res = dpt.floor_divide(x, y) + np.testing.assert_array_equal( + dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype) + )