From 7b702bcad76887858c0658b523edb89c27bb8830 Mon Sep 17 00:00:00 2001
From: Nikita Grigorian <nikita.grigorian@intel.com>
Date: Wed, 21 Jun 2023 23:53:06 -0700
Subject: [PATCH 1/2] floor_divide now rounds signed integers toward negative
 infinity - Resolves #1247

---
 .../elementwise_functions/floor_divide.hpp    | 17 ++++++++++---
 dpctl/tests/elementwise/test_floor_divide.py  | 25 +++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
index b6fe105bf8..2bed776f2a 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
@@ -56,14 +56,25 @@ struct FloorDivideFunctor
     using supports_sg_loadstore =
         std::negation<std::disjunction<tu_ns::is_complex<argT1>,
                                        tu_ns::is_complex<argT2>>>; // TRUE
-    using supports_vec = std::negation<
-        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::negation<std::disjunction<
+        tu_ns::is_complex<argT1>,
+        tu_ns::is_complex<argT2>,
+        std::conjunction<std::is_integral<argT1>, std::is_signed<argT1>>,
+        std::conjunction<std::is_integral<argT2>, std::is_signed<argT2>>>>;
+    // no vec overload for signed integers to avoid loop
 
     resT operator()(const argT1 &in1, const argT2 &in2)
     {
         auto tmp = in1 / in2;
         if constexpr (std::is_integral_v<decltype(tmp)>) {
-            return tmp;
+            if constexpr (std::is_unsigned_v<decltype(tmp)>) {
+                return tmp;
+            }
+            else {
+                auto rem = in1 % in2;
+                auto corr = (rem != 0 && ((rem < 0) != (in2 < 0)));
+                return (tmp - corr);
+            }
         }
         else {
             return sycl::floor(tmp);
diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py
index def39a6570..f9be677c06 100644
--- a/dpctl/tests/elementwise/test_floor_divide.py
+++ b/dpctl/tests/elementwise/test_floor_divide.py
@@ -186,3 +186,28 @@ def __sycl_usm_array_interface__(self):
     c = Canary()
     with pytest.raises(ValueError):
         dpt.floor_divide(a, c)
+
+
+def test_floor_divide_gh_1247():
+    get_queue_or_skip()
+
+    x = dpt.ones(1, dtype="i4")
+    res = dpt.floor_divide(x, -2)
+    np.testing.assert_array_equal(
+        dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype)
+    )
+
+    x = dpt.full(1, -1, dtype="i4")
+    res = dpt.floor_divide(x, 2)
+    np.testing.assert_array_equal(
+        dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype)
+    )
+
+    x = dpt.arange(-5, 6, 1, dtype="i4")
+    np.testing.assert_array_equal(
+        dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3)
+    )
+    np.testing.assert_array_equal(
+        dpt.asnumpy(dpt.floor_divide(x, -3)),
+        np.floor_divide(dpt.asnumpy(x), -3),
+    )

From 3f436b27ca3a1e779a2f466cb9e9895c0c8c9372 Mon Sep 17 00:00:00 2001
From: Nikita Grigorian <nikita.grigorian@intel.com>
Date: Fri, 23 Jun 2023 14:23:51 -0700
Subject: [PATCH 2/2] Integer division by 0 in floor_divide now handled
 properly - Also fully enables sycl::vec overload for floor_divide - Added a
 test for integer division by 0 behavior

---
 .../elementwise_functions/floor_divide.hpp    | 68 ++++++++++++-------
 dpctl/tests/elementwise/test_floor_divide.py  | 24 ++++++-
 2 files changed, 67 insertions(+), 25 deletions(-)

diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
index 2bed776f2a..43e7e8ae90 100644
--- a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/floor_divide.hpp
@@ -53,27 +53,27 @@ template <typename argT1, typename argT2, typename resT>
 struct FloorDivideFunctor
 {
 
-    using supports_sg_loadstore =
-        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
-                                       tu_ns::is_complex<argT2>>>; // TRUE
-    using supports_vec = std::negation<std::disjunction<
-        tu_ns::is_complex<argT1>,
-        tu_ns::is_complex<argT2>,
-        std::conjunction<std::is_integral<argT1>, std::is_signed<argT1>>,
-        std::conjunction<std::is_integral<argT2>, std::is_signed<argT2>>>>;
-    // no vec overload for signed integers to avoid loop
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
 
     resT operator()(const argT1 &in1, const argT2 &in2)
     {
         auto tmp = in1 / in2;
         if constexpr (std::is_integral_v<decltype(tmp)>) {
             if constexpr (std::is_unsigned_v<decltype(tmp)>) {
-                return tmp;
+                return (in2 == argT2(0)) ? resT(0) : tmp;
             }
             else {
-                auto rem = in1 % in2;
-                auto corr = (rem != 0 && ((rem < 0) != (in2 < 0)));
-                return (tmp - corr);
+                if (in2 == argT2(0)) {
+                    return resT(0);
+                }
+                else {
+                    auto rem = in1 % in2;
+                    auto corr = (rem != 0 && ((rem < 0) != (in2 < 0)));
+                    return (tmp - corr);
+                }
             }
         }
         else {
@@ -86,17 +86,37 @@ struct FloorDivideFunctor
                                        const sycl::vec<argT2, vec_sz> &in2)
     {
         auto tmp = in1 / in2;
-        if constexpr (std::is_same_v<resT,
-                                     typename decltype(tmp)::element_type> &&
-                      std::is_integral_v<resT>)
-        {
-            return tmp;
-        }
-        else if constexpr (std::is_integral_v<typename decltype(
-                               tmp)::element_type>) {
-            using dpctl::tensor::type_utils::vec_cast;
-            return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
-                tmp);
+        using tmpT = typename decltype(tmp)::element_type;
+        if constexpr (std::is_integral_v<tmpT>) {
+            if constexpr (std::is_signed_v<tmpT>) {
+                auto rem_tmp = in1 % in2;
+#pragma unroll
+                for (int i = 0; i < vec_sz; ++i) {
+                    if (in2[i] == argT2(0)) {
+                        tmp[i] = tmpT(0);
+                    }
+                    else {
+                        tmpT corr = (rem_tmp[i] != 0 &&
+                                     ((rem_tmp[i] < 0) != (in2[i] < 0)));
+                        tmp[i] -= corr;
+                    }
+                }
+            }
+            else {
+#pragma unroll
+                for (int i = 0; i < vec_sz; ++i) {
+                    if (in2[i] == argT2(0)) {
+                        tmp[i] = tmpT(0);
+                    }
+                }
+            }
+            if constexpr (std::is_same_v<resT, tmpT>) {
+                return tmp;
+            }
+            else {
+                using dpctl::tensor::type_utils::vec_cast;
+                return vec_cast<resT, tmpT, vec_sz>(tmp);
+            }
         }
         else {
             sycl::vec<resT, vec_sz> res = sycl::floor(tmp);
diff --git a/dpctl/tests/elementwise/test_floor_divide.py b/dpctl/tests/elementwise/test_floor_divide.py
index f9be677c06..61b77afccd 100644
--- a/dpctl/tests/elementwise/test_floor_divide.py
+++ b/dpctl/tests/elementwise/test_floor_divide.py
@@ -203,7 +203,8 @@ def test_floor_divide_gh_1247():
         dpt.asnumpy(res), np.full(res.shape, -1, dtype=res.dtype)
     )
 
-    x = dpt.arange(-5, 6, 1, dtype="i4")
+    # attempt to invoke sycl::vec overload using a larger array
+    x = dpt.arange(-64, 65, 1, dtype="i4")
     np.testing.assert_array_equal(
         dpt.asnumpy(dpt.floor_divide(x, 3)), np.floor_divide(dpt.asnumpy(x), 3)
     )
@@ -211,3 +212,24 @@ def test_floor_divide_gh_1247():
         dpt.asnumpy(dpt.floor_divide(x, -3)),
         np.floor_divide(dpt.asnumpy(x), -3),
     )
+
+
+@pytest.mark.parametrize("dtype", _no_complex_dtypes[1:9])
+def test_floor_divide_integer_zero(dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(dtype, q)
+
+    x = dpt.arange(10, dtype=dtype, sycl_queue=q)
+    y = dpt.zeros_like(x, sycl_queue=q)
+    res = dpt.floor_divide(x, y)
+    np.testing.assert_array_equal(
+        dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype)
+    )
+
+    # attempt to invoke sycl::vec overload using a larger array
+    x = dpt.arange(129, dtype=dtype, sycl_queue=q)
+    y = dpt.zeros_like(x, sycl_queue=q)
+    res = dpt.floor_divide(x, y)
+    np.testing.assert_array_equal(
+        dpt.asnumpy(res), np.zeros(x.shape, dtype=res.dtype)
+    )