diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py
index 77a6f233fd..3e470a736c 100644
--- a/dpctl/tensor/__init__.py
+++ b/dpctl/tensor/__init__.py
@@ -101,11 +101,14 @@
     exp,
     expm1,
     floor_divide,
+    greater,
+    greater_equal,
     imag,
     isfinite,
     isinf,
     isnan,
     less,
+    less_equal,
     log,
     log1p,
     multiply,
@@ -199,11 +202,14 @@
     "cos",
     "exp",
     "expm1",
+    "greater",
+    "greater_equal",
     "imag",
     "isinf",
     "isnan",
     "isfinite",
     "less",
+    "less_equal",
     "log",
     "log1p",
     "proj",
diff --git a/dpctl/tensor/_elementwise_funcs.py b/dpctl/tensor/_elementwise_funcs.py
index 12651dd0fb..33357696f9 100644
--- a/dpctl/tensor/_elementwise_funcs.py
+++ b/dpctl/tensor/_elementwise_funcs.py
@@ -297,10 +297,62 @@
 )
 
 # B11: ==== GREATER       (x1, x2)
-# FIXME: implement B11
+_greater_docstring_ = """
+greater(x1, x2, out=None, order='K')
+Computes the greater-than test results for each element `x1_i` of
+the input array `x1` with the respective element `x2_i` of input array `x2`.
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the result of element-wise greater-than comparison.
+        The returned array always has the boolean data type, irrespective of
+        the data types of the inputs.
+"""
+
+greater = BinaryElementwiseFunc(
+    "greater", ti._greater_result_type, ti._greater, _greater_docstring_
+)
 
 # B12: ==== GREATER_EQUAL (x1, x2)
-# FIXME: implement B12
+_greater_equal_docstring_ = """
+greater_equal(x1, x2, out=None, order='K')
+Computes the greater-than or equal-to test results for each element `x1_i` of
+the input array `x1` with the respective element `x2_i` of input array `x2`.
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the result of element-wise greater-than or equal-to
+        comparison.
+        The returned array always has the boolean data type, irrespective of
+        the data types of the inputs.
+"""
+
+greater_equal = BinaryElementwiseFunc(
+    "greater_equal",
+    ti._greater_equal_result_type,
+    ti._greater_equal,
+    _greater_equal_docstring_,
+)
 
 # U16: ==== IMAG        (x)
 _imag_docstring = """
@@ -434,7 +486,35 @@
 )
 
 # B14: ==== LESS_EQUAL  (x1, x2)
-# FIXME: implement B14
+_less_equal_docstring_ = """
+less_equal(x1, x2, out=None, order='K')
+Computes the less-than or equal-to test results for each element `x1_i` of
+the input array `x1` with the respective element `x2_i` of input array `x2`.
+Args:
+    x1 (usm_ndarray):
+        First input array, expected to have numeric data type.
+    x2 (usm_ndarray):
+        Second input array, also expected to have numeric data type.
+    out ({None, usm_ndarray}, optional):
+        Output array to populate.
+        Array must have the correct shape and the expected data type.
+    order ("C","F","A","K", optional):
+        Memory layout of the new output array, if parameter `out` is `None`.
+        Default: "K".
+Returns:
+    usm_ndarray:
+        An array containing the result of element-wise less-than or equal-to
+        comparison.
+        The returned array always has the boolean data type, irrespective of
+        the data types of the inputs.
+"""
+
+less_equal = BinaryElementwiseFunc(
+    "less_equal",
+    ti._less_equal_result_type,
+    ti._less_equal,
+    _less_equal_docstring_,
+)
 
 # U20: ==== LOG         (x)
 _log_docstring = """
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp
new file mode 100644
index 0000000000..e95047a510
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater.hpp
@@ -0,0 +1,319 @@
+//=== greater.hpp -   Binary function GREATER              ------
+//*-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of the greater-than
+/// comparison of tensor elements.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace greater
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct GreaterFunctor
+{
+    static_assert(std::is_same_v<resT, bool>);
+
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (std::is_same_v<argT1, std::complex<float>> &&
+                      std::is_same_v<argT2, float>)
+        {
+            float real1 = std::real(in1);
+            return (real1 == in2) ? (std::imag(in1) > 0.0f) : real1 > in2;
+        }
+        else if constexpr (std::is_same_v<argT1, float> &&
+                           std::is_same_v<argT2, std::complex<float>>)
+        {
+            float real2 = std::real(in2);
+            return (in1 == real2) ? (0.0f > std::imag(in2)) : in1 > real2;
+        }
+        else if constexpr (tu_ns::is_complex<argT1>::value ||
+                           tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using realT = typename argT1::value_type;
+            realT real1 = std::real(in1);
+            realT real2 = std::real(in2);
+
+            return (real1 == real2) ? (std::imag(in1) > std::imag(in2))
+                                    : real1 > real2;
+        }
+        else {
+            return (in1 > in2);
+        }
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+
+        auto tmp = (in1 > in2);
+
+        if constexpr (std::is_same_v<resT,
+                                     typename decltype(tmp)::element_type>) {
+            return tmp;
+        }
+        else {
+            using dpctl::tensor::type_utils::vec_cast;
+
+            return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
+                tmp);
+        }
+    }
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using GreaterContigFunctor =
+    elementwise_common::BinaryContigFunctor<argT1,
+                                            argT2,
+                                            resT,
+                                            GreaterFunctor<argT1, argT2, resT>,
+                                            vec_sz,
+                                            n_vecs>;
+
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using GreaterStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1,
+    argT2,
+    resT,
+    IndexerT,
+    GreaterFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct GreaterOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::uint8_t, T2, std::uint8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, std::int8_t, T2, std::int8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int16_t, T2, std::int16_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int32_t, T2, std::int32_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int64_t, T2, std::int64_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, sycl::half, T2, sycl::half, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, float, T2, std::complex<float>, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::complex<float>, T2, float, bool>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz,
+          unsigned int n_vecs>
+class greater_contig_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event greater_contig_impl(sycl::queue exec_q,
+                                size_t nelems,
+                                const char *arg1_p,
+                                py::ssize_t arg1_offset,
+                                const char *arg2_p,
+                                py::ssize_t arg2_offset,
+                                char *res_p,
+                                py::ssize_t res_offset,
+                                const std::vector<sycl::event> &depends = {})
+{
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        size_t lws = 64;
+        constexpr unsigned int vec_sz = 4;
+        constexpr unsigned int n_vecs = 2;
+        const size_t n_groups =
+            ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
+        const auto gws_range = sycl::range<1>(n_groups * lws);
+        const auto lws_range = sycl::range<1>(lws);
+
+        using resTy = typename GreaterOutputType<argTy1, argTy2>::value_type;
+
+        const argTy1 *arg1_tp =
+            reinterpret_cast<const argTy1 *>(arg1_p) + arg1_offset;
+        const argTy2 *arg2_tp =
+            reinterpret_cast<const argTy2 *>(arg2_p) + arg2_offset;
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p) + res_offset;
+
+        cgh.parallel_for<
+            greater_contig_kernel<argTy1, argTy2, resTy, vec_sz, n_vecs>>(
+            sycl::nd_range<1>(gws_range, lws_range),
+            GreaterContigFunctor<argTy1, argTy2, resTy, vec_sz, n_vecs>(
+                arg1_tp, arg2_tp, res_tp, nelems));
+    });
+    return comp_ev;
+}
+
+template <typename fnT, typename T1, typename T2> struct GreaterContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename GreaterOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = greater_contig_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct GreaterTypeMapFactory
+{
+    /*! @brief get typeid for output type of operator()>(x, y), always bool */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename GreaterOutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class greater_strided_strided_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+greater_strided_impl(sycl::queue exec_q,
+                     size_t nelems,
+                     int nd,
+                     const py::ssize_t *shape_and_strides,
+                     const char *arg1_p,
+                     py::ssize_t arg1_offset,
+                     const char *arg2_p,
+                     py::ssize_t arg2_offset,
+                     char *res_p,
+                     py::ssize_t res_offset,
+                     const std::vector<sycl::event> &depends,
+                     const std::vector<sycl::event> &additional_depends)
+{
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        cgh.depends_on(additional_depends);
+
+        using resTy = typename GreaterOutputType<argTy1, argTy2>::value_type;
+
+        using IndexerT =
+            typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer;
+
+        IndexerT indexer{nd, arg1_offset, arg2_offset, res_offset,
+                         shape_and_strides};
+
+        const argTy1 *arg1_tp = reinterpret_cast<const argTy1 *>(arg1_p);
+        const argTy2 *arg2_tp = reinterpret_cast<const argTy2 *>(arg2_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<
+            greater_strided_strided_kernel<argTy1, argTy2, resTy, IndexerT>>(
+            {nelems}, GreaterStridedFunctor<argTy1, argTy2, resTy, IndexerT>(
+                          arg1_tp, arg2_tp, res_tp, indexer));
+    });
+    return comp_ev;
+}
+
+template <typename fnT, typename T1, typename T2> struct GreaterStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename GreaterOutputType<T1, T2>::value_type, void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = greater_strided_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+} // namespace greater
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp
new file mode 100644
index 0000000000..ceea2b6a5f
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/greater_equal.hpp
@@ -0,0 +1,329 @@
+//=== greater_equal.hpp -   Binary function GREATER_EQUAL        ------
+//*-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of the
+/// greater-than or equal-to comparison of tensor elements.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace greater_equal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT>
+struct GreaterEqualFunctor
+{
+    static_assert(std::is_same_v<resT, bool>);
+
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (std::is_same_v<argT1, std::complex<float>> &&
+                      std::is_same_v<argT2, float>)
+        {
+            float real1 = std::real(in1);
+            return (real1 == in2) ? (std::imag(in1) >= 0.0f) : real1 >= in2;
+        }
+        else if constexpr (std::is_same_v<argT1, float> &&
+                           std::is_same_v<argT2, std::complex<float>>)
+        {
+            float real2 = std::real(in2);
+            return (in1 == real2) ? (0.0f >= std::imag(in2)) : in1 >= real2;
+        }
+        else if constexpr (tu_ns::is_complex<argT1>::value ||
+                           tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using realT = typename argT1::value_type;
+            realT real1 = std::real(in1);
+            realT real2 = std::real(in2);
+
+            return (real1 == real2) ? (std::imag(in1) >= std::imag(in2))
+                                    : real1 >= real2;
+        }
+        else {
+            return (in1 >= in2);
+        }
+    }
+
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+
+        auto tmp = (in1 >= in2);
+
+        if constexpr (std::is_same_v<resT,
+                                     typename decltype(tmp)::element_type>) {
+            return tmp;
+        }
+        else {
+            using dpctl::tensor::type_utils::vec_cast;
+
+            return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
+                tmp);
+        }
+    }
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using GreaterEqualContigFunctor = elementwise_common::BinaryContigFunctor<
+    argT1,
+    argT2,
+    resT,
+    GreaterEqualFunctor<argT1, argT2, resT>,
+    vec_sz,
+    n_vecs>;
+
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using GreaterEqualStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1,
+    argT2,
+    resT,
+    IndexerT,
+    GreaterEqualFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct GreaterEqualOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::uint8_t, T2, std::uint8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, std::int8_t, T2, std::int8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int16_t, T2, std::int16_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int32_t, T2, std::int32_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int64_t, T2, std::int64_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, sycl::half, T2, sycl::half, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, float, T2, std::complex<float>, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::complex<float>, T2, float, bool>,
+        td_ns::DefaultResultEntry<void>>::result_type;
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz,
+          unsigned int n_vecs>
+class greater_equal_contig_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+greater_equal_contig_impl(sycl::queue exec_q,
+                          size_t nelems,
+                          const char *arg1_p,
+                          py::ssize_t arg1_offset,
+                          const char *arg2_p,
+                          py::ssize_t arg2_offset,
+                          char *res_p,
+                          py::ssize_t res_offset,
+                          const std::vector<sycl::event> &depends = {})
+{
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        size_t lws = 64;
+        constexpr unsigned int vec_sz = 4;
+        constexpr unsigned int n_vecs = 2;
+        const size_t n_groups =
+            ((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
+        const auto gws_range = sycl::range<1>(n_groups * lws);
+        const auto lws_range = sycl::range<1>(lws);
+
+        using resTy =
+            typename GreaterEqualOutputType<argTy1, argTy2>::value_type;
+
+        const argTy1 *arg1_tp =
+            reinterpret_cast<const argTy1 *>(arg1_p) + arg1_offset;
+        const argTy2 *arg2_tp =
+            reinterpret_cast<const argTy2 *>(arg2_p) + arg2_offset;
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p) + res_offset;
+
+        cgh.parallel_for<
+            greater_equal_contig_kernel<argTy1, argTy2, resTy, vec_sz, n_vecs>>(
+            sycl::nd_range<1>(gws_range, lws_range),
+            GreaterEqualContigFunctor<argTy1, argTy2, resTy, vec_sz, n_vecs>(
+                arg1_tp, arg2_tp, res_tp, nelems));
+    });
+    return comp_ev;
+}
+
+template <typename fnT, typename T1, typename T2>
+struct GreaterEqualContigFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename GreaterEqualOutputType<T1, T2>::value_type,
+                          void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = greater_equal_contig_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2>
+struct GreaterEqualTypeMapFactory
+{
+    /*! @brief get typeid for output type of operator()>=(x, y), always bool */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename GreaterEqualOutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class greater_equal_strided_strided_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+greater_equal_strided_impl(sycl::queue exec_q,
+                           size_t nelems,
+                           int nd,
+                           const py::ssize_t *shape_and_strides,
+                           const char *arg1_p,
+                           py::ssize_t arg1_offset,
+                           const char *arg2_p,
+                           py::ssize_t arg2_offset,
+                           char *res_p,
+                           py::ssize_t res_offset,
+                           const std::vector<sycl::event> &depends,
+                           const std::vector<sycl::event> &additional_depends)
+{
+    sycl::event comp_ev = exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        cgh.depends_on(additional_depends);
+
+        using resTy =
+            typename GreaterEqualOutputType<argTy1, argTy2>::value_type;
+
+        using IndexerT =
+            typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer;
+
+        IndexerT indexer{nd, arg1_offset, arg2_offset, res_offset,
+                         shape_and_strides};
+
+        const argTy1 *arg1_tp = reinterpret_cast<const argTy1 *>(arg1_p);
+        const argTy2 *arg2_tp = reinterpret_cast<const argTy2 *>(arg2_p);
+        resTy *res_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<greater_equal_strided_strided_kernel<argTy1, argTy2,
+                                                              resTy, IndexerT>>(
+            {nelems},
+            GreaterEqualStridedFunctor<argTy1, argTy2, resTy, IndexerT>(
+                arg1_tp, arg2_tp, res_tp, indexer));
+    });
+    return comp_ev;
+}
+
+template <typename fnT, typename T1, typename T2>
+struct GreaterEqualStridedFactory
+{
+    fnT get()
+    {
+        if constexpr (std::is_same_v<
+                          typename GreaterEqualOutputType<T1, T2>::value_type,
+                          void>)
+        {
+            fnT fn = nullptr;
+            return fn;
+        }
+        else {
+            fnT fn = greater_equal_strided_impl<T1, T2>;
+            return fn;
+        }
+    }
+};
+
+} // namespace greater_equal
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp
new file mode 100644
index 0000000000..77a120fed2
--- /dev/null
+++ b/dpctl/tensor/libtensor/include/kernels/elementwise_functions/less_equal.hpp
@@ -0,0 +1,321 @@
+//=== less_equal.hpp -   Binary function LESS_EQUAL            ------
+//*-C++-*--/===//
+//
+//                      Data Parallel Control (dpctl)
+//
+// Copyright 2020-2023 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+//===---------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines kernels for elementwise evaluation of comparison of
+/// tensor elements.
+//===---------------------------------------------------------------------===//
+
+#pragma once
+#include <CL/sycl.hpp>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+#include "utils/offset_utils.hpp"
+#include "utils/type_dispatch.hpp"
+#include "utils/type_utils.hpp"
+
+#include "kernels/elementwise_functions/common.hpp"
+#include <pybind11/pybind11.h>
+
+namespace dpctl
+{
+namespace tensor
+{
+namespace kernels
+{
+namespace less_equal
+{
+
+namespace py = pybind11;
+namespace td_ns = dpctl::tensor::type_dispatch;
+namespace tu_ns = dpctl::tensor::type_utils;
+
+template <typename argT1, typename argT2, typename resT> struct LessEqualFunctor
+{
+    static_assert(std::is_same_v<resT, bool>);
+    // both traits below are false whenever either argument type is complex
+    using supports_sg_loadstore = std::negation<
+        std::disjunction<tu_ns::is_complex<argT1>, tu_ns::is_complex<argT2>>>;
+    using supports_vec = std::conjunction<
+        std::is_same<argT1, argT2>,
+        std::negation<std::disjunction<tu_ns::is_complex<argT1>,
+                                       tu_ns::is_complex<argT2>>>>;
+    // scalar overload; complex values order by real part, then imaginary part
+    resT operator()(const argT1 &in1, const argT2 &in2)
+    {
+        if constexpr (std::is_same_v<argT1, std::complex<float>> &&
+                      std::is_same_v<argT2, float>)
+        {
+            float real1 = std::real(in1);
+            return (real1 == in2) ? (std::imag(in1) <= 0.0f) : real1 <= in2;
+        }
+        else if constexpr (std::is_same_v<argT1, float> &&
+                           std::is_same_v<argT2, std::complex<float>>)
+        {
+            float real2 = std::real(in2);
+            return (in1 == real2) ? (0.0f <= std::imag(in2)) : in1 <= real2;
+        }
+        else if constexpr (tu_ns::is_complex<argT1>::value ||
+                           tu_ns::is_complex<argT2>::value)
+        {
+            static_assert(std::is_same_v<argT1, argT2>);
+            using realT = typename argT1::value_type;
+            realT real1 = std::real(in1);
+            realT real2 = std::real(in2);
+            // equal real parts are tie-broken by the imaginary parts
+            return (real1 == real2) ? (std::imag(in1) <= std::imag(in2))
+                                    : real1 <= real2;
+        }
+        else {
+            return (in1 <= in2);
+        }
+    }
+    // sycl::vec overload; result is cast to resT when element types differ
+    template <int vec_sz>
+    sycl::vec<resT, vec_sz> operator()(const sycl::vec<argT1, vec_sz> &in1,
+                                       const sycl::vec<argT2, vec_sz> &in2)
+    {
+
+        auto tmp = (in1 <= in2);
+
+        if constexpr (std::is_same_v<resT,
+                                     typename decltype(tmp)::element_type>) {
+            return tmp;
+        }
+        else {
+            using dpctl::tensor::type_utils::vec_cast;
+
+            return vec_cast<resT, typename decltype(tmp)::element_type, vec_sz>(
+                tmp);
+        }
+    }
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz = 4,
+          unsigned int n_vecs = 2>
+using LessEqualContigFunctor = elementwise_common::BinaryContigFunctor<
+    argT1,
+    argT2,
+    resT,
+    LessEqualFunctor<argT1, argT2, resT>, // functor supplying the comparison
+    vec_sz,
+    n_vecs>;
+// strided counterpart: the same LessEqualFunctor, parameterized by IndexerT
+template <typename argT1, typename argT2, typename resT, typename IndexerT>
+using LessEqualStridedFunctor = elementwise_common::BinaryStridedFunctor<
+    argT1,
+    argT2,
+    resT,
+    IndexerT,
+    LessEqualFunctor<argT1, argT2, resT>>;
+
+template <typename T1, typename T2> struct LessEqualOutputType
+{
+    using value_type = typename std::disjunction< // disjunction is C++17
+                                                  // feature, supported by DPC++
+        td_ns::BinaryTypeMapResultEntry<T1, bool, T2, bool, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::uint8_t, T2, std::uint8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, std::int8_t, T2, std::int8_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint16_t,
+                                        T2,
+                                        std::uint16_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int16_t, T2, std::int16_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint32_t,
+                                        T2,
+                                        std::uint32_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int32_t, T2, std::int32_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::uint64_t,
+                                        T2,
+                                        std::uint64_t,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::int64_t, T2, std::int64_t, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, sycl::half, T2, sycl::half, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<float>,
+                                        T2,
+                                        std::complex<float>,
+                                        bool>,
+        td_ns::BinaryTypeMapResultEntry<T1,
+                                        std::complex<double>,
+                                        T2,
+                                        std::complex<double>,
+                                        bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, float, T2, std::complex<float>, bool>,
+        td_ns::
+            BinaryTypeMapResultEntry<T1, std::complex<float>, T2, float, bool>,
+        td_ns::DefaultResultEntry<void>>::result_type; // no match: void
+};
+
+template <typename argT1,
+          typename argT2,
+          typename resT,
+          unsigned int vec_sz,
+          unsigned int n_vecs>
+class less_equal_contig_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event less_equal_contig_impl(sycl::queue exec_q,
+                                   size_t nelems,
+                                   const char *arg1_p,
+                                   py::ssize_t arg1_offset,
+                                   const char *arg2_p,
+                                   py::ssize_t arg2_offset,
+                                   char *res_p,
+                                   py::ssize_t res_offset,
+                                   const std::vector<sycl::event> &depends = {})
+{
+    // submit a single kernel and hand its event back to the caller
+    return exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+
+        constexpr unsigned int vec_sz = 4;
+        constexpr unsigned int n_vecs = 2;
+        using resTy = typename LessEqualOutputType<argTy1, argTy2>::value_type;
+        // enough work-groups so that each covers lws * n_vecs * vec_sz elements
+        const size_t lws = 64;
+        const size_t elems_per_wg = lws * n_vecs * vec_sz;
+        const size_t n_groups = (nelems + elems_per_wg - 1) / elems_per_wg;
+        const sycl::range<1> global_range(n_groups * lws);
+        const sycl::range<1> local_range(lws);
+
+        const argTy1 *lhs_tp =
+            reinterpret_cast<const argTy1 *>(arg1_p) + arg1_offset;
+        const argTy2 *rhs_tp =
+            reinterpret_cast<const argTy2 *>(arg2_p) + arg2_offset;
+        resTy *out_tp = reinterpret_cast<resTy *>(res_p) + res_offset;
+
+        cgh.parallel_for<
+            less_equal_contig_kernel<argTy1, argTy2, resTy, vec_sz, n_vecs>>(
+            sycl::nd_range<1>(global_range, local_range),
+            LessEqualContigFunctor<argTy1, argTy2, resTy, vec_sz, n_vecs>(
+                lhs_tp, rhs_tp, out_tp, nelems));
+    });
+}
+
+template <typename fnT, typename T1, typename T2> struct LessEqualContigFactory
+{
+    /*! @brief contiguous less_equal implementation for the (T1, T2) pair,
+     *         or nullptr if LessEqualOutputType maps the pair to void */
+    fnT get()
+    {
+        using resT = typename LessEqualOutputType<T1, T2>::value_type;
+
+        if constexpr (std::is_same_v<resT, void>) {
+            // no kernel exists for this type pair
+            return nullptr;
+        }
+        else {
+            return less_equal_contig_impl<T1, T2>;
+        }
+    }
+};
+
+template <typename fnT, typename T1, typename T2> struct LessEqualTypeMapFactory
+{
+    /*! @brief get typeid of LessEqualOutputType<T1, T2>, the type of x <= y */
+    std::enable_if_t<std::is_same<fnT, int>::value, int> get()
+    {
+        using rT = typename LessEqualOutputType<T1, T2>::value_type;
+        return td_ns::GetTypeid<rT>{}.get();
+    }
+};
+
+template <typename T1, typename T2, typename resT, typename IndexerT>
+class less_equal_strided_strided_kernel;
+
+template <typename argTy1, typename argTy2>
+sycl::event
+less_equal_strided_impl(sycl::queue exec_q,
+                        size_t nelems,
+                        int nd,
+                        const py::ssize_t *shape_and_strides,
+                        const char *arg1_p,
+                        py::ssize_t arg1_offset,
+                        const char *arg2_p,
+                        py::ssize_t arg2_offset,
+                        char *res_p,
+                        py::ssize_t res_offset,
+                        const std::vector<sycl::event> &depends,
+                        const std::vector<sycl::event> &additional_depends)
+{
+    // submit a single kernel and hand its event back to the caller
+    return exec_q.submit([&](sycl::handler &cgh) {
+        cgh.depends_on(depends);
+        cgh.depends_on(additional_depends);
+
+        using resTy = typename LessEqualOutputType<argTy1, argTy2>::value_type;
+        using IndexerT =
+            typename dpctl::tensor::offset_utils::ThreeOffsets_StridedIndexer;
+        using KernelName =
+            less_equal_strided_strided_kernel<argTy1, argTy2, resTy, IndexerT>;
+
+        // offsets are given to the indexer, so base pointers stay unshifted
+        IndexerT indexer{nd, arg1_offset, arg2_offset, res_offset,
+                         shape_and_strides};
+        const argTy1 *lhs_tp = reinterpret_cast<const argTy1 *>(arg1_p);
+        const argTy2 *rhs_tp = reinterpret_cast<const argTy2 *>(arg2_p);
+        resTy *out_tp = reinterpret_cast<resTy *>(res_p);
+
+        cgh.parallel_for<KernelName>(
+            {nelems}, LessEqualStridedFunctor<argTy1, argTy2, resTy, IndexerT>(
+                          lhs_tp, rhs_tp, out_tp, indexer));
+    });
+}
+
+template <typename fnT, typename T1, typename T2> struct LessEqualStridedFactory
+{
+    /*! @brief strided less_equal implementation for the (T1, T2) pair,
+     *         or nullptr if LessEqualOutputType maps the pair to void */
+    fnT get()
+    {
+        using resT = typename LessEqualOutputType<T1, T2>::value_type;
+
+        if constexpr (std::is_same_v<resT, void>) {
+            // no kernel exists for this type pair
+            return nullptr;
+        }
+        else {
+            return less_equal_strided_impl<T1, T2>;
+        }
+    }
+};
+
+} // namespace less_equal
+} // namespace kernels
+} // namespace tensor
+} // namespace dpctl
diff --git a/dpctl/tensor/libtensor/source/elementwise_functions.cpp b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
index cb153349b6..a43534a056 100644
--- a/dpctl/tensor/libtensor/source/elementwise_functions.cpp
+++ b/dpctl/tensor/libtensor/source/elementwise_functions.cpp
@@ -40,11 +40,14 @@
 #include "kernels/elementwise_functions/exp.hpp"
 #include "kernels/elementwise_functions/expm1.hpp"
 #include "kernels/elementwise_functions/floor_divide.hpp"
+#include "kernels/elementwise_functions/greater.hpp"
+#include "kernels/elementwise_functions/greater_equal.hpp"
 #include "kernels/elementwise_functions/imag.hpp"
 #include "kernels/elementwise_functions/isfinite.hpp"
 #include "kernels/elementwise_functions/isinf.hpp"
 #include "kernels/elementwise_functions/isnan.hpp"
 #include "kernels/elementwise_functions/less.hpp"
+#include "kernels/elementwise_functions/less_equal.hpp"
 #include "kernels/elementwise_functions/log.hpp"
 #include "kernels/elementwise_functions/log1p.hpp"
 #include "kernels/elementwise_functions/multiply.hpp"
@@ -608,13 +611,77 @@ void populate_floor_divide_dispatch_tables(void)
 // B11: ==== GREATER       (x1, x2)
 namespace impl
 {
-// FIXME: add code for B11
+namespace greater_fn_ns = dpctl::tensor::kernels::greater;
+
+static binary_contig_impl_fn_ptr_t
+    greater_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int greater_output_id_table[td_ns::num_types][td_ns::num_types];
+
+static binary_strided_impl_fn_ptr_t
+    greater_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+
+/*! @brief populate the output type id table and the strided and contiguous
+ *         dispatch tables of GREATER */
+void populate_greater_dispatch_tables(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = greater_fn_ns;
+
+    // supported input type pairs and the type id of their result
+    DispatchTableBuilder<int, fn_ns::GreaterTypeMapFactory, num_types>
+        type_map_dtb;
+    type_map_dtb.populate_dispatch_table(greater_output_id_table);
+
+    // implementations operating on general strided arrays
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t,
+                         fn_ns::GreaterStridedFactory, num_types>
+        strided_dtb;
+    strided_dtb.populate_dispatch_table(greater_strided_dispatch_table);
+
+    // implementations operating on contiguous inputs and output
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t,
+                         fn_ns::GreaterContigFactory, num_types>
+        contig_dtb;
+    contig_dtb.populate_dispatch_table(greater_contig_dispatch_table);
+};
 } // namespace impl
 
 // B12: ==== GREATER_EQUAL (x1, x2)
 namespace impl
 {
-// FIXME: add code for B12
+namespace greater_equal_fn_ns = dpctl::tensor::kernels::greater_equal;
+
+static binary_contig_impl_fn_ptr_t
+    greater_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int greater_equal_output_id_table[td_ns::num_types][td_ns::num_types];
+
+static binary_strided_impl_fn_ptr_t
+    greater_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+
+/*! @brief populate the output type id table and the strided and contiguous
+ *         dispatch tables of GREATER_EQUAL */
+void populate_greater_equal_dispatch_tables(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = greater_equal_fn_ns;
+
+    // supported input type pairs and the type id of their result
+    DispatchTableBuilder<int, fn_ns::GreaterEqualTypeMapFactory, num_types>
+        type_map_dtb;
+    type_map_dtb.populate_dispatch_table(greater_equal_output_id_table);
+
+    // implementations operating on general strided arrays
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t,
+                         fn_ns::GreaterEqualStridedFactory, num_types>
+        strided_dtb;
+    strided_dtb.populate_dispatch_table(greater_equal_strided_dispatch_table);
+
+    // implementations operating on contiguous inputs and output
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t,
+                         fn_ns::GreaterEqualContigFactory, num_types>
+        contig_dtb;
+    contig_dtb.populate_dispatch_table(greater_equal_contig_dispatch_table);
+};
 } // namespace impl
 
 // U16: ==== IMAG        (x)
@@ -797,7 +864,39 @@ void populate_less_dispatch_tables(void)
 // B14: ==== LESS_EQUAL  (x1, x2)
 namespace impl
 {
-// FIXME: add code for B14
+namespace less_equal_fn_ns = dpctl::tensor::kernels::less_equal;
+
+static binary_contig_impl_fn_ptr_t
+    less_equal_contig_dispatch_table[td_ns::num_types][td_ns::num_types];
+static int less_equal_output_id_table[td_ns::num_types][td_ns::num_types];
+
+static binary_strided_impl_fn_ptr_t
+    less_equal_strided_dispatch_table[td_ns::num_types][td_ns::num_types];
+
+/*! @brief populate the output type id table and the strided and contiguous
+ *         dispatch tables of LESS_EQUAL */
+void populate_less_equal_dispatch_tables(void)
+{
+    using namespace td_ns;
+    namespace fn_ns = less_equal_fn_ns;
+
+    // supported input type pairs and the type id of their result
+    DispatchTableBuilder<int, fn_ns::LessEqualTypeMapFactory, num_types>
+        type_map_dtb;
+    type_map_dtb.populate_dispatch_table(less_equal_output_id_table);
+
+    // implementations operating on general strided arrays
+    DispatchTableBuilder<binary_strided_impl_fn_ptr_t,
+                         fn_ns::LessEqualStridedFactory, num_types>
+        strided_dtb;
+    strided_dtb.populate_dispatch_table(less_equal_strided_dispatch_table);
+
+    // implementations operating on contiguous inputs and output
+    DispatchTableBuilder<binary_contig_impl_fn_ptr_t,
+                         fn_ns::LessEqualContigFactory, num_types>
+        contig_dtb;
+    contig_dtb.populate_dispatch_table(less_equal_contig_dispatch_table);
+};
 } // namespace impl
 
 // U20: ==== LOG         (x)
@@ -1624,10 +1723,86 @@ void init_elementwise_functions(py::module_ m)
     }
 
     // B11: ==== GREATER       (x1, x2)
-    // FIXME:
+    {
+        impl::populate_greater_dispatch_tables();
+        using impl::greater_contig_dispatch_table;
+        using impl::greater_output_id_table;
+        using impl::greater_strided_dispatch_table;
+
+        auto greater_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                 dpctl::tensor::usm_ndarray src2,
+                                 dpctl::tensor::usm_ndarray dst,
+                                 sycl::queue exec_q,
+                                 const std::vector<sycl::event> &depends = {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, greater_output_id_table,
+                // function pointers to handle operation on contiguous arrays
+                // (pointers may be nullptr)
+                greater_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays (most
+                // general case)
+                greater_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix and
+                // c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig row and
+                // c-contig matrix with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto greater_result_type_pyapi = [&](py::dtype dtype1,
+                                             py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               greater_output_id_table);
+        };
+        m.def("_greater", greater_pyapi, "", py::arg("src1"), py::arg("src2"),
+              py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_greater_result_type", greater_result_type_pyapi, "");
+    }
 
     // B12: ==== GREATER_EQUAL (x1, x2)
-    // FIXME:
+    {
+        impl::populate_greater_equal_dispatch_tables();
+        using impl::greater_equal_contig_dispatch_table;
+        using impl::greater_equal_output_id_table;
+        using impl::greater_equal_strided_dispatch_table;
+
+        auto greater_equal_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                       dpctl::tensor::usm_ndarray src2,
+                                       dpctl::tensor::usm_ndarray dst,
+                                       sycl::queue exec_q,
+                                       const std::vector<sycl::event> &depends =
+                                           {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, greater_equal_output_id_table,
+                // function pointers to handle operation on contiguous arrays
+                // (pointers may be nullptr)
+                greater_equal_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays (most
+                // general case)
+                greater_equal_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix and
+                // c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig row and
+                // c-contig matrix with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto greater_equal_result_type_pyapi = [&](py::dtype dtype1,
+                                                   py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               greater_equal_output_id_table);
+        };
+        m.def("_greater_equal", greater_equal_pyapi, "", py::arg("src1"),
+              py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_greater_equal_result_type", greater_equal_result_type_pyapi,
+              "");
+    }
 
     // U16: ==== IMAG        (x)
     {
@@ -1764,7 +1939,45 @@ void init_elementwise_functions(py::module_ m)
     }
 
     // B14: ==== LESS_EQUAL  (x1, x2)
-    // FIXME:
+    {
+        impl::populate_less_equal_dispatch_tables();
+        using impl::less_equal_contig_dispatch_table;
+        using impl::less_equal_output_id_table;
+        using impl::less_equal_strided_dispatch_table;
+
+        auto less_equal_pyapi = [&](dpctl::tensor::usm_ndarray src1,
+                                    dpctl::tensor::usm_ndarray src2,
+                                    dpctl::tensor::usm_ndarray dst,
+                                    sycl::queue exec_q,
+                                    const std::vector<sycl::event> &depends =
+                                        {}) {
+            return py_binary_ufunc(
+                src1, src2, dst, exec_q, depends, less_equal_output_id_table,
+                // function pointers to handle operation on contiguous arrays
+                // (pointers may be nullptr)
+                less_equal_contig_dispatch_table,
+                // function pointers to handle operation on strided arrays (most
+                // general case)
+                less_equal_strided_dispatch_table,
+                // function pointers to handle operation of c-contig matrix and
+                // c-contig row with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
+                // function pointers to handle operation of c-contig row and
+                // c-contig matrix with broadcasting (may be nullptr)
+                td_ns::NullPtrTable<
+                    binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
+        };
+        auto less_equal_result_type_pyapi = [&](py::dtype dtype1,
+                                                py::dtype dtype2) {
+            return py_binary_ufunc_result_type(dtype1, dtype2,
+                                               less_equal_output_id_table);
+        };
+        m.def("_less_equal", less_equal_pyapi, "", py::arg("src1"),
+              py::arg("src2"), py::arg("dst"), py::arg("sycl_queue"),
+              py::arg("depends") = py::list());
+        m.def("_less_equal_result_type", less_equal_result_type_pyapi, "");
+    }
 
     // U20: ==== LOG         (x)
     {
diff --git a/dpctl/tests/elementwise/test_greater.py b/dpctl/tests/elementwise/test_greater.py
new file mode 100644
index 0000000000..fbda074e53
--- /dev/null
+++ b/dpctl/tests/elementwise/test_greater.py
@@ -0,0 +1,254 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import ctypes
+
+import numpy as np
+import pytest
+
+import dpctl
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _compare_dtypes, _usm_types
+
+
+@pytest.mark.parametrize("op1_dtype", _all_dtypes)
+@pytest.mark.parametrize("op2_dtype", _all_dtypes)
+def test_greater_dtype_matrix(op1_dtype, op2_dtype):
+    """Compare `dpt.greater` with `np.greater` for every pair of dtypes."""
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op1_dtype, q)
+    skip_if_dtype_not_supported(op2_dtype, q)
+
+    # NumPy reference; a single element broadcasts against the full result
+    np_zeros = np.zeros(1, dtype=op1_dtype)
+    np_ones = np.ones(1, dtype=op2_dtype)
+    expected = np.greater(np_zeros, np_ones)
+
+    sz = 127
+    # contiguous operands
+    zeros = dpt.zeros(sz, dtype=op1_dtype)
+    ones = dpt.ones_like(zeros, dtype=op2_dtype)
+    res = dpt.greater(zeros, ones)
+    assert isinstance(res, dpt.usm_ndarray)
+    assert _compare_dtypes(res.dtype, expected.dtype, sycl_queue=q)
+    assert res.shape == zeros.shape
+    assert (dpt.asnumpy(res) == expected.astype(res.dtype)).all()
+    assert res.sycl_queue == zeros.sycl_queue
+
+    # strided operands: a reversed view against every other element
+    zeros = dpt.zeros(sz, dtype=op1_dtype)
+    ones = dpt.ones(2 * sz, dtype=op2_dtype)
+    res = dpt.greater(zeros[::-1], ones[::2])
+    assert isinstance(res, dpt.usm_ndarray)
+    assert _compare_dtypes(res.dtype, expected.dtype, sycl_queue=q)
+    assert res.shape == zeros.shape
+    assert (dpt.asnumpy(res) == expected.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("op_dtype", ["c8", "c16"])
+def test_greater_complex_matrix(op_dtype):
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op_dtype, q)
+
+    sz = 127
+    ar1_np_real = np.random.randint(0, 10, sz)
+    ar1_np_imag = np.random.randint(0, 10, sz)
+    ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype)
+
+    ar2_np_real = np.random.randint(0, 10, sz)
+    ar2_np_imag = np.random.randint(0, 10, sz)
+    ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype)
+
+    r = dpt.greater(ar1, ar2)
+    expected = np.greater(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert (dpt.asnumpy(r) == expected).all()
+    # strided views; the dtype check must look at r1, the result of this call
+    r1 = dpt.greater(ar1[::-2], ar2[::2])
+    expected1 = np.greater(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r1.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert (dpt.asnumpy(r1) == expected1).all()
+    # special values: nan and both infinities as the right-hand operand
+    ar3 = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype=op_dtype)
+    ar4 = dpt.asarray([2.0 + 0j, dpt.nan, dpt.inf, -dpt.inf], dtype=op_dtype)
+    r2 = dpt.greater(ar3, ar4)
+    with np.errstate(invalid="ignore"):
+        expected2 = np.greater(dpt.asnumpy(ar3), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r2) == expected2).all()
+    # the special-value array compared with itself
+    r3 = dpt.greater(ar4, ar4)
+    with np.errstate(invalid="ignore"):
+        expected3 = np.greater(dpt.asnumpy(ar4), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r3) == expected3).all()
+
+
+def test_greater_complex_float():
+    """Compare a complex64 array with real floating-point arrays.
+
+    Both operand orders are checked against NumPy, first for an ordinary
+    value and then for nan and the two infinities.
+    """
+    get_queue_or_skip()
+
+    cplx = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype="c8")
+    twos = dpt.full((4,), 2, dtype="f4")
+
+    for lhs, rhs in ((cplx, twos), (twos, cplx)):
+        res = dpt.greater(lhs, rhs)
+        ref = np.greater(dpt.asnumpy(lhs), dpt.asnumpy(rhs))
+        assert (dpt.asnumpy(res) == ref).all()
+
+    # NumPy's invalid-value floating point errors are ignored for these inputs
+    with np.errstate(invalid="ignore"):
+        for special in (dpt.nan, dpt.inf, -dpt.inf):
+            filled = dpt.full((4,), special)
+            for lhs, rhs in ((cplx, filled), (filled, cplx)):
+                res = dpt.greater(lhs, rhs)
+                ref = np.greater(dpt.asnumpy(lhs), dpt.asnumpy(rhs))
+                assert (dpt.asnumpy(res) == ref).all()
+
+
+@pytest.mark.parametrize("op1_usm_type", _usm_types)
+@pytest.mark.parametrize("op2_usm_type", _usm_types)
+def test_greater_usm_type_matrix(op1_usm_type, op2_usm_type):
+    """The result's USM type is the coerced USM type of the two operands."""
+    get_queue_or_skip()
+
+    lhs = dpt.ones(128, dtype="i4", usm_type=op1_usm_type)
+    rhs = dpt.ones_like(lhs, dtype="i4", usm_type=op2_usm_type)
+
+    res = dpt.greater(lhs, rhs)
+    assert isinstance(res, dpt.usm_ndarray)
+
+    # the coercion rule comes from dpctl.utils, it is not re-implemented here
+    usm_types = (op1_usm_type, op2_usm_type)
+    assert res.usm_type == dpctl.utils.get_coerced_usm_type(usm_types)
+
+
+def test_greater_order():
+    """Check the memory layout of the result for each `order` value.
+
+    Contiguous inputs are tried in C and F layout; strided inputs are only
+    tried with order="K", where the expected strides are asserted directly.
+    """
+    get_queue_or_skip()
+
+    # contiguous inputs: "C"/"F" force a layout, "A"/"K" keep the input one
+    for in_order in ("C", "F"):
+        x1 = dpt.ones((20, 20), dtype="i4", order=in_order)
+        x2 = dpt.ones((20, 20), dtype="i4", order=in_order)
+        r_c = dpt.greater(x1, x2, order="C")
+        assert r_c.flags.c_contiguous
+        r_f = dpt.greater(x1, x2, order="F")
+        assert r_f.flags.f_contiguous
+        for keep_order in ("A", "K"):
+            r_keep = dpt.greater(x1, x2, order=keep_order)
+            if in_order == "C":
+                assert r_keep.flags.c_contiguous
+            else:
+                assert r_keep.flags.f_contiguous
+
+    # reversed, every-other-column views of a C-contiguous array
+    x1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    x2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    r_k = dpt.greater(x1, x2, order="K")
+    assert r_k.strides == (20, -1)
+
+    # the same views, transposed
+    x1 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    x2 = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    r_k = dpt.greater(x1, x2, order="K")
+    assert r_k.strides == (-1, 20)
+
+
+def test_greater_broadcasting():
+    """Broadcast a (5,) vector against a (100, 5) matrix in both orders."""
+    get_queue_or_skip()
+
+    mat = dpt.ones((100, 5), dtype="i4")
+    vec = dpt.arange(1, 6, dtype="i4")
+
+    # NumPy twins of the operands, used to build the reference results
+    mat_np = np.ones((100, 5), dtype="i4")
+    vec_np = np.arange(1, 6, dtype="i4")
+
+    res = dpt.greater(mat, vec)
+    ref = np.greater(mat_np, vec_np)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+    res = dpt.greater(vec, mat)
+    ref = np.greater(vec_np, mat_np)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("arr_dt", _all_dtypes)
+def test_greater_python_scalar(arr_dt):
+    """Python, NumPy and ctypes scalars are accepted as either operand."""
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(arr_dt, q)
+
+    arr = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=q)
+
+    # the value one, spelled with each scalar type under test
+    scalar_types = (bool, int, float, complex, np.float32, ctypes.c_int)
+    for scalar_type in scalar_types:
+        one = scalar_type(1)
+        # scalar on the right
+        res = dpt.greater(arr, one)
+        assert isinstance(res, dpt.usm_ndarray)
+
+        # scalar on the left
+        res = dpt.greater(one, arr)
+        assert isinstance(res, dpt.usm_ndarray)
+
+
+class MockArray:  # exposes nothing but the wrapped array's USM interface
+    def __init__(self, arr):
+        self.data_ = arr
+
+    @property
+    def __sycl_usm_array_interface__(self):
+        return self.data_.__sycl_usm_array_interface__
+
+
+def test_greater_mock_array():
+    """An object exposing `__sycl_usm_array_interface__` is accepted."""
+    get_queue_or_skip()
+    lhs = dpt.arange(10)
+    wrapped = MockArray(dpt.ones(10))
+    res = dpt.greater(lhs, wrapped)
+    assert isinstance(res, dpt.usm_ndarray)
+
+
+def test_greater_canary_mock_array():
+    """An operand whose USM interface property is None raises ValueError."""
+    get_queue_or_skip()
+
+    class Canary:
+        # no state is needed, so the default constructor is used
+        @property
+        def __sycl_usm_array_interface__(self):
+            return None
+
+    lhs = dpt.arange(10)
+    bad_operand = Canary()
+
+    with pytest.raises(ValueError):
+        dpt.greater(lhs, bad_operand)
diff --git a/dpctl/tests/elementwise/test_greater_equal.py b/dpctl/tests/elementwise/test_greater_equal.py
new file mode 100644
index 0000000000..3f56e5d460
--- /dev/null
+++ b/dpctl/tests/elementwise/test_greater_equal.py
@@ -0,0 +1,254 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import ctypes
+
+import numpy as np
+import pytest
+
+import dpctl
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _compare_dtypes, _usm_types
+
+
+@pytest.mark.parametrize("op1_dtype", _all_dtypes)
+@pytest.mark.parametrize("op2_dtype", _all_dtypes)
+def test_greater_equal_dtype_matrix(op1_dtype, op2_dtype):
+    """Check zeros >= ones for each dtype pair, contiguous then strided."""
+    queue = get_queue_or_skip()
+    skip_if_dtype_not_supported(op1_dtype, queue)
+    skip_if_dtype_not_supported(op2_dtype, queue)
+    n = 127
+    zeros = dpt.zeros(n, dtype=op1_dtype)
+    ones = dpt.ones_like(zeros, dtype=op2_dtype)
+
+    res = dpt.greater_equal(zeros, ones)
+    assert isinstance(res, dpt.usm_ndarray)
+    ref = np.greater_equal(
+        np.zeros(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype)
+    )
+    assert _compare_dtypes(res.dtype, ref.dtype, sycl_queue=queue)
+    assert res.shape == zeros.shape
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+    assert res.sycl_queue == zeros.sycl_queue
+
+    # strided operands: reversed zeros, every other element of the ones
+    zeros_src = dpt.zeros(n, dtype=op1_dtype)
+    ones_src = dpt.ones(2 * n, dtype=op2_dtype)
+    res = dpt.greater_equal(zeros_src[::-1], ones_src[::2])
+    assert isinstance(res, dpt.usm_ndarray)
+    ref = np.greater_equal(
+        np.zeros(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype)
+    )
+    assert _compare_dtypes(res.dtype, ref.dtype, sycl_queue=queue)
+    assert res.shape == zeros_src.shape
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("op_dtype", ["c8", "c16"])
+def test_greater_equal_complex_matrix(op_dtype):
+    """Compare complex arrays against NumPy, including nan/inf operands."""
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op_dtype, q)
+    sz = 127
+    ar1_np_real = np.random.randint(0, 10, sz)
+    ar1_np_imag = np.random.randint(0, 10, sz)
+    ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype)
+    ar2_np_real = np.random.randint(0, 10, sz)
+    ar2_np_imag = np.random.randint(0, 10, sz)
+    ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype)
+
+    r = dpt.greater_equal(ar1, ar2)
+    expected = np.greater_equal(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+    # strided views; every check below must inspect r1, this call's result
+    r1 = dpt.greater_equal(ar1[::-2], ar2[::2])
+    expected1 = np.greater_equal(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r1.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert (dpt.asnumpy(r1) == expected1).all()
+
+    ar3 = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype=op_dtype)
+    ar4 = dpt.asarray([2.0 + 0j, dpt.nan, dpt.inf, -dpt.inf], dtype=op_dtype)
+    r2 = dpt.greater_equal(ar3, ar4)
+    with np.errstate(invalid="ignore"):
+        expected2 = np.greater_equal(dpt.asnumpy(ar3), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r2) == expected2).all()
+
+    r3 = dpt.greater_equal(ar4, ar4)
+    with np.errstate(invalid="ignore"):
+        expected3 = np.greater_equal(dpt.asnumpy(ar4), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r3) == expected3).all()
+
+
+def test_greater_equal_complex_float():
+    """Compare complex values with real ones, finite and non-finite."""
+    get_queue_or_skip()
+    cplx = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype="c8")
+    real = dpt.full((4,), 2, dtype="f4")
+
+    res = dpt.greater_equal(cplx, real)
+    ref = np.greater_equal(dpt.asnumpy(cplx), dpt.asnumpy(real))
+    assert (dpt.asnumpy(res) == ref).all()
+
+    res = dpt.greater_equal(real, cplx)
+    ref = np.greater_equal(dpt.asnumpy(real), dpt.asnumpy(cplx))
+    assert (dpt.asnumpy(res) == ref).all()
+
+    with np.errstate(invalid="ignore"):
+        for special in (dpt.nan, dpt.inf, -dpt.inf):
+            filled = dpt.full((4,), special)
+            res = dpt.greater_equal(cplx, filled)
+            ref = np.greater_equal(dpt.asnumpy(cplx), dpt.asnumpy(filled))
+            assert (dpt.asnumpy(res) == ref).all()
+
+            res = dpt.greater_equal(filled, cplx)
+            ref = np.greater_equal(dpt.asnumpy(filled), dpt.asnumpy(cplx))
+            assert (dpt.asnumpy(res) == ref).all()
+
+
+@pytest.mark.parametrize("op1_usm_type", _usm_types)
+@pytest.mark.parametrize("op2_usm_type", _usm_types)
+def test_greater_equal_usm_type_matrix(op1_usm_type, op2_usm_type):
+    """Check the result's USM type follows the coercion of its operands."""
+    get_queue_or_skip()
+    n = 128
+    lhs = dpt.ones(n, dtype="i4", usm_type=op1_usm_type)
+    rhs = dpt.ones_like(lhs, dtype="i4", usm_type=op2_usm_type)
+
+    res = dpt.greater_equal(lhs, rhs)
+    assert isinstance(res, dpt.usm_ndarray)
+
+    operand_types = (op1_usm_type, op2_usm_type)
+    coerced = dpctl.utils.get_coerced_usm_type(operand_types)
+    assert res.usm_type == coerced
+
+
+def test_greater_equal_order():
+    """Check the result memory layout for each value of ``order``."""
+    get_queue_or_skip()
+    lhs = dpt.ones((20, 20), dtype="i4", order="C")
+    rhs = dpt.ones((20, 20), dtype="i4", order="C")
+    res_c = dpt.greater_equal(lhs, rhs, order="C")
+    assert res_c.flags.c_contiguous
+    res_f = dpt.greater_equal(lhs, rhs, order="F")
+    assert res_f.flags.f_contiguous
+    res_a = dpt.greater_equal(lhs, rhs, order="A")
+    assert res_a.flags.c_contiguous
+    res_k = dpt.greater_equal(lhs, rhs, order="K")
+    assert res_k.flags.c_contiguous
+
+    lhs = dpt.ones((20, 20), dtype="i4", order="F")
+    rhs = dpt.ones((20, 20), dtype="i4", order="F")
+    res_c = dpt.greater_equal(lhs, rhs, order="C")
+    assert res_c.flags.c_contiguous
+    res_f = dpt.greater_equal(lhs, rhs, order="F")
+    assert res_f.flags.f_contiguous
+    res_a = dpt.greater_equal(lhs, rhs, order="A")
+    assert res_a.flags.f_contiguous
+    res_k = dpt.greater_equal(lhs, rhs, order="K")
+    assert res_k.flags.f_contiguous
+
+    lhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    rhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    res_k = dpt.greater_equal(lhs, rhs, order="K")
+    assert res_k.strides == (20, -1)
+
+    lhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    rhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    res_k = dpt.greater_equal(lhs, rhs, order="K")
+    assert res_k.strides == (-1, 20)
+
+
+def test_greater_equal_broadcasting():
+    """Broadcast a (5,) vector against a (100, 5) matrix, in both orders."""
+    get_queue_or_skip()
+
+    mat = dpt.ones((100, 5), dtype="i4")
+    vec = dpt.arange(1, 6, dtype="i4")
+    np_mat = np.ones((100, 5), dtype="i4")
+    np_vec = np.arange(1, 6, dtype="i4")
+
+    # matrix as the first operand
+    res = dpt.greater_equal(mat, vec)
+    ref = np.greater_equal(np_mat, np_vec)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+    # vector as the first operand
+    res = dpt.greater_equal(vec, mat)
+    ref = np.greater_equal(np_vec, np_mat)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("arr_dt", _all_dtypes)
+def test_greater_equal_python_scalar(arr_dt):
+    """Compare an array of ones with scalar ones on either side."""
+    queue = get_queue_or_skip()
+    skip_if_dtype_not_supported(arr_dt, queue)
+    arr = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=queue)
+    scalar_ones = (
+        bool(1),
+        int(1),
+        float(1),
+        complex(1),
+        np.float32(1),
+        ctypes.c_int(1),
+    )
+    for one in scalar_ones:
+        res = dpt.greater_equal(arr, one)
+        assert isinstance(res, dpt.usm_ndarray)
+        res = dpt.greater_equal(one, arr)
+        assert isinstance(res, dpt.usm_ndarray)
+
+
+class MockArray:  # stand-in that forwards __sycl_usm_array_interface__
+    def __init__(self, arr):
+        self.data_ = arr  # object whose interface is forwarded below
+
+    @property
+    def __sycl_usm_array_interface__(self):
+        return self.data_.__sycl_usm_array_interface__
+
+
+def test_greater_equal_mock_array():
+    """Accept an operand that only forwards the USM array interface."""
+    get_queue_or_skip()
+    lhs = dpt.arange(10)
+    wrapped = MockArray(dpt.ones(10))
+    res = dpt.greater_equal(lhs, wrapped)
+    assert isinstance(res, dpt.usm_ndarray)
+
+
+def test_greater_equal_canary_mock_array():
+    """Reject an operand whose USM array interface is ``None``."""
+    get_queue_or_skip()
+    lhs = dpt.arange(10)
+
+    class Canary:
+        # has the protocol attribute, but it carries no interface data
+        @property
+        def __sycl_usm_array_interface__(self):
+            return None
+
+    bogus = Canary()
+    # the unusable interface must surface as ValueError
+    with pytest.raises(ValueError):
+        dpt.greater_equal(lhs, bogus)
diff --git a/dpctl/tests/elementwise/test_less_equal.py b/dpctl/tests/elementwise/test_less_equal.py
new file mode 100644
index 0000000000..b539d6a48f
--- /dev/null
+++ b/dpctl/tests/elementwise/test_less_equal.py
@@ -0,0 +1,254 @@
+#                       Data Parallel Control (dpctl)
+#
+#  Copyright 2020-2023 Intel Corporation
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import ctypes
+
+import numpy as np
+import pytest
+
+import dpctl
+import dpctl.tensor as dpt
+from dpctl.tests.helper import get_queue_or_skip, skip_if_dtype_not_supported
+
+from .utils import _all_dtypes, _compare_dtypes, _usm_types
+
+
+@pytest.mark.parametrize("op1_dtype", _all_dtypes)
+@pytest.mark.parametrize("op2_dtype", _all_dtypes)
+def test_less_equal_dtype_matrix(op1_dtype, op2_dtype):
+    """Check zeros <= ones for each dtype pair, contiguous then strided."""
+    queue = get_queue_or_skip()
+    skip_if_dtype_not_supported(op1_dtype, queue)
+    skip_if_dtype_not_supported(op2_dtype, queue)
+    n = 127
+    zeros = dpt.zeros(n, dtype=op1_dtype)
+    ones = dpt.ones_like(zeros, dtype=op2_dtype)
+
+    res = dpt.less_equal(zeros, ones)
+    assert isinstance(res, dpt.usm_ndarray)
+    ref = np.less_equal(
+        np.zeros(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype)
+    )
+    assert _compare_dtypes(res.dtype, ref.dtype, sycl_queue=queue)
+    assert res.shape == zeros.shape
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+    assert res.sycl_queue == zeros.sycl_queue
+
+    # strided operands: reversed zeros, every other element of the ones
+    zeros_src = dpt.zeros(n, dtype=op1_dtype)
+    ones_src = dpt.ones(2 * n, dtype=op2_dtype)
+    res = dpt.less_equal(zeros_src[::-1], ones_src[::2])
+    assert isinstance(res, dpt.usm_ndarray)
+    ref = np.less_equal(
+        np.zeros(1, dtype=op1_dtype), np.ones(1, dtype=op2_dtype)
+    )
+    assert _compare_dtypes(res.dtype, ref.dtype, sycl_queue=queue)
+    assert res.shape == zeros_src.shape
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("op_dtype", ["c8", "c16"])
+def test_less_equal_complex_matrix(op_dtype):
+    """Compare complex arrays against NumPy, including nan/inf operands."""
+    q = get_queue_or_skip()
+    skip_if_dtype_not_supported(op_dtype, q)
+    sz = 127
+    ar1_np_real = np.random.randint(0, 10, sz)
+    ar1_np_imag = np.random.randint(0, 10, sz)
+    ar1 = dpt.asarray(ar1_np_real + 1j * ar1_np_imag, dtype=op_dtype)
+    ar2_np_real = np.random.randint(0, 10, sz)
+    ar2_np_imag = np.random.randint(0, 10, sz)
+    ar2 = dpt.asarray(ar2_np_real + 1j * ar2_np_imag, dtype=op_dtype)
+
+    r = dpt.less_equal(ar1, ar2)
+    expected = np.less_equal(dpt.asnumpy(ar1), dpt.asnumpy(ar2))
+    assert _compare_dtypes(r.dtype, expected.dtype, sycl_queue=q)
+    assert r.shape == expected.shape
+    assert (dpt.asnumpy(r) == expected).all()
+
+    # strided views; every check below must inspect r1, this call's result
+    r1 = dpt.less_equal(ar1[::-2], ar2[::2])
+    expected1 = np.less_equal(dpt.asnumpy(ar1[::-2]), dpt.asnumpy(ar2[::2]))
+    assert _compare_dtypes(r1.dtype, expected1.dtype, sycl_queue=q)
+    assert r1.shape == expected1.shape
+    assert (dpt.asnumpy(r1) == expected1).all()
+
+    ar3 = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype=op_dtype)
+    ar4 = dpt.asarray([2.0 + 0j, dpt.nan, dpt.inf, -dpt.inf], dtype=op_dtype)
+    r2 = dpt.less_equal(ar3, ar4)
+    with np.errstate(invalid="ignore"):
+        expected2 = np.less_equal(dpt.asnumpy(ar3), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r2) == expected2).all()
+
+    r3 = dpt.less_equal(ar4, ar4)
+    with np.errstate(invalid="ignore"):
+        expected3 = np.less_equal(dpt.asnumpy(ar4), dpt.asnumpy(ar4))
+    assert (dpt.asnumpy(r3) == expected3).all()
+
+
+def test_less_equal_complex_float():
+    """Compare complex values with real ones, finite and non-finite."""
+    get_queue_or_skip()
+    cplx = dpt.asarray([1.0 + 9j, 2.0 + 0j, 2.0 + 1j, 2.0 + 2j], dtype="c8")
+    real = dpt.full((4,), 2, dtype="f4")
+
+    res = dpt.less_equal(cplx, real)
+    ref = np.less_equal(dpt.asnumpy(cplx), dpt.asnumpy(real))
+    assert (dpt.asnumpy(res) == ref).all()
+
+    res = dpt.less_equal(real, cplx)
+    ref = np.less_equal(dpt.asnumpy(real), dpt.asnumpy(cplx))
+    assert (dpt.asnumpy(res) == ref).all()
+
+    with np.errstate(invalid="ignore"):
+        for special in (dpt.nan, dpt.inf, -dpt.inf):
+            filled = dpt.full((4,), special)
+            res = dpt.less_equal(cplx, filled)
+            ref = np.less_equal(dpt.asnumpy(cplx), dpt.asnumpy(filled))
+            assert (dpt.asnumpy(res) == ref).all()
+
+            res = dpt.less_equal(filled, cplx)
+            ref = np.less_equal(dpt.asnumpy(filled), dpt.asnumpy(cplx))
+            assert (dpt.asnumpy(res) == ref).all()
+
+
+@pytest.mark.parametrize("op1_usm_type", _usm_types)
+@pytest.mark.parametrize("op2_usm_type", _usm_types)
+def test_less_equal_usm_type_matrix(op1_usm_type, op2_usm_type):
+    """Check the result's USM type follows the coercion of its operands."""
+    get_queue_or_skip()
+    n = 128
+    lhs = dpt.ones(n, dtype="i4", usm_type=op1_usm_type)
+    rhs = dpt.ones_like(lhs, dtype="i4", usm_type=op2_usm_type)
+
+    res = dpt.less_equal(lhs, rhs)
+    assert isinstance(res, dpt.usm_ndarray)
+
+    operand_types = (op1_usm_type, op2_usm_type)
+    coerced = dpctl.utils.get_coerced_usm_type(operand_types)
+    assert res.usm_type == coerced
+
+
+def test_less_equal_order():
+    """Check the result memory layout for each value of ``order``."""
+    get_queue_or_skip()
+    lhs = dpt.ones((20, 20), dtype="i4", order="C")
+    rhs = dpt.ones((20, 20), dtype="i4", order="C")
+    res_c = dpt.less_equal(lhs, rhs, order="C")
+    assert res_c.flags.c_contiguous
+    res_f = dpt.less_equal(lhs, rhs, order="F")
+    assert res_f.flags.f_contiguous
+    res_a = dpt.less_equal(lhs, rhs, order="A")
+    assert res_a.flags.c_contiguous
+    res_k = dpt.less_equal(lhs, rhs, order="K")
+    assert res_k.flags.c_contiguous
+
+    lhs = dpt.ones((20, 20), dtype="i4", order="F")
+    rhs = dpt.ones((20, 20), dtype="i4", order="F")
+    res_c = dpt.less_equal(lhs, rhs, order="C")
+    assert res_c.flags.c_contiguous
+    res_f = dpt.less_equal(lhs, rhs, order="F")
+    assert res_f.flags.f_contiguous
+    res_a = dpt.less_equal(lhs, rhs, order="A")
+    assert res_a.flags.f_contiguous
+    res_k = dpt.less_equal(lhs, rhs, order="K")
+    assert res_k.flags.f_contiguous
+
+    lhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    rhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2]
+    res_k = dpt.less_equal(lhs, rhs, order="K")
+    assert res_k.strides == (20, -1)
+
+    lhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    rhs = dpt.ones((40, 40), dtype="i4", order="C")[:20, ::-2].mT
+    res_k = dpt.less_equal(lhs, rhs, order="K")
+    assert res_k.strides == (-1, 20)
+
+
+def test_less_equal_broadcasting():
+    """Broadcast a (5,) vector against a (100, 5) matrix, in both orders."""
+    get_queue_or_skip()
+
+    mat = dpt.ones((100, 5), dtype="i4")
+    vec = dpt.arange(1, 6, dtype="i4")
+    np_mat = np.ones((100, 5), dtype="i4")
+    np_vec = np.arange(1, 6, dtype="i4")
+
+    # matrix as the first operand
+    res = dpt.less_equal(mat, vec)
+    ref = np.less_equal(np_mat, np_vec)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+    # vector as the first operand
+    res = dpt.less_equal(vec, mat)
+    ref = np.less_equal(np_vec, np_mat)
+    assert (dpt.asnumpy(res) == ref.astype(res.dtype)).all()
+
+
+@pytest.mark.parametrize("arr_dt", _all_dtypes)
+def test_less_equal_python_scalar(arr_dt):
+    """Compare an array of ones with scalar ones on either side."""
+    queue = get_queue_or_skip()
+    skip_if_dtype_not_supported(arr_dt, queue)
+    arr = dpt.ones((10, 10), dtype=arr_dt, sycl_queue=queue)
+    scalar_ones = (
+        bool(1),
+        int(1),
+        float(1),
+        complex(1),
+        np.float32(1),
+        ctypes.c_int(1),
+    )
+    for one in scalar_ones:
+        res = dpt.less_equal(arr, one)
+        assert isinstance(res, dpt.usm_ndarray)
+        res = dpt.less_equal(one, arr)
+        assert isinstance(res, dpt.usm_ndarray)
+
+
+class MockArray:  # stand-in that forwards __sycl_usm_array_interface__
+    def __init__(self, arr):
+        self.data_ = arr  # object whose interface is forwarded below
+
+    @property
+    def __sycl_usm_array_interface__(self):
+        return self.data_.__sycl_usm_array_interface__
+
+
+def test_less_equal_mock_array():
+    """Accept an operand that only forwards the USM array interface."""
+    get_queue_or_skip()
+    lhs = dpt.arange(10)
+    wrapped = MockArray(dpt.ones(10))
+    res = dpt.less_equal(lhs, wrapped)
+    assert isinstance(res, dpt.usm_ndarray)
+
+
+def test_less_equal_canary_mock_array():
+    """Reject an operand whose USM array interface is ``None``."""
+    get_queue_or_skip()
+    lhs = dpt.arange(10)
+
+    class Canary:
+        # has the protocol attribute, but it carries no interface data
+        @property
+        def __sycl_usm_array_interface__(self):
+            return None
+
+    bogus = Canary()
+    # the unusable interface must surface as ValueError
+    with pytest.raises(ValueError):
+        dpt.less_equal(lhs, bogus)