NVIDIA · cliffburdick · Jul 26, 2023 · Jul 13, 2023 · Jul 26, 2023
diff --git a/docs_input/api/logic/comparison/isclose.rst b/docs_input/api/logic/comparison/isclose.rst
@@ -0,0 +1,21 @@
+.. _isclose_func:
+
+isclose
+=======
+
+Determine the closeness of values across two operators using absolute and relative tolerances. The output
+from isclose is an ``int`` value because it is commonly used as the input to reductions, and ``bool``
+reductions using atomics are not available in hardware.
+
+
+.. doxygenfunction:: isclose
+
+Examples
+~~~~~~~~
+
+.. literalinclude:: ../../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin isclose-test-1
+   :end-before: example-end isclose-test-1
+   :dedent:
+
diff --git a/docs_input/api/logic/truth/allclose.rst b/docs_input/api/logic/truth/allclose.rst
@@ -0,0 +1,20 @@
+.. _allclose_func:
+
+allclose
+========
+
+Reduce the closeness of two operators to a single scalar (0D) output. The output
+from allclose is an ``int`` value since boolean reductions are not available in hardware.
+
+
+.. doxygenfunction:: allclose
+
+Examples
+~~~~~~~~
+
+.. literalinclude:: ../../../../test/00_operators/OperatorTests.cu
+   :language: cpp
+   :start-after: example-begin allclose-test-1
+   :end-before: example-end allclose-test-1
+   :dedent:
+
diff --git a/examples/spectrogram.cu b/examples/spectrogram.cu
@@ -136,8 +136,9 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
       viz::contour(time, freqs, Sxx);
 #else
       printf("Not outputting plot since visualizations disabled\n");
-#endif    
+#endif
     }
+
   }
 
   cudaEventRecord(stop, stream);
@@ -150,6 +151,7 @@ int main([[maybe_unused]] int argc, [[maybe_unused]] char **argv)
   cudaEventDestroy(start);
   cudaEventDestroy(stop);
   cudaStreamDestroy(stream);
+
   CUDA_CHECK_LAST_ERROR();
   MATX_EXIT_HANDLER();
 }
diff --git a/include/matx/operators/isclose.h b/include/matx/operators/isclose.h
@@ -0,0 +1,109 @@
+////////////////////////////////////////////////////////////////////////////////
+// BSD 3-Clause License
+//
+// Copyright (c) 2021, NVIDIA Corporation
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// 1. Redistributions of source code must retain the above copyright notice, this
+//    list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright notice,
+//    this list of conditions and the following disclaimer in the documentation
+//    and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the copyright holder nor the names of its
+//    contributors may be used to endorse or promote products derived from
+//    this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/////////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+
+#include "matx/core/type_utils.h"
+#include "matx/operators/scalar_ops.h"
+#include "matx/operators/base_operator.h"
+
+namespace matx
+{
+
+  namespace detail {
+    /**
+     * Operator that compares two operators element by element and yields 1 when
+     *    abs(op1 - op2) <= atol + rtol * abs(op2)
+     * and 0 otherwise.
+     *
+     * NOTE(review): scalar_type mirrors Op2's scalar type even though operator()
+     * returns int -- confirm that downstream consumers expect this.
+     */
+    template <typename Op1, typename Op2>
+    class IsCloseOp : public BaseOp<IsCloseOp<Op1, Op2>>
+    {
+      public:
+        using matxop = bool;
+        using scalar_type = typename remove_cvref_t<Op2>::scalar_type;
+        using inner_type = typename inner_op_type_t<scalar_type>::type;
+
+        __MATX_INLINE__ std::string str() const { return "isclose()"; }
+
+        /**
+         * @param op1 First operator
+         * @param op2 Second operator; must have the same rank as op1
+         * @param rtol Relative tolerance, converted once to inner_type
+         * @param atol Absolute tolerance, converted once to inner_type
+         */
+        __MATX_INLINE__ IsCloseOp(Op1 op1, Op2 op2, double rtol, double atol) :
+          op1_(op1), op2_(op2), rtol_(static_cast<inner_type>(rtol)), atol_(static_cast<inner_type>(atol))
+        {
+          static_assert(op1.Rank() == op2.Rank(), "Operator ranks must match in isclose()");
+          ASSERT_COMPATIBLE_OP_SIZES(op1);
+          ASSERT_COMPATIBLE_OP_SIZES(op2);
+        }
+
+        /**
+         * @return 1 if the elements of both operators at `indices` are within
+         *         tolerance of each other, 0 otherwise
+         */
+        template <typename... Is>
+          __MATX_INLINE__ __MATX_DEVICE__ __MATX_HOST__ int operator()([[maybe_unused]] Is... indices) const
+          {
+            // Read op2 a single time: the value feeds both the difference and
+            // the relative bound. The tolerances are already stored as
+            // inner_type, so no further casts are needed here.
+            const auto rhs = op2_(indices...);
+
+            return static_cast<int>(detail::_internal_abs(op1_(indices...) - rhs) <=
+               atol_ + rtol_ * detail::_internal_abs(rhs));
+          }
+
+        static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
+        {
+          return detail::matx_max(detail::get_rank<Op1>(), detail::get_rank<Op2>());
+        }
+
+        // Size of dimension `dim`: the larger of the two operands' expanded sizes
+        constexpr __MATX_INLINE__ __MATX_HOST__ __MATX_DEVICE__ index_t Size(int dim) const
+        {
+          index_t size1 = detail::get_expanded_size<Rank()>(op1_, dim);
+          index_t size2 = detail::get_expanded_size<Rank()>(op2_, dim);
+          return detail::matx_max(size1,size2);
+        }
+
+      private:
+        Op1 op1_;
+        Op2 op2_;
+        inner_type rtol_;  // relative tolerance
+        inner_type atol_;  // absolute tolerance
+
+    };
+  }
+
+  /**
+   * @brief Element-wise closeness test between two operators
+   *
+   * Each element of the returned operator evaluates to 1 when
+   *    abs(op1 - op2) <= atol + rtol * abs(op2)
+   *
+   * and to 0 otherwise. Both operators must have the same rank.
+   *
+   * @tparam Op1 First operator type
+   * @tparam Op2 Second operator type
+   * @param op1 First operator
+   * @param op2 Second operator; its magnitude scales the relative tolerance
+   * @param rtol Relative tolerance
+   * @param atol Absolute tolerance
+   * @return IsClose operator
+   */
+  template <typename Op1, typename Op2>
+  __MATX_INLINE__ auto isclose(Op1 op1, Op2 op2, double rtol = 1e-5, double atol = 1e-8) {
+    using CloseOp = detail::IsCloseOp<Op1, Op2>;
+    return CloseOp(op1, op2, rtol, atol);
+  }
+} // end namespace matx
diff --git a/include/matx/operators/operators.h b/include/matx/operators/operators.h
@@ -51,6 +51,7 @@
 #include "matx/operators/ifelse.h"
 #include "matx/operators/index.h"
 #include "matx/operators/interleaved.h"
+#include "matx/operators/isclose.h"
 #include "matx/operators/kronecker.h"
 #include "matx/operators/legendre.h"
 #include "matx/operators/permute.h"

diff --git a/include/matx/transforms/reduce.h b/include/matx/transforms/reduce.h
@@ -385,6 +385,7 @@ __MATX_DEVICE__ __MATX_INLINE__ void atomicAll(int *addr, int val)
   }
 };
 
+
 __MATX_DEVICE__ __MATX_INLINE__ void atomicAll(unsigned int *addr, unsigned int val)
 {
   unsigned int assumed;
@@ -2837,6 +2838,76 @@ void __MATX_INLINE__ all(OutType dest, const InType &in, const int (&dims)[D], E
 #endif  
 }
 
+/**
+ * Find if all values of two operators are close to each other
+ *
+ * Reduces the element-wise isclose() comparison of the two inputs, i.e.
+ * abs(in1 - in2) <= atol + rtol * abs(in2), to a single value using
+ * detail::reduceOpAll<int>. The output is expected to be non-zero only when
+ * the comparison holds for every element.
+ *
+ * @tparam OutType
+ *   Output data type
+ * @tparam InType1
+ *   First input data type
+ * @tparam InType2
+ *   Second input data type
+ *
+ * @param dest
+ *   Destination view of reduction (must be rank 0)
+ * @param in1
+ *   First input to compare
+ * @param in2
+ *   Second input to compare
+ * @param rtol
+ *   Relative tolerance
+ * @param atol
+ *   Absolute tolerance
+ * @param exec
+ *   CUDA executor or stream ID
+ */
+template <typename OutType, typename InType1, typename InType2>
+void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, cudaExecutor exec = 0)
+{
+#ifdef __CUDACC__
+  // Label the NVTX range with both inputs; this function has no parameter named `in`
+  MATX_NVTX_START("allclose(" + get_type_str(in1) + ", " + get_type_str(in2) + ")", matx::MATX_NVTX_LOG_API)
+  static_assert(OutType::Rank() == 0, "allclose output must be rank 0");
+
+  cudaStream_t stream = exec.getStream();
+  reduce(dest, isclose(in1, in2, rtol, atol), detail::reduceOpAll<int>(), stream, true);
+#endif
+}
+
+/**
+ * Find if all values of two operators are close to each other
+ *
+ * Evaluates the element-wise isclose() comparison of the two inputs, i.e.
+ * abs(in1 - in2) <= atol + rtol * abs(in2), and writes a non-zero value to the
+ * output only when the comparison holds for every element.
+ *
+ * @tparam OutType
+ *   Output data type
+ * @tparam InType1
+ *   First input data type
+ * @tparam InType2
+ *   Second input data type
+ *
+ * @param dest
+ *   Destination view of reduction (must be rank 0)
+ * @param in1
+ *   First input to compare
+ * @param in2
+ *   Second input to compare
+ * @param rtol
+ *   Relative tolerance
+ * @param atol
+ *   Absolute tolerance
+ * @param exec
+ *   Single threaded host executor
+ */
+template <typename OutType, typename InType1, typename InType2>
+void __MATX_INLINE__ allclose(OutType dest, const InType1 &in1, const InType2 &in2, double rtol, double atol, [[maybe_unused]] SingleThreadHostExecutor exec)
+{
+  // Label the NVTX range with both inputs; this function has no parameter named `in`
+  MATX_NVTX_START("allclose(" + get_type_str(in1) + ", " + get_type_str(in2) + ")", matx::MATX_NVTX_LOG_API)
+  static_assert(OutType::Rank() == 0, "allclose output must be rank 0");
+
+  auto isc = isclose(in1, in2, rtol, atol);
+
+  // The reduction callback scans the comparison results and stores whether
+  // every one of them is non-zero.
+  auto ft = [&](auto &&lin, auto &&lout, [[maybe_unused]] auto &&lbegin, [[maybe_unused]] auto &&lend) {
+    *lout = std::all_of(lin, lin + TotalSize(in1), [](int vin) {
+        return vin != 0;
+      });
+  };
+
+
+  ReduceInput(ft, dest, isc);
+}
+
+
+
 /**
  * Compute a variance reduction
  *

diff --git a/test/00_operators/OperatorTests.cu b/test/00_operators/OperatorTests.cu
@@ -287,6 +287,54 @@ TYPED_TEST(OperatorTestsAllExecs, ReshapeOp)
   MATX_EXIT_HANDLER();
 }
 
+TYPED_TEST(OperatorTestsFloatAllExecs, IsClose)
+{
+  MATX_ENTER_HANDLER();
+  using TestType = std::tuple_element_t<0, TypeParam>;
+  using ExecType = std::tuple_element_t<1, TypeParam>;
+
+  // NOTE(review): exec is constructed but every run() below is called without
+  // it -- confirm whether run(exec) was intended.
+  ExecType exec{};
+
+  // example-begin isclose-test-1
+  auto A = make_tensor<TestType>({5, 5, 5});
+  auto B = make_tensor<TestType>({5, 5, 5});
+  auto C = make_tensor<int>({5, 5, 5});
+
+  (A = ones<TestType>(A.Shape())).run();
+  (B = ones<TestType>(B.Shape())).run();
+  (C = isclose(A, B)).run();
+  // example-end isclose-test-1
+  cudaStreamSynchronize(0);
+
+  // Identical inputs: every element must be reported as close
+  for (int i = 0; i < A.Size(0); i++) {
+    for (int j = 0; j < A.Size(1); j++) {
+      for (int k = 0; k < A.Size(2); k++) {
+        ASSERT_EQ(C(i,j,k), 1);
+      }
+    }
+  }
+
+  // Move a single element of B far outside the default tolerances and compare again
+  B(1,1,1) = 2;
+  (C = isclose(A, B)).run();
+  cudaStreamSynchronize(0);
+
+  for (int i = 0; i < A.Size(0); i++) {
+    for (int j = 0; j < A.Size(1); j++) {
+      for (int k = 0; k < A.Size(2); k++) {
+        // Only the modified element may be flagged as not close
+        const bool untouched = (i != 1) || (j != 1) || (k != 1);
+        if (untouched) {
+          ASSERT_EQ(C(i,j,k), 1);
+        }
+        else {
+          ASSERT_EQ(C(i,j,k), 0);
+        }
+      }
+    }
+  }
+
+  MATX_EXIT_HANDLER();
+}
+
+
 TYPED_TEST(OperatorTestsFloatNonComplexAllExecs, FMod)
 {
   MATX_ENTER_HANDLER();

diff --git a/test/00_operators/ReductionTests.cu b/test/00_operators/ReductionTests.cu
@@ -627,6 +627,36 @@ TYPED_TEST(ReductionTestsNumericNonComplexAllExecs, Any)
   MATX_EXIT_HANDLER();
 }
 
+TYPED_TEST(ReductionTestsFloatNonComplexNonHalfAllExecs, AllClose)
+{
+  MATX_ENTER_HANDLER();
+  using TestType = std::tuple_element_t<0, TypeParam>;
+  using ExecType = std::tuple_element_t<1, TypeParam>;
+  // Checks allclose() on two 5x5x5 tensors of ones, before and after changing one element of B
+  ExecType exec{}; 
+
+  // example-begin allclose-test-1
+  auto A = make_tensor<TestType>({5, 5, 5});
+  auto B = make_tensor<TestType>({5, 5, 5});
+  auto C = make_tensor<int>();
+
+  (A = ones<TestType>(A.Shape())).run();
+  (B = ones<TestType>(B.Shape())).run();
+  allclose(C, A, B, 1e-5, 1e-8, exec);
+  // example-end allclose-test-1
+  cudaStreamSynchronize(0);
+  // Identical inputs: the reduction must report that every element is close
+  ASSERT_EQ(C(), 1);
+  // Move a single element of B far outside the tolerances and reduce again
+  B(1,1,1) = 2;
+  allclose(C, A, B, 1e-5, 1e-8, exec);
+  cudaStreamSynchronize(0);
+  // One mismatching element is enough to drive the result to 0
+  ASSERT_EQ(C(), 0);
+
+  MATX_EXIT_HANDLER();
+}
+
 TYPED_TEST(ReductionTestsNumericNonComplexAllExecs, All)
 {
   MATX_ENTER_HANDLER();