ginkgo-project · upsj · Aug 14, 2024 · Aug 11, 2024 · Aug 12, 2024 · Aug 12, 2024
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -486,8 +486,7 @@ build/windows-cuda/release/shared:
     - mkdir install
     - cmake -B build -DBUILD_SHARED_LIBS=ON -DGINKGO_BUILD_CUDA=ON "-DCMAKE_INSTALL_PREFIX=$pwd\install" .
     - cmake --build build --config Release -j16
-# we disable these tests until the triangular solver issues are resolved
-#   - ctest --test-dir build -C Release --no-tests=error --output-on-failure
+    - ctest --test-dir build -C Release --no-tests=error --output-on-failure
     - $env:PATH+=";$pwd/install/bin"
     - cmake --install build --config Release
     - cmake --build build --target test_install --config Release

diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
@@ -22,6 +22,7 @@ function(ginkgo_benchmark_cusparse_linops type def)
     target_compile_definitions(cusparse_linops_${type} PUBLIC ${def})
     target_compile_definitions(cusparse_linops_${type} PRIVATE GKO_COMPILING_CUDA)
     target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo CUDA::cudart CUDA::cublas CUDA::cusparse)
+    ginkgo_compile_features(cusparse_linops_${type})
 endfunction()
 
 function(ginkgo_benchmark_hipsparse_linops type def)
@@ -31,13 +32,15 @@ function(ginkgo_benchmark_hipsparse_linops type def)
     target_compile_definitions(hipsparse_linops_${type} PRIVATE GKO_COMPILING_HIP)
     target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS})
     target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES})
+    ginkgo_compile_features(hipsparse_linops_${type})
 endfunction()
 
 function(ginkgo_benchmark_onemkl_linops type def)
     add_library(onemkl_linops_${type} utils/dpcpp_linops.dp.cpp)
     # make the dependency public to catch issues
     target_compile_definitions(onemkl_linops_${type} PUBLIC ${def})
     target_link_libraries(onemkl_linops_${type} PRIVATE Ginkgo::ginkgo MKL::MKL_DPCPP)
+    ginkgo_compile_features(onemkl_linops_${type})
 endfunction()
 
 
@@ -116,6 +119,7 @@ if (GINKGO_BUILD_CUDA)
     ginkgo_benchmark_cusparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)
     add_library(cuda_timer utils/cuda_timer.cpp)
     target_link_libraries(cuda_timer ginkgo CUDA::cudart)
+    ginkgo_compile_features(cuda_timer)
 endif()
 if (GINKGO_BUILD_HIP)
     ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
@@ -125,6 +129,7 @@ if (GINKGO_BUILD_HIP)
     set_source_files_properties(utils/hip_timer.hip.cpp PROPERTIES LANGUAGE HIP)
     add_library(hip_timer utils/hip_timer.hip.cpp)
     target_link_libraries(hip_timer ginkgo)
+    ginkgo_compile_features(hip_timer)
 endif()
 
 if (GINKGO_BUILD_SYCL)
@@ -136,11 +141,13 @@ if (GINKGO_BUILD_SYCL)
     target_compile_options(dpcpp_timer PRIVATE ${GINKGO_DPCPP_FLAGS})
     gko_add_sycl_to_target(TARGET dpcpp_timer SOURCES utils/dpcpp_timer.dp.cpp)
     target_link_libraries(dpcpp_timer ginkgo)
+    ginkgo_compile_features(dpcpp_timer)
 endif()
 
 if (GINKGO_BUILD_MPI)
     add_library(mpi_timer ${Ginkgo_SOURCE_DIR}/benchmark/utils/mpi_timer.cpp)
     target_link_libraries(mpi_timer ginkgo)
+    ginkgo_compile_features(mpi_timer)
 endif()
 
 add_subdirectory(blas)

diff --git a/benchmark/test/test_framework.py.in b/benchmark/test/test_framework.py.in
@@ -90,9 +90,12 @@ def sanitize_json_text(input: str) -> List[str]:
     and pretty-printed to replace the original JSON input.
     """
 
-    result = json.dumps(sanitize_json(json.loads(input)), indent=4)
-    # json.dumps doesn't add a trailing newline
-    return result.splitlines() + [""]
+    try:
+        result = json.dumps(sanitize_json(json.loads(input)), indent=4)
+        # json.dumps doesn't add a trailing newline
+        return result.splitlines() + [""]
+    except Exception as e:
+        return f"Error: {str(e)}"
 
 
 def sanitize_text(

diff --git a/common/cuda_hip/components/uninitialized_array.hpp b/common/cuda_hip/components/uninitialized_array.hpp
@@ -8,6 +8,8 @@
 
 #include <ginkgo/core/base/types.hpp>
 
+#include "common/cuda_hip/base/thrust.hpp"
+
 
 namespace gko {
 namespace kernels {
@@ -34,7 +36,7 @@ class uninitialized_array {
      */
     constexpr GKO_ATTRIBUTES operator const ValueType*() const noexcept
     {
-        return &(*this)[0];
+        return data_;
     }
 
     /**
@@ -43,7 +45,7 @@ class uninitialized_array {
      *
      * @return the non-const pointer to the first entry of the array.
      */
-    GKO_ATTRIBUTES operator ValueType*() noexcept { return &(*this)[0]; }
+    GKO_ATTRIBUTES operator ValueType*() noexcept { return data_; }
 
     /**
      * constexpr array access operator.
@@ -56,7 +58,7 @@ class uninitialized_array {
     constexpr GKO_ATTRIBUTES const ValueType& operator[](
         size_type pos) const noexcept
     {
-        return reinterpret_cast<const ValueType*>(data_)[pos];
+        return data_[pos];
     }
 
     /**
@@ -69,11 +71,42 @@ class uninitialized_array {
      */
     GKO_ATTRIBUTES ValueType& operator[](size_type pos) noexcept
     {
-        return reinterpret_cast<ValueType*>(data_)[pos];
+        return data_[pos];
+    }
+
+private:
+    ValueType data_[size];
+};
+
+
+template <typename ValueType, size_type size>
+class uninitialized_array<thrust::complex<ValueType>, size> {
+public:
+    constexpr GKO_ATTRIBUTES operator const thrust::complex<ValueType>*()
+        const noexcept
+    {
+        return &(*this)[0];
+    }
+
+    GKO_ATTRIBUTES operator thrust::complex<ValueType>*() noexcept
+    {
+        return &(*this)[0];
+    }
+
+    constexpr GKO_ATTRIBUTES const thrust::complex<ValueType>& operator[](
+        size_type pos) const noexcept
+    {
+        return reinterpret_cast<const thrust::complex<ValueType>*>(data_)[pos];
+    }
+
+    GKO_ATTRIBUTES thrust::complex<ValueType>& operator[](
+        size_type pos) noexcept
+    {
+        return reinterpret_cast<thrust::complex<ValueType>*>(data_)[pos];
     }
 
 private:
-    unsigned char data_[sizeof(ValueType) / sizeof(unsigned char) * size];
+    ValueType data_[2 * size];
 };
 
 

diff --git a/common/cuda_hip/matrix/csr_kernels.template.cpp b/common/cuda_hip/matrix/csr_kernels.template.cpp
@@ -335,7 +335,7 @@ __device__ void merge_path_reduce(const IndexType nwarps,
             }
         }
     }
-    __shared__ uninitialized_array<IndexType, spmv_block_size> tmp_ind;
+    __shared__ IndexType tmp_ind[spmv_block_size];
     __shared__ uninitialized_array<arithmetic_type, spmv_block_size> tmp_val;
     tmp_val[threadIdx.x] = value;
     tmp_ind[threadIdx.x] = row;

diff --git a/cuda/solver/common_trs_kernels.cuh b/cuda/solver/common_trs_kernels.cuh
@@ -6,6 +6,7 @@
 #define GKO_CUDA_SOLVER_COMMON_TRS_KERNELS_CUH_
 
 
+#include <cstring>
 #include <functional>
 #include <iostream>
 #include <memory>
@@ -342,6 +343,52 @@ constexpr int default_block_size = 512;
 constexpr int fallback_block_size = 32;
 
 
+/** Returns an unsigned type matching the size of the given float type. */
+template <typename T>
+struct float_to_unsigned_impl {};
+
+template <>
+struct float_to_unsigned_impl<double> {
+    using type = uint64;
+};
+
+template <>
+struct float_to_unsigned_impl<float> {
+    using type = uint32;
+};
+
+
+/**
+ * Checks if a floating point number representation matches the representation
+ * of the quiet NaN with value gko::nan() exactly.
+ */
+template <typename T>
+GKO_INLINE GKO_ATTRIBUTES std::enable_if_t<!is_complex_s<T>::value, bool>
+is_nan_exact(const T& value)
+{
+    using type = typename float_to_unsigned_impl<T>::type;
+    type value_bytes{};
+    type nan_bytes{};
+    auto nan_value = nan<T>();
+    using std::memcpy;
+    memcpy(&value_bytes, &value, sizeof(value));
+    memcpy(&nan_bytes, &nan_value, sizeof(value));
+    return value_bytes == nan_bytes;
+}
+
+
+/**
+ * Checks if any component of the complex value matches the quiet NaN with
+ * value gko::nan() exactly.
+ */
+template <typename T>
+GKO_INLINE GKO_ATTRIBUTES std::enable_if_t<is_complex_s<T>::value, bool>
+is_nan_exact(const T& value)
+{
+    return is_nan_exact(value.real()) || is_nan_exact(value.imag());
+}
+
+
 template <bool is_upper, typename ValueType, typename IndexType>
 __global__ void sptrsv_naive_caching_kernel(
     const IndexType* const rowptrs, const IndexType* const colidxs,
@@ -399,11 +446,12 @@ __global__ void sptrsv_naive_caching_kernel(
         ValueType val{};
         if (shmem_possible) {
             const auto dependency_shid = dependency_gid % default_block_size;
-            while (is_nan(val = load_relaxed_shared(x_s + dependency_shid))) {
+            while (is_nan_exact(
+                val = load_relaxed_shared(x_s + dependency_shid))) {
             }
         } else {
-            while (
-                is_nan(val = load_relaxed(x + dependency * x_stride + rhs))) {
+            while (is_nan_exact(
+                val = load_relaxed(x + dependency * x_stride + rhs))) {
             }
         }
 
@@ -418,7 +466,7 @@ __global__ void sptrsv_naive_caching_kernel(
     store_relaxed(x + row * x_stride + rhs, r);
 
     // This check to ensure no infinite loops happen.
-    if (is_nan(r)) {
+    if (is_nan_exact(r)) {
         store_relaxed_shared(x_s + self_shid, zero<ValueType>());
         store_relaxed(x + row * x_stride + rhs, zero<ValueType>());
         *nan_produced = true;
@@ -460,7 +508,7 @@ __global__ void sptrsv_naive_legacy_kernel(
     auto col = colidxs[j];
     while (j != row_end) {
         auto x_val = load_relaxed(x + col * x_stride + rhs);
-        while (!is_nan(x_val)) {
+        while (!is_nan_exact(x_val)) {
             sum += vals[j] * x_val;
             j += row_step;
             col = colidxs[j];
@@ -478,7 +526,7 @@ __global__ void sptrsv_naive_legacy_kernel(
             // after we encountered the diagonal, we are done
             // this also skips entries outside the triangle
             j = row_end;
-            if (is_nan(r)) {
+            if (is_nan_exact(r)) {
                 store_relaxed(x + row * x_stride + rhs, zero<ValueType>());
                 *nan_produced = true;
             }

diff --git a/include/ginkgo/core/base/math.hpp b/include/ginkgo/core/base/math.hpp
@@ -1220,10 +1220,14 @@ GKO_INLINE GKO_ATTRIBUTES T safe_divide(T a, T b)
  * @return `true` if the value is NaN.
  */
 template <typename T>
-GKO_INLINE GKO_ATTRIBUTES std::enable_if_t<!is_complex_s<T>::value, bool>
-is_nan(const T& value)
+GKO_DEPRECATED(
+    "is_nan can't be used safely on the device (MSVC+CUDA), and will thus be "
+    "removed in a future release, without replacement")
+GKO_INLINE GKO_ATTRIBUTES
+    std::enable_if_t<!is_complex_s<T>::value, bool> is_nan(const T& value)
 {
-    return std::isnan(value);
+    using std::isnan;
+    return isnan(value);
 }
 
 
@@ -1237,10 +1241,13 @@ is_nan(const T& value)
  * @return `true` if any component of the given value is NaN.
  */
 template <typename T>
+GKO_DEPRECATED(
+    "is_nan can't be used safely on the device (MSVC+CUDA), and will thus be "
+    "removed in a future release, without replacement")
 GKO_INLINE GKO_ATTRIBUTES std::enable_if_t<is_complex_s<T>::value, bool> is_nan(
     const T& value)
 {
-    return std::isnan(value.real()) || std::isnan(value.imag());
+    return is_nan(value.real()) || is_nan(value.imag());
 }
 
 

diff --git a/test/solver/gcr_kernels.cpp b/test/solver/gcr_kernels.cpp
@@ -222,7 +222,7 @@ TEST_F(Gcr, GcrApplyOneRHSIsEquivalentToRef)
     exec_solver->apply(d_b.get(), d_x.get());
 
     GKO_ASSERT_MTX_NEAR(d_b, b, 0);
-    GKO_ASSERT_MTX_NEAR(d_x, x, r<value_type>::value * 1e2);
+    GKO_ASSERT_MTX_NEAR(d_x, x, r<value_type>::value * 1e3);
 }
 
 

diff --git a/test/solver/lower_trs_kernels.cpp b/test/solver/lower_trs_kernels.cpp
@@ -152,7 +152,7 @@ TEST_F(LowerTrs, ApplyTriangularDenseMtxIsEquivalentToRef)
     solver->apply(b, x);
     d_solver->apply(db, dx);
 
-    GKO_ASSERT_MTX_NEAR(dx, x, 1e-14);
+    GKO_ASSERT_MTX_NEAR(dx, x, 1e-13);
 }
 
 
@@ -417,7 +417,7 @@ TEST_F(LowerTrs, ClassicalApplyTriangularDenseMtxIsEquivalentToRef)
     solver->apply(b, x);
     d_solver->apply(db, dx);
 
-    GKO_ASSERT_MTX_NEAR(dx, x, 1e-14);
+    GKO_ASSERT_MTX_NEAR(dx, x, 1e-13);
 }
 
 

diff --git a/test/solver/upper_trs_kernels.cpp b/test/solver/upper_trs_kernels.cpp
@@ -152,7 +152,7 @@ TEST_F(UpperTrs, ApplyTriangularDenseMtxIsEquivalentToRef)
     solver->apply(b, x);
     d_solver->apply(db, dx);
 
-    GKO_ASSERT_MTX_NEAR(dx, x, 1e-14);
+    GKO_ASSERT_MTX_NEAR(dx, x, 1e-13);
 }