diff --git a/common/cuda_hip/base/math.hpp b/common/cuda_hip/base/math.hpp
index 7e4faf80d08..bcdf0bb53e2 100644
--- a/common/cuda_hip/base/math.hpp
+++ b/common/cuda_hip/base/math.hpp
@@ -116,6 +116,7 @@ GKO_ATTRIBUTES GKO_INLINE __half abs<__half>(const complex<__half>& z)
 
 
 namespace gko {
+using thrust::sqrt;
 // It is required by NVHPC 23.3, isnan is undefined when NVHPC are only as host
 // compiler.
 #if defined(__CUDACC__) || defined(GKO_COMPILING_HIP)
@@ -156,6 +157,18 @@ __device__ __forceinline__ __half sqrt(const __half& val)
 }
 
 
+// using overload here. Otherwise, compiler still think the is_finite
+// specialization is still __host__ __device__ function.
+__device__ __forceinline__ bool is_finite(const __half& value)
+{
+    return abs(value) < device_numeric_limits<__half>::inf();
+}
+
+__device__ __forceinline__ bool is_finite(const thrust::complex<__half>& value)
+{
+    return is_finite(value.real()) && is_finite(value.imag());
+}
+
 #endif
 
 
diff --git a/common/cuda_hip/factorization/par_ic_kernels.cpp b/common/cuda_hip/factorization/par_ic_kernels.cpp
index 87e2fefd823..40de74279de 100644
--- a/common/cuda_hip/factorization/par_ic_kernels.cpp
+++ b/common/cuda_hip/factorization/par_ic_kernels.cpp
@@ -84,9 +84,13 @@ __global__ __launch_bounds__(default_block_size) void ic_sweep(
     }
     auto to_write =
         row == col
-            ? sqrt(a_val - sum)
+            ? gko::sqrt(a_val - sum)
             : (a_val - sum) / load_relaxed(l_vals + (l_row_ptrs[col + 1] - 1));
+    // if (row == col && row < 30) {
+    //     printf("%d: %lf\n", row, static_cast<double>(real(to_write)));
+    // }
     if (is_finite(to_write)) {
+        printf("write?!!\n");
         store_relaxed(l_vals + l_nz, to_write);
     }
 }
diff --git a/common/cuda_hip/factorization/par_ilu_kernels.cpp b/common/cuda_hip/factorization/par_ilu_kernels.cpp
index a22bb85275a..403979b0dd3 100644
--- a/common/cuda_hip/factorization/par_ilu_kernels.cpp
+++ b/common/cuda_hip/factorization/par_ilu_kernels.cpp
@@ -66,11 +66,13 @@ __global__ __launch_bounds__(default_block_size) void compute_l_u_factors(
             auto to_write =
                 sum / load_relaxed(u_values + (u_row_ptrs[col + 1] - 1));
             if (is_finite(to_write)) {
+                printf("write!\n");
                 store_relaxed(l_values + (l_idx - 1), to_write);
             }
         } else {
             auto to_write = sum;
             if (is_finite(to_write)) {
+                printf("write!\n");
                 store_relaxed(u_values + (u_idx - 1), to_write);
             }
         }
diff --git a/test/factorization/par_ic_kernels.cpp b/test/factorization/par_ic_kernels.cpp
index bb53a454e21..793d4f3ee61 100644
--- a/test/factorization/par_ic_kernels.cpp
+++ b/test/factorization/par_ic_kernels.cpp
@@ -41,7 +41,7 @@ class ParIc : public CommonTestFixture {
         mtx_l = gko::test::generate_random_lower_triangular_matrix<Csr>(
             mtx_size[0], false,
             std::uniform_int_distribution<index_type>(10, mtx_size[0]),
-            std::normal_distribution<>(0, 10.0), rand_engine, ref);
+            std::normal_distribution<>(0, 5.0), rand_engine, ref);
         dmtx_ani = Csr::create(exec);
         dmtx_l_ani = Csr::create(exec);
         dmtx_l_ani_init = Csr::create(exec);
@@ -108,16 +108,22 @@ TYPED_TEST(ParIc, KernelComputeFactorIsEquivalentToRef)
     using Csr = typename TestFixture::Csr;
     using Coo = typename TestFixture::Coo;
     using value_type = typename TestFixture::value_type;
-    SKIP_IF_HALF(value_type);
     auto square_size = this->mtx_ani->get_size();
     auto mtx_l_coo = Coo::create(this->ref, square_size);
     this->mtx_l_ani->convert_to(mtx_l_coo);
     auto dmtx_l_coo = gko::clone(this->exec, mtx_l_coo);
-
+    auto mtx_init = this->dmtx_l_ani_init->clone(this->ref);
     gko::kernels::reference::par_ic_factorization::compute_factor(
         this->ref, 1, mtx_l_coo.get(), this->mtx_l_ani_init.get());
     gko::kernels::GKO_DEVICE_NAMESPACE::par_ic_factorization::compute_factor(
         this->exec, 100, dmtx_l_coo.get(), this->dmtx_l_ani_init.get());
 
-    GKO_ASSERT_MTX_NEAR(this->mtx_l_ani_init, this->dmtx_l_ani_init, 1e-4);
+    GKO_EXPECT_MTX_NEAR(this->mtx_l_ani_init, this->dmtx_l_ani_init, 1e-4);
+
+    // gko::kernels::reference::par_ic_factorization::compute_factor(
+    //     this->ref, 1, mtx_l_coo.get(), mtx_init.get());
+    // gko::kernels::GKO_DEVICE_NAMESPACE::par_ic_factorization::compute_factor(
+    //     this->exec, 2000, dmtx_l_coo.get(), this->dmtx_l_ani_init.get());
+    // GKO_EXPECT_MTX_NEAR(this->mtx_l_ani_init, mtx_init, 1e-4);
+    GKO_EXPECT_MTX_NEAR(this->dmtx_l_ani_init, mtx_init, 0);
 }
diff --git a/test/factorization/par_ict_kernels.cpp b/test/factorization/par_ict_kernels.cpp
index 945f874ef26..a18658cc0fe 100644
--- a/test/factorization/par_ict_kernels.cpp
+++ b/test/factorization/par_ict_kernels.cpp
@@ -125,7 +125,7 @@ TYPED_TEST(ParIct, KernelComputeFactorIsEquivalentToRef)
     using Csr = typename TestFixture::Csr;
     using Coo = typename TestFixture::Coo;
     using value_type = typename TestFixture::value_type;
-    SKIP_IF_HALF(value_type);
+    // SKIP_IF_HALF(value_type);
     auto square_size = this->mtx_ani->get_size();
     auto mtx_l_coo = Coo::create(this->ref, square_size);
     this->mtx_l_ani->convert_to(mtx_l_coo);
diff --git a/test/factorization/par_ilu_kernels.cpp b/test/factorization/par_ilu_kernels.cpp
index 216a4f597cb..76106c32f0f 100644
--- a/test/factorization/par_ilu_kernels.cpp
+++ b/test/factorization/par_ilu_kernels.cpp
@@ -238,7 +238,7 @@ TYPED_TEST(ParIlu, KernelComputeParILUIsEquivalentToRef)
 {
     using Csr = typename TestFixture::Csr;
     using value_type = typename TestFixture::value_type;
-    SKIP_IF_HALF(value_type);
+    // SKIP_IF_HALF(value_type);
     std::unique_ptr<Csr> l_mtx{};
     std::unique_ptr<Csr> u_mtx{};
     std::unique_ptr<Csr> dl_mtx{};
@@ -257,12 +257,12 @@ TYPED_TEST(ParIlu, KernelComputeParILUWithMoreIterationsIsEquivalentToRef)
 {
     using Csr = typename TestFixture::Csr;
     using value_type = typename TestFixture::value_type;
-    SKIP_IF_HALF(value_type);
+    // SKIP_IF_HALF(value_type);
     std::unique_ptr<Csr> l_mtx{};
     std::unique_ptr<Csr> u_mtx{};
     std::unique_ptr<Csr> dl_mtx{};
     std::unique_ptr<Csr> du_mtx{};
-    gko::size_type iterations{200};
+    gko::size_type iterations{500};
 
     this->compute_lu(l_mtx, u_mtx, dl_mtx, du_mtx, iterations);