From 4da5a94d6fcce9404551db99bfd4371bb73146ae Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <danteg@nvidia.com>
Date: Mon, 2 Oct 2023 15:01:17 -0500
Subject: [PATCH 1/5] FIX context creation at import time

---
 python/cuml/internals/available_devices.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py
index ec58fac0df..e9eb264e43 100644
--- a/python/cuml/internals/available_devices.py
+++ b/python/cuml/internals/available_devices.py
@@ -24,12 +24,17 @@
     cache = lru_cache(maxsize=None)
 
 
-get_cuda_count = gpu_only_import_from("rmm._cuda.gpu", "getDeviceCount")
+def get_cuda_count():
+    try:
+        import cupy
+        return True
+    except ImportError:
+        return False
 
 
 @cache
 def is_cuda_available():
     try:
-        return GPU_ENABLED and get_cuda_count() >= 1
+        return GPU_ENABLED and get_cuda_count()
     except UnavailableError:
         return False

From 884fd9c297e993f1b74935c4631c4a4679414c7e Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <danteg@nvidia.com>
Date: Mon, 2 Oct 2023 17:17:15 -0500
Subject: [PATCH 2/5] FIX style fixes

---
 python/cuml/internals/available_devices.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py
index e9eb264e43..f6c583c573 100644
--- a/python/cuml/internals/available_devices.py
+++ b/python/cuml/internals/available_devices.py
@@ -27,6 +27,7 @@
 def get_cuda_count():
     try:
         import cupy
+
         return True
     except ImportError:
         return False

From edc0b62eaaa594b5dcc45cab3ea1b77cd93cc07d Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <dante.gamadessavre@gmail.com>
Date: Tue, 3 Oct 2023 09:06:10 -0500
Subject: [PATCH 3/5] FIX Update function name and description based on PR
 review

---
 python/cuml/internals/available_devices.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py
index f6c583c573..95fc35ef84 100644
--- a/python/cuml/internals/available_devices.py
+++ b/python/cuml/internals/available_devices.py
@@ -24,7 +24,11 @@
     cache = lru_cache(maxsize=None)
 
 
-def get_cuda_count():
+def gpu_available_no_context_creation():
+    """
+    Function tries to check if GPUs are available in the system without
+    creating a CUDA context. We check for CuPy presence as a proxy of that. 
+    """
     try:
         import cupy
 
@@ -36,6 +40,6 @@ def get_cuda_count():
 @cache
 def is_cuda_available():
     try:
-        return GPU_ENABLED and get_cuda_count()
+        return GPU_ENABLED and gpu_available_no_context_creation()
     except UnavailableError:
         return False

From 2bcbf74dd08721b1e4ed6d44c83f268c7346f61b Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <danteg@nvidia.com>
Date: Tue, 3 Oct 2023 09:10:30 -0500
Subject: [PATCH 4/5] FIX style fixes

---
 python/cuml/internals/available_devices.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py
index 95fc35ef84..8110f1b5d1 100644
--- a/python/cuml/internals/available_devices.py
+++ b/python/cuml/internals/available_devices.py
@@ -27,7 +27,7 @@
 def gpu_available_no_context_creation():
     """
     Function tries to check if GPUs are available in the system without
-    creating a CUDA context. We check for CuPy presence as a proxy of that. 
+    creating a CUDA context. We check for CuPy presence as a proxy of that.
     """
     try:
         import cupy

From 603a838fd7a7d53ac9d86630cefbd05e0c53cb64 Mon Sep 17 00:00:00 2001
From: Dante Gama Dessavre <danteg@nvidia.com>
Date: Wed, 4 Oct 2023 12:21:13 -0500
Subject: [PATCH 5/5] FIX cd fix and temporarily skip flaky test

---
 python/cuml/solvers/cd.pyx                  |   2 +-
 python/cuml/tests/test_nearest_neighbors.py |   2 +
 python/cuml/tests/test_no_cuinit.py         | 109 ++++++++++++++++++++
 3 files changed, 112 insertions(+), 1 deletion(-)
 create mode 100644 python/cuml/tests/test_no_cuinit.py

diff --git a/python/cuml/solvers/cd.pyx b/python/cuml/solvers/cd.pyx
index 816d5f1955..c9c22fd0f6 100644
--- a/python/cuml/solvers/cd.pyx
+++ b/python/cuml/solvers/cd.pyx
@@ -296,7 +296,7 @@ class CD(Base,
                       <double>self.tol,
                       <double*>sample_weight_ptr)
 
-            self.intercept_ = _c_intercept2_f64
+                self.intercept_ = _c_intercept2_f64
 
         self.handle.sync()
         del X_m
diff --git a/python/cuml/tests/test_nearest_neighbors.py b/python/cuml/tests/test_nearest_neighbors.py
index 85548fc9f5..b4bed52d27 100644
--- a/python/cuml/tests/test_nearest_neighbors.py
+++ b/python/cuml/tests/test_nearest_neighbors.py
@@ -256,6 +256,8 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist):
 def test_ivfpq_pred(
     nrows, ncols, n_neighbors, nlist, M, n_bits, usePrecomputedTables
 ):
+    if ncols == 512 and usePrecomputedTables is True:
+        pytest.skip("https://github.com/rapidsai/cuml/issues/5603")
     algo_params = {
         "nlist": nlist,
         "nprobe": int(nlist * 0.2),
diff --git a/python/cuml/tests/test_no_cuinit.py b/python/cuml/tests/test_no_cuinit.py
new file mode 100644
index 0000000000..661e496dfc
--- /dev/null
+++ b/python/cuml/tests/test_no_cuinit.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+import os
+import subprocess
+import sys
+from shutil import which
+
+import pytest
+
+GDB_COMMANDS = """
+set confirm off
+set breakpoint pending on
+break cuInit
+run
+exit
+"""
+
+
+@pytest.fixture(scope="module")
+def cuda_gdb(request):
+    gdb = which("cuda-gdb")
+    if gdb is None:
+        request.applymarker(
+            pytest.mark.xfail(reason="No cuda-gdb found, can't detect cuInit"),
+        )
+        return gdb
+    else:
+        output = subprocess.run(
+            [gdb, "--version"], capture_output=True, text=True
+        )
+        if output.returncode != 0:
+            request.applymarker(
+                pytest.mark.xfail(
+                    reason=(
+                        "cuda-gdb not working on this platform, "
+                        f"can't detect cuInit: {output.stderr}"
+                    )
+                ),
+            )
+        return gdb
+
+
+def test_cuml_import_no_cuinit(cuda_gdb):
+    # When RAPIDS_NO_INITIALIZE is set, importing cuml should _not_
+    # create a CUDA context (i.e. cuInit should not be called).
+    # Intercepting the call to cuInit programmatically is tricky since
+    # the way it is resolved from dynamic libraries by
+    # cuda-python/numba/cupy is multitudinous (see discussion at
+    # https://github.com/rapidsai/cuml/pull/12361 which does this, but
+    # needs to provide hooks that override dlsym, cuGetProcAddress,
+    # and cuInit).
+    # Instead, we just run under GDB and see if we hit a breakpoint.
+    env = os.environ.copy()
+    env["RAPIDS_NO_INITIALIZE"] = "1"
+    output = subprocess.run(
+        [
+            cuda_gdb,
+            "-x",
+            "-",
+            "--args",
+            sys.executable,
+            "-c",
+            "import cuml",
+        ],
+        input=GDB_COMMANDS,
+        env=env,
+        capture_output=True,
+        text=True,
+    )
+
+    cuInit_called = output.stdout.find("in cuInit ()")
+    print("Command output:\n")
+    print("*** STDOUT ***")
+    print(output.stdout)
+    print("*** STDERR ***")
+    print(output.stderr)
+    assert output.returncode == 0
+    assert cuInit_called < 0
+
+
+def test_cuml_create_estimator_cuinit(cuda_gdb):
+    # This tests that our gdb scripting correctly identifies cuInit
+    # when it definitely should have been called.
+    env = os.environ.copy()
+    env["RAPIDS_NO_INITIALIZE"] = "1"
+    output = subprocess.run(
+        [
+            cuda_gdb,
+            "-x",
+            "-",
+            "--args",
+            sys.executable,
+            "-c",
+            "import cupy as cp; a = cp.ones(10)",
+        ],
+        input=GDB_COMMANDS,
+        env=env,
+        capture_output=True,
+        text=True,
+    )
+
+    cuInit_called = output.stdout.find("in cuInit ()")
+    print("Command output:\n")
+    print("*** STDOUT ***")
+    print(output.stdout)
+    print("*** STDERR ***")
+    print(output.stderr)
+    assert output.returncode == 0
+    assert cuInit_called >= 0