From 4da5a94d6fcce9404551db99bfd4371bb73146ae Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 2 Oct 2023 15:01:17 -0500 Subject: [PATCH 1/5] FIX context creation at import time --- python/cuml/internals/available_devices.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py index ec58fac0df..e9eb264e43 100644 --- a/python/cuml/internals/available_devices.py +++ b/python/cuml/internals/available_devices.py @@ -24,12 +24,17 @@ cache = lru_cache(maxsize=None) -get_cuda_count = gpu_only_import_from("rmm._cuda.gpu", "getDeviceCount") +def get_cuda_count(): + try: + import cupy + return True + except ImportError: + return False @cache def is_cuda_available(): try: - return GPU_ENABLED and get_cuda_count() >= 1 + return GPU_ENABLED and get_cuda_count() except UnavailableError: return False From 884fd9c297e993f1b74935c4631c4a4679414c7e Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 2 Oct 2023 17:17:15 -0500 Subject: [PATCH 2/5] FIX style fixes --- python/cuml/internals/available_devices.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py index e9eb264e43..f6c583c573 100644 --- a/python/cuml/internals/available_devices.py +++ b/python/cuml/internals/available_devices.py @@ -27,6 +27,7 @@ def get_cuda_count(): try: import cupy + return True except ImportError: return False From edc0b62eaaa594b5dcc45cab3ea1b77cd93cc07d Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 3 Oct 2023 09:06:10 -0500 Subject: [PATCH 3/5] FIX Update function name and description based on PR review --- python/cuml/internals/available_devices.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py index f6c583c573..95fc35ef84 100644 --- 
a/python/cuml/internals/available_devices.py +++ b/python/cuml/internals/available_devices.py @@ -24,7 +24,11 @@ cache = lru_cache(maxsize=None) -def get_cuda_count(): +def gpu_available_no_context_creation(): + """ + Function tries to check if GPUs are available in the system without + creating a CUDA context. We check for CuPy presence as a proxy of that. + """ try: import cupy @@ -36,6 +40,6 @@ def get_cuda_count(): @cache def is_cuda_available(): try: - return GPU_ENABLED and get_cuda_count() + return GPU_ENABLED and gpu_available_no_context_creation() except UnavailableError: return False From 2bcbf74dd08721b1e4ed6d44c83f268c7346f61b Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 3 Oct 2023 09:10:30 -0500 Subject: [PATCH 4/5] FIX style fixes --- python/cuml/internals/available_devices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuml/internals/available_devices.py b/python/cuml/internals/available_devices.py index 95fc35ef84..8110f1b5d1 100644 --- a/python/cuml/internals/available_devices.py +++ b/python/cuml/internals/available_devices.py @@ -27,7 +27,7 @@ def gpu_available_no_context_creation(): """ Function tries to check if GPUs are available in the system without - creating a CUDA context. We check for CuPy presence as a proxy of that. + creating a CUDA context. We check for CuPy presence as a proxy of that. 
""" try: import cupy From 603a838fd7a7d53ac9d86630cefbd05e0c53cb64 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 4 Oct 2023 12:21:13 -0500 Subject: [PATCH 5/5] FIX cd fix and temporarly skip flaky test --- python/cuml/solvers/cd.pyx | 2 +- python/cuml/tests/test_nearest_neighbors.py | 2 + python/cuml/tests/test_no_cuinit.py | 109 ++++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 python/cuml/tests/test_no_cuinit.py diff --git a/python/cuml/solvers/cd.pyx b/python/cuml/solvers/cd.pyx index 816d5f1955..c9c22fd0f6 100644 --- a/python/cuml/solvers/cd.pyx +++ b/python/cuml/solvers/cd.pyx @@ -296,7 +296,7 @@ class CD(Base, self.tol, sample_weight_ptr) - self.intercept_ = _c_intercept2_f64 + self.intercept_ = _c_intercept2_f64 self.handle.sync() del X_m diff --git a/python/cuml/tests/test_nearest_neighbors.py b/python/cuml/tests/test_nearest_neighbors.py index 85548fc9f5..b4bed52d27 100644 --- a/python/cuml/tests/test_nearest_neighbors.py +++ b/python/cuml/tests/test_nearest_neighbors.py @@ -256,6 +256,8 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist): def test_ivfpq_pred( nrows, ncols, n_neighbors, nlist, M, n_bits, usePrecomputedTables ): + if ncols == 512 and usePrecomputedTables is True: + pytest.skip("https://github.com/rapidsai/cuml/issues/5603") algo_params = { "nlist": nlist, "nprobe": int(nlist * 0.2), diff --git a/python/cuml/tests/test_no_cuinit.py b/python/cuml/tests/test_no_cuinit.py new file mode 100644 index 0000000000..661e496dfc --- /dev/null +++ b/python/cuml/tests/test_no_cuinit.py @@ -0,0 +1,109 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +import os +import subprocess +import sys +from shutil import which + +import pytest + +GDB_COMMANDS = """ +set confirm off +set breakpoint pending on +break cuInit +run +exit +""" + + +@pytest.fixture(scope="module") +def cuda_gdb(request): + gdb = which("cuda-gdb") + if gdb is None: + request.applymarker( + pytest.mark.xfail(reason="No cuda-gdb found, can't detect cuInit"), + ) + return gdb + else: + output = subprocess.run( + [gdb, "--version"], capture_output=True, text=True + ) + if output.returncode != 0: + request.applymarker( + pytest.mark.xfail( + reason=( + "cuda-gdb not working on this platform, " + f"can't detect cuInit: {output.stderr}" + ) + ), + ) + return gdb + + +def test_cuml_import_no_cuinit(cuda_gdb): + # When RAPIDS_NO_INITIALIZE is set, importing cuml should _not_ + # create a CUDA context (i.e. cuInit should not be called). + # Intercepting the call to cuInit programmatically is tricky since + # the way it is resolved from dynamic libraries by + # cuda-python/numba/cupy is multitudinous (see discussion at + # https://github.com/rapidsai/cuml/pull/12361 which does this, but + # needs to provide hooks that override dlsym, cuGetProcAddress, and + # cuInit). + # Instead, we just run under GDB and see if we hit a breakpoint + env = os.environ.copy() + env["RAPIDS_NO_INITIALIZE"] = "1" + output = subprocess.run( + [ + cuda_gdb, + "-x", + "-", + "--args", + sys.executable, + "-c", + "import cuml", + ], + input=GDB_COMMANDS, + env=env, + capture_output=True, + text=True, + ) + + cuInit_called = output.stdout.find("in cuInit ()") + print("Command output:\n") + print("*** STDOUT ***") + print(output.stdout) + print("*** STDERR ***") + print(output.stderr) + assert output.returncode == 0 + assert cuInit_called < 0 + + +def test_cuml_create_estimator_cuinit(cuda_gdb): + # This tests that our gdb scripting correctly identifies cuInit + # when it definitely should have been called. 
+ env = os.environ.copy() + env["RAPIDS_NO_INITIALIZE"] = "1" + output = subprocess.run( + [ + cuda_gdb, + "-x", + "-", + "--args", + sys.executable, + "-c", + "import cupy as cp; a = cp.ones(10)", + ], + input=GDB_COMMANDS, + env=env, + capture_output=True, + text=True, + ) + + cuInit_called = output.stdout.find("in cuInit ()") + print("Command output:\n") + print("*** STDOUT ***") + print(output.stdout) + print("*** STDERR ***") + print(output.stderr) + assert output.returncode == 0 + assert cuInit_called >= 0