From 82068defaeb972a6dca93b49c83de2c33463bef8 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 5 Jan 2024 17:52:55 +0100 Subject: [PATCH] Prevent double UCX initialization in `test_dgx` (#1301) Double initialization of UCX context may raise exceptions and cause test failures; prevent that by resetting the context after doing some initial checks. Authors: - Peter Andreas Entschev (https://github.com/pentschev) Approvers: - Benjamin Zaitlen (https://github.com/quasiben) URL: https://github.com/rapidsai/dask-cuda/pull/1301 --- dask_cuda/tests/test_dgx.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dask_cuda/tests/test_dgx.py b/dask_cuda/tests/test_dgx.py index a7b79f327..d57cf1a3c 100644 --- a/dask_cuda/tests/test_dgx.py +++ b/dask_cuda/tests/test_dgx.py @@ -144,6 +144,10 @@ def _test_ucx_infiniband_nvlink( else: skip_queue.put("ok") + # `ucp.get_active_transports()` call above initializes UCX, we must reset it + # so that Dask doesn't try to initialize it again and raise an exception. + ucp.reset() + if enable_infiniband is None and enable_nvlink is None and enable_rdmacm is None: enable_tcp_over_ucx = None cm_tls = ["all"]