dask · quasiben · Oct 6, 2020 · May 18, 2020 · May 18, 2020 · Sep 29, 2020
diff --git a/distributed/diagnostics/nvml.py b/distributed/diagnostics/nvml.py
@@ -1,3 +1,4 @@
+import os
 import pynvml
 
 handles = None
@@ -8,7 +9,16 @@ def _pynvml_handles():
     if handles is None:
         pynvml.nvmlInit()
         count = pynvml.nvmlDeviceGetCount()
-        handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(count)]
+        # CUDA_VISIBLE_DEVICES may be unset or empty: "".split(",") is [""] and
+        # int("") raises ValueError, so blank entries are skipped. An empty
+        # result means no restriction was given and every device is kept.
+        visible = os.environ.get("CUDA_VISIBLE_DEVICES", "")
+        cuda_visible_devices = {int(idx) for idx in visible.split(",") if idx.strip()}
+        handles = [
+            pynvml.nvmlDeviceGetHandleByIndex(i)
+            for i in range(count)
+            if not cuda_visible_devices or i in cuda_visible_devices
+        ]
     return handles
 
 

diff --git a/distributed/diagnostics/tests/test_nvml.py b/distributed/diagnostics/tests/test_nvml.py
@@ -0,0 +1,26 @@
+import pytest
+import os
+
+pytest.importorskip("pynvml")
+
+from distributed.diagnostics import nvml
+
+
+def test_one_time():
+    """``one_time`` output has ``memory-total`` and a non-empty ``name``."""
+    info = nvml.one_time()
+    assert "memory-total" in info
+    assert "name" in info
+    assert len(info["name"]) > 0
+
+
+def test_1_visible_devices(monkeypatch):
+    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0")  # undone at teardown, no env leak
+    monkeypatch.setattr(nvml, "handles", None)  # presumably one_time() reuses this cache
+    assert len(nvml.one_time()["memory-total"]) == 1
+
+
+def test_2_visible_devices(monkeypatch):
+    monkeypatch.setenv("CUDA_VISIBLE_DEVICES", "0,1")  # undone at teardown, no env leak
+    monkeypatch.setattr(nvml, "handles", None)  # force handles to be rebuilt from the env
+    assert len(nvml.one_time()["memory-total"]) == 2