diff --git a/distributed/dashboard/components/nvml.py b/distributed/dashboard/components/nvml.py
index 534c396cbc0..34cce3c4bc7 100644
--- a/distributed/dashboard/components/nvml.py
+++ b/distributed/dashboard/components/nvml.py
@@ -131,29 +131,23 @@ def update(self):
             memory_total = 0
             memory_max = 0
             worker = []
-            i = 0
-            for ws in workers:
+            for idx, ws in enumerate(workers):
                 try:
                     info = ws.extra["gpu"]
                 except KeyError:
                     continue
                 metrics = ws.metrics["gpu"]
 
-                for j, (u, mem_used, mem_total) in enumerate(
-                    zip(
-                        metrics["utilization"],
-                        metrics["memory-used"],
-                        info["memory-total"],
-                    )
-                ):
-                    memory_max = max(memory_max, mem_total)
-                    memory_total += mem_total
-                    utilization.append(int(u))
-                    memory.append(mem_used)
-                    worker.append(ws.address)
-                    gpu_index.append(j)
-                    y.append(i)
-                    i += 1
+                u = metrics["utilization"]
+                mem_used = metrics["memory-used"]
+                mem_total = info["memory-total"]
+                memory_max = max(memory_max, mem_total)
+                memory_total += mem_total
+                utilization.append(int(u))
+                memory.append(mem_used)
+                worker.append(ws.address)
+                gpu_index.append(idx)
+                y.append(idx)
 
             memory_text = [format_bytes(m) for m in memory]
 
diff --git a/distributed/diagnostics/nvml.py b/distributed/diagnostics/nvml.py
index a96a5547598..c1bbb4161a8 100644
--- a/distributed/diagnostics/nvml.py
+++ b/distributed/diagnostics/nvml.py
@@ -1,28 +1,48 @@
+import os
 import pynvml
 
-handles = None
+nvmlInit = None
+
+
+def init_once():
+    global nvmlInit
+    if nvmlInit is not None:
+        return
+
+    from pynvml import nvmlInit as _nvmlInit
+
+    nvmlInit = _nvmlInit
+    nvmlInit()
 
 
 def _pynvml_handles():
-    global handles
-    if handles is None:
-        pynvml.nvmlInit()
-        count = pynvml.nvmlDeviceGetCount()
-        handles = [pynvml.nvmlDeviceGetHandleByIndex(i) for i in range(count)]
-    return handles
+    count = pynvml.nvmlDeviceGetCount()
+    try:
+        cuda_visible_devices = [
+            int(idx) for idx in os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")
+        ]
+    except ValueError:
+        # CUDA_VISIBLE_DEVICES is not set
+        cuda_visible_devices = False
+    if not cuda_visible_devices:
+        cuda_visible_devices = list(range(count))
+    gpu_idx = cuda_visible_devices[0]
+    return pynvml.nvmlDeviceGetHandleByIndex(gpu_idx)
 
 
 def real_time():
-    handles = _pynvml_handles()
+    init_once()
+    h = _pynvml_handles()
     return {
-        "utilization": [pynvml.nvmlDeviceGetUtilizationRates(h).gpu for h in handles],
-        "memory-used": [pynvml.nvmlDeviceGetMemoryInfo(h).used for h in handles],
+        "utilization": pynvml.nvmlDeviceGetUtilizationRates(h).gpu,
+        "memory-used": pynvml.nvmlDeviceGetMemoryInfo(h).used,
     }
 
 
 def one_time():
-    handles = _pynvml_handles()
+    init_once()
+    h = _pynvml_handles()
     return {
-        "memory-total": [pynvml.nvmlDeviceGetMemoryInfo(h).total for h in handles],
-        "name": [pynvml.nvmlDeviceGetName(h).decode() for h in handles],
+        "memory-total": pynvml.nvmlDeviceGetMemoryInfo(h).total,
+        "name": pynvml.nvmlDeviceGetName(h).decode(),
     }
diff --git a/distributed/diagnostics/tests/test_nvml.py b/distributed/diagnostics/tests/test_nvml.py
new file mode 100644
index 00000000000..820ba57a8a5
--- /dev/null
+++ b/distributed/diagnostics/tests/test_nvml.py
@@ -0,0 +1,34 @@
+import pytest
+import os
+
+pynvml = pytest.importorskip("pynvml")
+
+from distributed.diagnostics import nvml
+
+
+def test_one_time():
+    output = nvml.one_time()
+    assert "memory-total" in output
+    assert "name" in output
+
+    assert len(output["name"]) > 0
+
+
+def test_1_visible_devices():
+    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+    output = nvml.one_time()
+    assert output["memory-total"] > 0
+
+
+@pytest.mark.parametrize("CVD", ["1,0", "0,1"])
+def test_2_visible_devices(CVD):
+    os.environ["CUDA_VISIBLE_DEVICES"] = CVD
+    idx = int(CVD.split(",")[0])
+
+    h = nvml._pynvml_handles()
+    h2 = pynvml.nvmlDeviceGetHandleByIndex(idx)
+
+    s = pynvml.nvmlDeviceGetSerial(h)
+    s2 = pynvml.nvmlDeviceGetSerial(h2)
+
+    assert s == s2