From 1754b48864563a30b576636611db7266c6b4c9b6 Mon Sep 17 00:00:00 2001
From: Peter Andreas Entschev
Date: Thu, 3 Jun 2021 21:37:02 +0200
Subject: [PATCH] Move SystemMonitor's GPU initialization back to constructor (#4866)

* Always use index 0 to get NVML GPU handle

* Move SystemMonitor's GPU initialization back to constructor

* Use nvmlDeviceGetHandleByIndex directly

* Remove redundant nvmlInit check

* Revert "Use nvmlDeviceGetHandleByIndex directly"

This reverts commit ddf9a434154fad4802bc75915b61957fbbf13600.

* Revert "Always use index 0 to get NVML GPU handle"

This reverts commit d860e585c8455f285f5e5ca0d1470cb2c255e281.
---
 distributed/system_monitor.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/distributed/system_monitor.py b/distributed/system_monitor.py
index dba32e55e4..acaeb16c7d 100644
--- a/distributed/system_monitor.py
+++ b/distributed/system_monitor.py
@@ -40,8 +40,9 @@ def __init__(self, n=10000):
             self.quantities["num_fds"] = self.num_fds
 
         if nvml is not None:
-            self.gpu_name = None
-            self.gpu_memory_total = None
+            gpu_extra = nvml.one_time()
+            self.gpu_name = gpu_extra["name"]
+            self.gpu_memory_total = gpu_extra["memory-total"]
             self.gpu_utilization = deque(maxlen=n)
             self.gpu_memory_used = deque(maxlen=n)
             self.quantities["gpu_utilization"] = self.gpu_utilization
@@ -90,12 +91,7 @@ def update(self):
             self.num_fds.append(num_fds)
             result["num_fds"] = num_fds
 
-        # give external modules (like dask-cuda) a chance to initialize CUDA context
-        if nvml is not None and nvml.nvmlInit is not None:
-            if self.gpu_name is None:
-                gpu_extra = nvml.one_time()
-                self.gpu_name = gpu_extra["name"]
-                self.gpu_memory_total = gpu_extra["memory-total"]
+        if nvml is not None:
             gpu_metrics = nvml.real_time()
             self.gpu_utilization.append(gpu_metrics["utilization"])
             self.gpu_memory_used.append(gpu_metrics["memory-used"])