Skip to content

Commit

Permalink
Move SystemMonitor's GPU initialization back to constructor (#4866)
Browse files Browse the repository at this point in the history
* Always use index 0 to get NVML GPU handle

* Move SystemMonitor's GPU initialization back to constructor

* Use nvmlDeviceGetHandleByIndex directly

* Remove redundant nvmlInit check

* Revert "Use nvmlDeviceGetHandleByIndex directly"

This reverts commit ddf9a43.

* Revert "Always use index 0 to get NVML GPU handle"

This reverts commit d860e58.
  • Loading branch information
pentschev authored Jun 3, 2021
1 parent 726f80d commit 1754b48
Showing 1 changed file with 4 additions and 8 deletions.
12 changes: 4 additions & 8 deletions distributed/system_monitor.py
Original file line number Diff line number Diff line change
@@ -40,8 +40,9 @@ def __init__(self, n=10000):
self.quantities["num_fds"] = self.num_fds

if nvml is not None:
- self.gpu_name = None
- self.gpu_memory_total = None
+ gpu_extra = nvml.one_time()
+ self.gpu_name = gpu_extra["name"]
+ self.gpu_memory_total = gpu_extra["memory-total"]
self.gpu_utilization = deque(maxlen=n)
self.gpu_memory_used = deque(maxlen=n)
self.quantities["gpu_utilization"] = self.gpu_utilization
@@ -90,12 +91,7 @@ def update(self):
self.num_fds.append(num_fds)
result["num_fds"] = num_fds

- # give external modules (like dask-cuda) a chance to initialize CUDA context
- if nvml is not None and nvml.nvmlInit is not None:
- if self.gpu_name is None:
- gpu_extra = nvml.one_time()
- self.gpu_name = gpu_extra["name"]
- self.gpu_memory_total = gpu_extra["memory-total"]
+ if nvml is not None:
gpu_metrics = nvml.real_time()
self.gpu_utilization.append(gpu_metrics["utilization"])
self.gpu_memory_used.append(gpu_metrics["memory-used"])
Expand Down

0 comments on commit 1754b48

Please sign in to comment.