diff --git a/distributed/dashboard/components/nvml.py b/distributed/dashboard/components/nvml.py index 34cce3c4bc7..8be3b406bcb 100644 --- a/distributed/dashboard/components/nvml.py +++ b/distributed/dashboard/components/nvml.py @@ -14,7 +14,7 @@ from tornado import escape from dask.utils import format_bytes from distributed.utils import log_errors -from distributed.dashboard.components.scheduler import BOKEH_THEME, TICKS_1024 +from distributed.dashboard.components.scheduler import BOKEH_THEME, TICKS_1024, env from distributed.dashboard.utils import without_property_validation, update @@ -22,8 +22,10 @@ import pynvml pynvml.nvmlInit() + + NVML_ENABLED = True except Exception: - pass + NVML_ENABLED = False class GPUCurrentLoad(DashboardComponent): @@ -173,16 +175,32 @@ def update(self): def gpu_memory_doc(scheduler, extra, doc): - gpu_load = GPUCurrentLoad(scheduler, sizing_mode="stretch_both") - gpu_load.update() - add_periodic_callback(doc, gpu_load, 100) - doc.add_root(gpu_load.memory_figure) - doc.theme = BOKEH_THEME + with log_errors(): + gpu_load = GPUCurrentLoad(scheduler, sizing_mode="stretch_both") + gpu_load.update() + add_periodic_callback(doc, gpu_load, 100) + doc.add_root(gpu_load.memory_figure) + doc.theme = BOKEH_THEME def gpu_utilization_doc(scheduler, extra, doc): - gpu_load = GPUCurrentLoad(scheduler, sizing_mode="stretch_both") - gpu_load.update() - add_periodic_callback(doc, gpu_load, 100) - doc.add_root(gpu_load.utilization_figure) - doc.theme = BOKEH_THEME + with log_errors(): + gpu_load = GPUCurrentLoad(scheduler, sizing_mode="stretch_both") + gpu_load.update() + add_periodic_callback(doc, gpu_load, 100) + doc.add_root(gpu_load.utilization_figure) + doc.theme = BOKEH_THEME + + +def gpu_doc(scheduler, extra, doc): + with log_errors(): + gpu_load = GPUCurrentLoad(scheduler, sizing_mode="stretch_both") + gpu_load.update() + add_periodic_callback(doc, gpu_load, 100) + doc.add_root(gpu_load.memory_figure) + doc.add_root(gpu_load.utilization_figure) + + doc.title = "Dask: GPU" + doc.theme = BOKEH_THEME + doc.template = env.get_template("gpu.html") + doc.template_variables.update(extra) diff --git a/distributed/dashboard/scheduler.py b/distributed/dashboard/scheduler.py index 09a4339b50a..413dde57301 100644 --- a/distributed/dashboard/scheduler.py +++ b/distributed/dashboard/scheduler.py @@ -37,13 +37,21 @@ individual_systemmonitor_doc, ) from .worker import counters_doc -from .components.nvml import gpu_memory_doc, gpu_utilization_doc # noqa: 1708 +from .components.nvml import ( + NVML_ENABLED, + gpu_memory_doc, + gpu_utilization_doc, + gpu_doc, +) # noqa: 1708 template_variables = { "pages": ["status", "workers", "tasks", "system", "profile", "graph", "info"] } +if NVML_ENABLED: + template_variables["pages"].insert(4, "gpu") + def connect(application, http_server, scheduler, prefix=""): bokeh_app = BokehApplication( @@ -75,6 +83,7 @@ def connect(application, http_server, scheduler, prefix=""): "/profile": profile_doc, "/profile-server": profile_server_doc, "/graph": graph_doc, + "/gpu": gpu_doc, "/individual-task-stream": individual_task_stream_doc, "/individual-progress": individual_progress_doc, "/individual-graph": individual_graph_doc, diff --git a/distributed/http/static/css/gpu.css b/distributed/http/static/css/gpu.css new file mode 100644 index 00000000000..44d66163a62 --- /dev/null +++ b/distributed/http/static/css/gpu.css @@ -0,0 +1,16 @@ +#status-fluid { + display: grid; + height: 100%; + } +#status-fluid { + grid-template-columns: 1fr 1fr; + grid-template-rows: 1fr; +} +#gpu-memory { + grid-column: 2; + grid-row: 1; +} +#gpu-utilization { + grid-column: 1; + grid-row: 1; +} \ No newline at end of file diff --git a/distributed/http/templates/gpu.html b/distributed/http/templates/gpu.html new file mode 100644 index 00000000000..3c80f21d332 --- /dev/null +++ b/distributed/http/templates/gpu.html @@ -0,0 +1,22 @@ +{% extends "base.html" %} + +{% block extra_resources %} + +{% endblock %} + +{% block content %} +{% from macros import embed %} +
+ +
+ {{ embed(roots.gpu_utilization_histogram) }} +
+ +
+ {{ embed(roots.gpu_memory_histogram) }} +
+ +
+{{ plot_script }} + +{% endblock %} \ No newline at end of file