From 6340b5b34a582ff1ffe3eaf9dd7f3aac33d0a983 Mon Sep 17 00:00:00 2001 From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com> Date: Wed, 16 Jun 2021 12:49:38 -0400 Subject: [PATCH] Add scheduler log tab to performance reports (#4909) Adds a tab to the performance report with the logs of the scheduler. Note that these are all the logs currently contained in the scheduler's log deque. and not just the logs generated in the performance_report context: --- distributed/dashboard/components/scheduler.py | 10 ++++- distributed/deploy/cluster.py | 13 +++---- distributed/scheduler.py | 7 ++++ distributed/tests/test_client.py | 1 + distributed/tests/test_utils.py | 5 +-- distributed/utils.py | 37 ++++++++++++++----- 6 files changed, 52 insertions(+), 21 deletions(-) diff --git a/distributed/dashboard/components/scheduler.py b/distributed/dashboard/components/scheduler.py index 28250ff9d10..b00bcfb94b0 100644 --- a/distributed/dashboard/components/scheduler.py +++ b/distributed/dashboard/components/scheduler.py @@ -32,6 +32,7 @@ value, ) from bokeh.models.widgets import DataTable, TableColumn +from bokeh.models.widgets.markups import Div from bokeh.palettes import Viridis11 from bokeh.plotting import figure from bokeh.themes import Theme @@ -69,7 +70,7 @@ from distributed.diagnostics.task_stream import color_of as ts_color_of from distributed.diagnostics.task_stream import colors as ts_color_lookup from distributed.metrics import time -from distributed.utils import format_time, log_errors, parse_timedelta +from distributed.utils import Logs, format_time, log_errors, parse_timedelta if dask.config.get("distributed.dashboard.export-tool"): from distributed.dashboard.export_tool import ExportTool @@ -2162,6 +2163,13 @@ def update(self): self.source.data.update(data) +class SchedulerLogs: + def __init__(self, scheduler): + logs = Logs(scheduler.get_logs())._repr_html_() + + self.root = Div(text=logs) + + def systemmonitor_doc(scheduler, extra, doc): with log_errors(): sysmon = SystemMonitor(scheduler, sizing_mode="stretch_both") diff --git a/distributed/deploy/cluster.py b/distributed/deploy/cluster.py index 8807cdec990..2439ab6bba3 100644 --- a/distributed/deploy/cluster.py +++ b/distributed/deploy/cluster.py @@ -15,8 +15,7 @@ from ..core import Status from ..objects import SchedulerInfo from ..utils import ( - Log, - Logs, + MultiLogs, format_dashboard_link, log_errors, parse_timedelta, @@ -210,21 +209,19 @@ def _log(self, log): print(log) async def _get_logs(self, cluster=True, scheduler=True, workers=True): - logs = Logs() + logs = MultiLogs() if cluster: - logs["Cluster"] = Log( - "\n".join(line[1] for line in self._cluster_manager_logs) - ) + logs["Cluster"] = self._cluster_manager_logs if scheduler: L = await self.scheduler_comm.get_logs() - logs["Scheduler"] = Log("\n".join(line for level, line in L)) + logs["Scheduler"] = L if workers: d = await self.scheduler_comm.worker_logs(workers=workers) for k, v in d.items(): - logs[k] = Log("\n".join(line for level, line in v)) + logs[k] = v return logs diff --git a/distributed/scheduler.py b/distributed/scheduler.py index a8571e8b627..b8a756b07be 100644 --- a/distributed/scheduler.py +++ b/distributed/scheduler.py @@ -6939,6 +6939,11 @@ def profile_to_figure(state): sysmon = SystemMonitor(self, last_count=last_count, sizing_mode="stretch_both") sysmon.update() + # Scheduler logs + from distributed.dashboard.components.scheduler import SchedulerLogs + + logs = SchedulerLogs(self) + from bokeh.models import Div, Panel, Tabs import distributed @@ -6997,12 +7002,14 @@ def profile_to_figure(state): ) bandwidth_types = Panel(child=bandwidth_types.root, title="Bandwidth (Types)") system = Panel(child=sysmon.root, title="System") + logs = Panel(child=logs.root, title="Scheduler Logs") tabs = Tabs( tabs=[ html, task_stream, system, + logs, compute, workers, scheduler, diff --git a/distributed/tests/test_client.py b/distributed/tests/test_client.py index ec47479e157..0c6b8c088b3 100644 --- a/distributed/tests/test_client.py +++ b/distributed/tests/test_client.py @@ -6374,6 +6374,7 @@ async def f(stacklevel): assert "Dask Performance Report" in data assert "x = da.random" in data assert "Threads: 4" in data + assert "distributed.scheduler - INFO - Clear task state" in data assert dask.__version__ in data # Stacklevel two captures code two frames back -- which in this case diff --git a/distributed/tests/test_utils.py b/distributed/tests/test_utils.py index 0a073484c93..bb274098e3f 100644 --- a/distributed/tests/test_utils.py +++ b/distributed/tests/test_utils.py @@ -19,9 +19,8 @@ from distributed.utils import ( LRU, All, - Log, - Logs, LoopRunner, + MultiLogs, TimeoutError, _maybe_complex, deprecated, @@ -550,7 +549,7 @@ def test_format_bytes_compat(): def test_logs(): - d = Logs({"123": Log("Hello"), "456": Log("World!")}) + d = MultiLogs({"123": [("INFO", "Hello")], "456": [("INFO", "World!")]}) text = d._repr_html_() assert is_valid_xml("
\n{log}\n
".format(
- log=html.escape(self.rstrip())
+ level, message = self
+
+ style = "font-family: monospace; margin: 0;"
+ style += self.level_styles.get(level, "")
+
+ return '{message}
'.format( + style=html.escape(style), + message=html.escape(message), ) -class Logs(dict): - """A container for multiple logs""" +class Logs(list): + """A container for a list of log entries""" + + def _repr_html_(self): + return "\n".join(Log(entry)._repr_html_() for entry in self) + + +class MultiLogs(dict): + """A container for a dict mapping strings to lists of log entries""" def _repr_html_(self): summaries = [ "