Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better exception grouping in reports #720

Merged
merged 8 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions eolearn/core/eonode.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,13 @@ class NodeStats:
node_name: str
start_time: dt.datetime
end_time: dt.datetime
exception: BaseException | None = None
exception_traceback: str | None = None
exception_info: ExceptionInfo | None = None


@dataclass(frozen=True)
class ExceptionInfo:
    """Contains information on exceptions that occur when executing a node.

    Instances are immutable (frozen dataclass), so they can be stored on node statistics and shared
    without the risk of being modified afterwards.
    """

    # The exception object that was raised.
    exception: BaseException
    # The formatted traceback text belonging to the exception.
    traceback: str
    # Short human-readable description of where the exception was raised from.
    origin: str
38 changes: 20 additions & 18 deletions eolearn/core/eoworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import logging
import traceback
from dataclasses import dataclass, field, fields
from typing import Literal, Sequence, Tuple, cast, overload
from typing import Literal, Sequence, overload

from .eodata import EOPatch
from .eonode import EONode, NodeStats
from .eonode import EONode, ExceptionInfo, NodeStats
from .eotask import EOTask
from .eoworkflow_tasks import OutputTask
from .graph import DirectedGraph
Expand Down Expand Up @@ -209,7 +209,7 @@ def _execute_nodes(
)

stats_dict[node.uid] = stats
if stats.exception is not None:
if stats.exception_info is not None:
break

intermediate_results[node.uid] = result
Expand All @@ -235,43 +235,45 @@ def _execute_node(

LOGGER.debug("Computing %s(*%s, **%s)", node.task.__class__.__name__, str(task_args), str(node_input_kwargs))
start_time = dt.datetime.now()
result, is_success = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
result = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
end_time = dt.datetime.now()

if is_success:
exception, exception_traceback = None, None
else:
exception, exception_traceback = cast(Tuple[BaseException, str], result) # temporary fix until 3.8
result = None
if isinstance(result, ExceptionInfo):
exception_info, result = result, None
LOGGER.error(
"Task '%s' with id %s failed with stack trace:\n%s", node.get_name(), node.uid, exception_traceback
"Task '%s' with id %s failed with stack trace:\n%s",
node.get_name(),
node.uid,
exception_info.traceback,
)
else:
exception_info = None

node_stats = NodeStats(
node_uid=node.uid,
node_name=node.get_name(),
start_time=start_time,
end_time=end_time,
exception=exception,
exception_traceback=exception_traceback,
exception_info=exception_info,
)
return result, node_stats

@staticmethod
def _execute_task(
task: EOTask, task_args: list[object], task_kwargs: dict[str, object], raise_errors: bool
) -> tuple[object, bool]:
) -> object | ExceptionInfo:
"""Executes an EOTask and handles any potential exceptions."""
if raise_errors:
return task.execute(*task_args, **task_kwargs), True
return task.execute(*task_args, **task_kwargs)

try:
return task.execute(*task_args, **task_kwargs), True
return task.execute(*task_args, **task_kwargs)
except KeyboardInterrupt as exception:
raise KeyboardInterrupt from exception
except BaseException as exception:
exception_traceback = traceback.format_exc()
return (exception, exception_traceback), False
trace = traceback.extract_tb(exception.__traceback__)
origin = f"line {trace[-1].lineno} in {trace[-1].filename}." if trace else "unknown origin."
return ExceptionInfo(exception, traceback=traceback.format_exc(), origin=origin)

@staticmethod
def _relax_dependencies(
Expand Down Expand Up @@ -363,7 +365,7 @@ class WorkflowResults:
def __post_init__(self) -> None:
"""Checks if there is any node that failed during the workflow execution."""
for node_uid, node_stats in self.stats.items():
if node_stats.exception is not None:
if node_stats.exception_info is not None:
super().__setattr__("error_node_uid", node_uid)
break

Expand Down
67 changes: 49 additions & 18 deletions eolearn/visualization/eoexecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
import datetime as dt
import importlib
import inspect
import itertools as it
import os
import warnings
from collections import defaultdict
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, cast

import fs
Expand All @@ -28,6 +30,7 @@
from pygments.formatters.html import HtmlFormatter

from eolearn.core import EOExecutor
from eolearn.core.eonode import ExceptionInfo
from eolearn.core.exceptions import EOUserWarning


Expand Down Expand Up @@ -97,40 +100,49 @@ def _create_dependency_graph(self) -> str:
dot = self.eoexecutor.workflow.dependency_graph()
return base64.b64encode(dot.pipe()).decode()

def _get_exception_stats(self) -> list[tuple[str, str, list[tuple[str, int]]]]:
"""Creates aggregated stats about exceptions"""
formatter = HtmlFormatter()
lexer = pygments.lexers.get_lexer_by_name("python", stripall=True)
def _get_exception_stats(self) -> list[tuple[str, str, list[_ErrorSummary]]]:
"""Creates aggregated stats about exceptions

Returns tuples of form (name, uid, [error_summary])
"""

exception_stats: defaultdict[str, defaultdict[str, int]] = defaultdict(lambda: defaultdict(lambda: 0))
exception_stats: defaultdict[str, dict[str, _ErrorSummary]] = defaultdict(dict)

for workflow_results in self.eoexecutor.execution_results:
if not workflow_results.error_node_uid:
for execution, results, execution_idx in zip(
self.eoexecutor.execution_names, self.eoexecutor.execution_results, it.count()
):
zigaLuksic marked this conversation as resolved.
Show resolved Hide resolved
if not results.error_node_uid:
continue

error_node = workflow_results.stats[workflow_results.error_node_uid]
exception_str = pygments.highlight(
f"{error_node.exception.__class__.__name__}: {error_node.exception}", lexer, formatter
)
exception_stats[error_node.node_uid][exception_str] += 1
error_node = results.stats[results.error_node_uid]
exception_info: ExceptionInfo = error_node.exception_info # type: ignore[assignment]
origin_str = f"<b>{exception_info.exception.__class__.__name__}</b> raised from {exception_info.origin}"

if origin_str not in exception_stats[error_node.node_uid]:
exception_stats[error_node.node_uid][origin_str] = _ErrorSummary(
origin_str, str(exception_info.exception), []
)

exception_stats[error_node.node_uid][origin_str].add_execution(execution_idx, execution)

return self._to_ordered_stats(exception_stats)

def _to_ordered_stats(
self, exception_stats: defaultdict[str, defaultdict[str, int]]
) -> list[tuple[str, str, list[tuple[str, int]]]]:
self, exception_stats: defaultdict[str, dict[str, _ErrorSummary]]
) -> list[tuple[str, str, list[_ErrorSummary]]]:
"""Exception stats get ordered by nodes in their execution order in workflows. Exception stats that happen
for the same node get ordered by number of occurrences in a decreasing order.

Returns tuples of form (name, uid, [error_summary]), where each error summary holds the exception origin, an example message and the executions that failed with it.
zigaLuksic marked this conversation as resolved.
Show resolved Hide resolved
"""
ordered_exception_stats = []
for node in self.eoexecutor.workflow.get_nodes():
if node.uid not in exception_stats:
continue

node_stats = exception_stats[node.uid]
ordered_exception_stats.append(
(node.get_name(), node.uid, sorted(node_stats.items(), key=lambda item: -item[1]))
)
error_summaries = sorted(node_stats.values(), key=lambda summary: -len(summary.failed_indexed_executions))
ordered_exception_stats.append((node.get_name(), node.uid, error_summaries))

return ordered_exception_stats

Expand Down Expand Up @@ -197,7 +209,8 @@ def _render_execution_tracebacks(self, formatter: pygments.formatter.Formatter)
if results.workflow_failed() and results.error_node_uid is not None:
# second part of above check needed only for typechecking purposes
failed_node_stats = results.stats[results.error_node_uid]
traceback = pygments.highlight(failed_node_stats.exception_traceback, tb_lexer, formatter)
traceback_str = failed_node_stats.exception_info.traceback # type: ignore[union-attr]
traceback = pygments.highlight(traceback_str, tb_lexer, formatter)
else:
traceback = None

Expand All @@ -223,3 +236,21 @@ def _format_datetime(value: dt.datetime) -> str:
def _format_timedelta(value1: dt.datetime, value2: dt.datetime) -> str:
"""Method for formatting time delta into report"""
return str(value2 - value1)


@dataclass()
class _ErrorSummary:
    """Contains data for errors of a node.

    A summary collects the executions that failed with the same error origin, together with one
    example message for that error.
    """

    # Description of where the error originated; used to label the group.
    origin: str
    # One representative exception message for this group.
    example_message: str
    # (execution index, execution name) pairs of the executions that failed with this error.
    failed_indexed_executions: list[tuple[int, str]]

    def add_execution(self, index: int, name: str) -> None:
        """Adds an execution to the summary.

        :param index: Index of the failed execution.
        :param name: Name of the failed execution.
        """
        self.failed_indexed_executions.append((index, name))

    @property
    def num_failed(self) -> int:
        """Number of failed executions in this summary (a property, so Jinja templates can read it directly)."""
        return len(self.failed_indexed_executions)
17 changes: 14 additions & 3 deletions eolearn/visualization/report_templates/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,24 @@ <h3> Summary of exceptions </h3>

<div class="indent">
<ul>
{% for node_name, node_uid, exception_list in exception_stats %}
{% for node_name, node_uid, error_summary_list in exception_stats %}
<li>
<b>{{ node_name }} ({{ node_uid }}):</b>
<ul>
{% for exception_string, count in exception_list %}
{% for error_summary in error_summary_list %}
<li>
{{ count }} times: {{ exception_string }}
{{ error_summary.num_failed }} times: {{ error_summary.origin }}
<br>
Example message: <pre>{{ error_summary.example_message }}</pre>
<br>
<button class="collapsible">Failed executions</button>
<div class="collapsible-content">
<ul>
{% for idx, execution in error_summary.failed_indexed_executions %}
<li><a href="#execution{{ idx }}">{{ execution }}</a></li>
{% endfor %}
</ul>
</div>
</li>
{% endfor %}
</ul>
Expand Down
7 changes: 3 additions & 4 deletions tests/core/test_eoworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,7 @@ def test_exception_handling():
assert node_stats.node_name == node.name

if node is exception_node:
assert isinstance(node_stats.exception, CustomExceptionError)
assert node_stats.exception_traceback.startswith("Traceback")
assert isinstance(node_stats.exception_info.exception, CustomExceptionError)
assert node_stats.exception_info.traceback.startswith("Traceback")
else:
assert node_stats.exception is None
assert node_stats.exception_traceback is None
assert node_stats.exception_info is None
12 changes: 9 additions & 3 deletions tests/visualization/test_eoexecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ def execute(self, *_, **kwargs):
my_logger.debug("Debug statement of Example task with kwargs: %s", kwargs)

if "arg1" in kwargs and kwargs["arg1"] is None:
raise Exception
raise RuntimeError(f"Oh no, i spilled my kwargs all over the floor! {kwargs}!")


NODE = EONode(ExampleTask())
WORKFLOW = EOWorkflow([NODE, EONode(task=ExampleTask(), inputs=[NODE, NODE])])
EXECUTION_KWARGS = [{NODE: {"arg1": 1}}, {}, {NODE: {"arg1": 3, "arg3": 10}}, {NODE: {"arg1": None}}]
EXECUTION_KWARGS = [
{NODE: {"arg1": 1}},
{},
{NODE: {"arg1": 3, "arg3": 10}},
{NODE: {"arg1": None}},
{NODE: {"arg1": None, "arg3": 10}},
]


@pytest.mark.parametrize("save_logs", [True, False])
Expand All @@ -42,7 +48,7 @@ def test_report_creation(save_logs, include_logs):
EXECUTION_KWARGS,
logs_folder=tmp_dir_name,
save_logs=save_logs,
execution_names=["ex 1", 2, 0.4, None],
execution_names=["ex 1", 2, 0.4, None, "beep"],
)
executor.run(workers=10)
executor.make_report(include_logs=include_logs)
Expand Down