Skip to content

Commit

Permalink
feat(report): summary table
Browse files Browse the repository at this point in the history
  • Loading branch information
Patil authored and sbrugman committed Oct 19, 2022
1 parent 2d9ef90 commit a5b9a30
Show file tree
Hide file tree
Showing 9 changed files with 291 additions and 13 deletions.
2 changes: 1 addition & 1 deletion examples/synthetic_data_streams/hyperplane.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
dataset_name = "hyperplane"
v = "1"

# Monitor the each feature w.r.t. the label
# Monitor each feature w.r.t. the label
features = [f"index:attr{i}:output" for i in range(10)]

# Also monitor predictions w.r.t. the label (see below)
Expand Down
2 changes: 1 addition & 1 deletion popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ class OverviewSection(SectionModel):
name: str = "Overview"
"""Name of the overview section in the report"""

description: str = "Alerts aggregated per feature"
description: str = "Overview of the dataset, analysis and alerts."
"""Description of the overview section in the report"""


Expand Down
21 changes: 17 additions & 4 deletions popmon/pipeline/metrics_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from ..base import Module, Pipeline
from ..config import Settings
from ..hist.hist_splitter import HistSplitter
from .timing import Timing


def get_metrics_pipeline_class(reference_type, reference):
Expand Down Expand Up @@ -88,6 +89,10 @@ def create_metrics_pipeline(
return pipeline


def get_timing_module(key: str) -> List[Module]:
    """Build the one-element module list that records a timestamp.

    :param str key: forwarded to ``Timing`` as its ``store_key``
    :return: list holding a single ``Timing`` module, so it can be
        concatenated with the other module lists of a pipeline
    """
    timing = Timing(store_key=key)
    return [timing]


def get_splitting_modules(
hists_key, features, time_axis
) -> List[Union[Module, Pipeline]]:
Expand Down Expand Up @@ -251,11 +256,13 @@ def __init__(
]

modules = (
get_splitting_modules(hists_key, settings.features, settings.time_axis)
get_timing_module("start_time")
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
+ reference_modules
+ get_trend_modules(settings.comparison.window)
+ get_static_bound_modules(settings.monitoring.pull_rules)
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
+ get_timing_module("end_time")
)
super().__init__(modules)

Expand Down Expand Up @@ -312,11 +319,13 @@ def __init__(
),
]
modules = (
get_splitting_modules(hists_key, settings.features, settings.time_axis)
get_timing_module("start_time")
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
+ reference_modules
+ get_trend_modules(settings.comparison.window)
+ get_static_bound_modules(settings.monitoring.pull_rules)
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
+ get_timing_module("end_time")
)
super().__init__(modules)

Expand Down Expand Up @@ -369,11 +378,13 @@ def __init__(
]

modules = (
get_splitting_modules(hists_key, settings.features, settings.time_axis)
get_timing_module("start_time")
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
+ reference_modules
+ get_trend_modules(settings.comparison.window)
+ get_dynamic_bound_modules(settings.monitoring.pull_rules)
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
+ get_timing_module("end_time")
)
super().__init__(modules)

Expand Down Expand Up @@ -424,10 +435,12 @@ def __init__(
]

modules = (
get_splitting_modules(hists_key, settings.features, settings.time_axis)
get_timing_module("start_time")
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
+ reference_modules
+ get_trend_modules(settings.comparison.window)
+ get_dynamic_bound_modules(settings.monitoring.pull_rules)
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
+ get_timing_module("end_time")
)
super().__init__(modules)
5 changes: 4 additions & 1 deletion popmon/pipeline/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@


def stability_report(
hists, settings: Optional[Settings] = None, reference=None, **kwargs
hists,
settings: Optional[Settings] = None,
reference=None,
**kwargs,
):
"""Create a data stability monitoring html report for given dict of input histograms.
Expand Down
3 changes: 3 additions & 0 deletions popmon/pipeline/report_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ def __init__(
read_key="traffic_lights",
store_key=sections_key,
settings=settings.report,
reference_type=settings.reference_type,
time_axis=settings.time_axis,
bin_specs=settings.bin_specs,
),
# generate section with histogram
HistogramSection(
Expand Down
47 changes: 47 additions & 0 deletions popmon/pipeline/timing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from datetime import datetime

from ..base import Module


class Timing(Module):
    """Pipeline module that produces the current timestamp for the datastore."""

    _input_keys = ()
    _output_keys = ("store_key",)

    def __init__(self, store_key: str, **kwargs):
        """Set up the module.

        :param str store_key: datastore key under which the timestamp is stored
        :param kwargs: extra keyword arguments, kept on the instance as ``self.kwargs``
        """
        super().__init__()

        self.store_key = store_key
        self.kwargs = kwargs

    def transform(self):
        """Capture and return the current local time as a naive ``datetime``."""
        timestamp = datetime.now()
        self.logger.info(f"storing time to {self.store_key}")
        return timestamp
60 changes: 55 additions & 5 deletions popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


from datetime import datetime
from typing import Optional

import numpy as np
Expand All @@ -28,7 +29,8 @@
from ..config import Report
from ..resources import templates_env
from ..utils import filter_metrics
from ..visualization.utils import _prune
from ..version import version as __version__
from ..visualization.utils import _prune, get_reproduction_table, get_summary_table


class OverviewSectionGenerator(Module):
Expand All @@ -37,14 +39,17 @@ class OverviewSectionGenerator(Module):
which later will be used for the report generation.
"""

_input_keys = ("read_key", "dynamic_bounds", "store_key")
_input_keys = ("read_key", "dynamic_bounds", "store_key", "start_time", "end_time")
_output_keys = ("store_key",)

def __init__(
self,
read_key,
store_key,
settings: Report,
reference_type,
time_axis,
bin_specs,
features=None,
ignore_features=None,
static_bounds=None,
Expand All @@ -68,6 +73,8 @@ def __init__(
super().__init__()
self.read_key = read_key
self.store_key = store_key
self.start_time = "start_time"
self.end_time = "end_time"
self.dynamic_bounds = dynamic_bounds
self.static_bounds = static_bounds

Expand All @@ -76,6 +83,9 @@ def __init__(
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.reference_type = reference_type
self.time_axis = time_axis
self.bin_specs = bin_specs

self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
Expand All @@ -92,6 +102,8 @@ def transform(
data_obj: dict,
dynamic_bounds: Optional[dict] = None,
sections: Optional[list] = None,
start_time: Optional[datetime] = None,
end_time: Optional[datetime] = None,
):
assert isinstance(data_obj, dict)
if dynamic_bounds is None:
Expand All @@ -104,12 +116,17 @@ def transform(
features = self.get_features(list(data_obj.keys()))

self.logger.info(f'Generating section "{self.section_name}"')

time_windows = 0
values = {}
offset = ""
max_timestamp = ""
for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index))
time_windows = len(df.index)
offset = df.index.min()
max_timestamp = df.index.max()

fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index))
assert all(df.index == fdbounds.index)

# prepare date labels
Expand All @@ -131,11 +148,43 @@ def transform(
self.skip_last_n,
)

# Dataset summary table and Analysis Details table
tables = []
bin_width = (
self.bin_specs[self.time_axis]["bin_width"]
if self.time_axis in self.bin_specs.keys()
else 0
)

if (
self.time_axis in self.bin_specs.keys()
and self.bin_specs[self.time_axis]["bin_offset"] > 0
):
offset = datetime.utcfromtimestamp(
self.bin_specs[self.time_axis]["bin_offset"] // 1e9
)
tables.append(
get_summary_table(
len(features),
time_windows,
self.time_axis,
self.reference_type,
bin_width,
offset,
max_timestamp,
)
)

tables.append(get_reproduction_table(start_time, end_time, __version__))

# overview plots
plots = [_plot_metrics(values)]
# filter out potential empty plots (from skip empty plots)
plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])

plots = tables + plots

sections.append(
{
"section_title": self.section_name,
Expand Down Expand Up @@ -169,10 +218,11 @@ def _plot_metrics(
)

return {
"name": "Alert frequency per Feature",
"name": "Alerts",
"type": "alert",
"description": "",
"plot": plot,
"layout": "",
"full_width": True,
}

Expand Down
4 changes: 4 additions & 0 deletions popmon/visualization/templates/section.html
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ <h3> {{ feature.titles.get(ref, ref) }} </h3>
{%- else -%}
<div class="row" >
{%- for metric in plots -%}
<script>
{%- set curr = loop.index -%}
layouts["{{ section_index }}{{ curr }}-{{ metric.type }}"] = {{ metric.layout | json_plot }};
</script>
{%- with metric=metric -%}
{%- include 'card.html' -%}
{%- endwith -%}
Expand Down
Loading

0 comments on commit a5b9a30

Please sign in to comment.