Skip to content

Commit

Permalink
feat(report): histogram inspector
Browse files Browse the repository at this point in the history
See #230

Co-authored-by: Pradyot Patil <pradyotpatil@gmail.com>
  • Loading branch information
sbrugman and pradyot-09 committed Sep 1, 2022
1 parent 1615bed commit 5e78f98
Show file tree
Hide file tree
Showing 4 changed files with 291 additions and 87 deletions.
9 changes: 6 additions & 3 deletions popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,12 @@ class HistogramSectionModel(SectionModel):
name: str = "Histograms"
"""Name of the histograms section in the report"""

description: str = "Histograms of the last few time slots (default: 2)."
description: str = "This section contains visualisations of individual histograms and heatmaps of them over time."
"""Description of the histograms section in the report"""

inspector_histogram_choices: int = 2
"""The number of histograms that can be compared at once (e.g. the number of dropdowns)"""

hist_names: List[
Literal["heatmap", "heatmap_column_normalized", "heatmap_row_normalized"]
] = [
Expand Down Expand Up @@ -113,8 +116,8 @@ class HistogramSectionModel(SectionModel):
}
"""Descriptions of the heatmaps in the report"""

plot_hist_n: int = 2
"""plot histograms for last 'n' periods. default is 2 (optional)"""
plot_hist_n: int = 0
"""plot histograms for last 'n' periods. default is 0 to show all (optional)"""

top_n: int = 20
"""plot heatmap for top 'n' categories. default is 20 (optional)"""
Expand Down
2 changes: 1 addition & 1 deletion popmon/notebooks/popmon_tutorial_reports.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@
"metadata": {},
"outputs": [],
"source": [
"show_image(report.datastore[\"report_sections\"][4][\"features\"][0][\"plots\"][\"prev1\"][0])"
"show_image(report.datastore[\"report_sections\"][4][\"features\"][0][\"plots\"][\"ref\"][0])"
]
},
{
Expand Down
76 changes: 57 additions & 19 deletions popmon/visualization/histogram_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,11 @@
from ..base import Module
from ..config import HistogramSectionModel
from ..utils import parallel, short_date
from ..visualization.utils import plot_heatmap, plot_histogram_overlay
from ..visualization.utils import (
histogram_basic_checks,
plot_heatmap,
plot_histogram_overlay,
)


class HistogramSection(Module):
Expand Down Expand Up @@ -80,6 +84,7 @@ def __init__(
self.hist_names_formatted = settings.hist_names_formatted
self.plot_hist_n = settings.plot_hist_n
self.top_n = settings.top_n
self.n_choices = settings.inspector_histogram_choices
self.cmap = settings.cmap

def get_description(self):
Expand All @@ -92,12 +97,17 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

# Treat these as static
is_static_reference = self.reference_type in ["self", "external"]

self.logger.info(f'Generating section "{self.section_name}".')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
last_n = (
len(df.index) if len(df.index) < self.plot_hist_n else self.plot_hist_n
len(df.index)
if (len(df.index) < self.plot_hist_n or self.plot_hist_n == 0)
else self.plot_hist_n
)
hist_names = [hn for hn in self.hist_names if hn in df.columns]
if len(hist_names) == 0 and len(self.hist_name_starts_with) > 0:
Expand Down Expand Up @@ -140,15 +150,43 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
(feature, dates[i], hists[i], hist_names, self.top_n)
for i in range(last_n)
]

# get histograms for each timestamp
plots = parallel(_plot_histograms, args)

plot_type_layouts = {}

# filter out potential empty plots
plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])
if len(plots) > 0:
plot_type_layouts["histogram"] = plots[0]["layout"]
plots = [e for e in plots if len(e)]
plots = sorted(plots, key=lambda plot: plot["date"])

# basic checks for histograms
histogram_basic_checks(plots)

for plot in plots:
for index in range(len(plot["hists"])):
if plot["hist_names"][index] == "histogram_prev1":
del plot["hist_names"][index]
del plot["hists"][index]
break

# get histogram plots
histogram = {}
if len(plots) > 1:
histogram = plot_histogram_overlay(
plots,
plots[0]["is_num"],
plots[0]["is_ts"],
is_static_reference,
top=self.top_n,
n_choices=self.n_choices,
)

if len(histogram) > 0:
plot_type_layouts["histogram"] = histogram["layout"]
histogram = [histogram]
else:
histogram = []

# filter out potential empty heatmap plots, then prepend them to the sorted histograms
hplots = []
Expand All @@ -160,7 +198,7 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
if len(hplots) > 0:
plot_type_layouts["heatmap"] = hplots[0]["layout"]

plots = hplots + plots
plots = hplots + histogram

features_w_metrics.append(
{
Expand Down Expand Up @@ -188,7 +226,7 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
:param list hc_list: histogram list
:param list hist_names: names of histograms to show as labels
:param int max_nbins: maximum number of histogram bins allowed for plot (default 1000)
:return: dict with plotted histogram
:return: dict with histograms for each timestamp
"""
# basic checks
if len(hc_list) != len(hist_names):
Expand All @@ -206,7 +244,7 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):

# make plot. note: slow!
if hc_list[0].n_dim == 1:
if all(h.size == 0 for h in hc_list):
if all(h.entries == 0 for h in hc_list):
# triviality checks, skip all histograms empty
return {"name": date, "description": "", "plot": ""}

Expand Down Expand Up @@ -245,20 +283,20 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
entries_list = np.reshape(entries_list.ravel(), (-1, len(bins)))

hists = [(el, bins) for el in entries_list]
plot = plot_histogram_overlay(
hists, feature, hist_names, y_label, is_num, is_ts
)

elif hc_list[0].n_dim == 2:
plot = {}
return {}
else:
plot = {}
return {}

return {
"name": date,
"type": "histogram",
"description": "",
"plot": plot.get("data", ""),
"layout": plot.get("layout", ""),
"date": date,
"hists": hists,
"feature": feature,
"hist_names": hist_names,
"y_label": y_label,
"is_num": is_num,
"is_ts": is_ts,
}


Expand Down
Loading

0 comments on commit 5e78f98

Please sign in to comment.