Skip to content

Commit

Permalink
feat: plotly express
Browse files Browse the repository at this point in the history
The histograms, heatmaps and comparisons have been replaced with interactive Plotly graphs.
Plotly.js is used to build the graphs on the go from JSON. Initial tests show that plotly
reports are smaller in size than matplotlib reports and take far less time to
generate. Use the parameter 'online_report' to load plotly.js from a CDN
 server and use report online. Else, plotly.js is embedded in the report and can be used
 offline too.

BREAKING CHANGE: matplotlib-related config is removed
  • Loading branch information
pradyot-09 committed Jul 5, 2022
1 parent a1ed9eb commit 2c2395c
Show file tree
Hide file tree
Showing 22 changed files with 531 additions and 447 deletions.
2 changes: 1 addition & 1 deletion NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# pyyaml: https://github.com/yaml/pyyaml/blob/master/LICENSE
# jinja2: https://github.com/noirbizarre/jinja2/blob/master/LICENSE
# tqdm: https://github.com/tqdm/tqdm/blob/master/LICENCE
# matplotlib: https://github.com/matplotlib/matplotlib/blob/master/LICENSE/LICENSE
# plotly: https://github.com/plotly/plotly.py/blob/master/LICENSE.txt
# joblib: https://github.com/joblib/joblib/blob/master/LICENSE.txt
# pybase64: https://github.com/mayeut/pybase64/blob/master/LICENSE
# htmlmin: https://github.com/mankyd/htmlmin/blob/master/LICENSE
Expand Down
11 changes: 7 additions & 4 deletions popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@
# (see https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html for details)
parallel_args = {"n_jobs": 1}

# Use the `ing_matplotlib_theme`
themed = True


class SectionModel(BaseModel):
name: str
Expand Down Expand Up @@ -108,7 +105,7 @@ class HistogramSectionModel(SectionModel):
top_n: int = 20
"""plot heatmap for top 'n' categories. default is 20 (optional)"""

cmap: str = "autumn_r"
cmap: str = "ylorrd"
"""colormap for histogram heatmaps"""


Expand Down Expand Up @@ -171,6 +168,9 @@ class Report(BaseModel):
"""if True, show all the generated statistics in the report (optional)
if set to False, then smaller show_stats (see below)"""

online_report: bool = True
"""Use a CDN to host resources, or embed them into the report."""

show_stats: List[str] = [
"distinct*",
"filled*",
Expand All @@ -194,6 +194,9 @@ class Report(BaseModel):
]
"""list of statistic name patterns to show in the report. If None, show all (optional)"""

zline_color: List[str] = ["#FF0000", "#FFC800"]
"""Configure line colors in barplots of the Comparisons and Profiles sections. The first and second hex color codes in the list replace the default red and yellow, respectively."""

section: Section = Section()
"""Configuration for the individual sections"""

Expand Down
12 changes: 10 additions & 2 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,11 @@
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ReportGenerator(\n",
" read_key=\"report_sections\",\n",
" store_key=\"html_report\",\n",
" settings=report_settings,\n",
" ),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
Expand Down Expand Up @@ -525,7 +529,11 @@
" store_key=\"report_sections\",\n",
" settings=report_settings,\n",
" ),\n",
" ReportGenerator(read_key=\"report_sections\", store_key=\"html_report\"),\n",
" ReportGenerator(\n",
" read_key=\"report_sections\",\n",
" store_key=\"html_report\",\n",
" settings=report_settings,\n",
" ),\n",
" ]\n",
" super().__init__(modules)\n",
"\n",
Expand Down
4 changes: 3 additions & 1 deletion popmon/pipeline/report_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ def __init__(
settings=settings,
),
# generate report
ReportGenerator(read_key=sections_key, store_key=store_key),
ReportGenerator(
read_key=sections_key, store_key=store_key, settings=settings
),
]
if (
isinstance(settings.report_filepath, (str, Path))
Expand Down
27 changes: 26 additions & 1 deletion popmon/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


# Resources lookup file for popmon

import json
import pathlib

from jinja2 import Environment, FileSystemLoader
Expand Down Expand Up @@ -53,6 +53,31 @@
_TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ")


def _js_key(encoder, key):
    """Render a dict key as a JavaScript object-literal property name.

    Plain ASCII identifiers are emitted unquoted (compact output, unchanged
    from the previous behavior). Any other key is emitted as a quoted string
    so that the resulting object literal is always syntactically valid.

    :param encoder: object with an ``encode(value) -> str`` method
        (e.g. ``json.JSONEncoder``)
    :param key: the dictionary key to render
    :return: text usable as a property name in a JavaScript object literal
    """
    if isinstance(key, str):
        # Restrict the unquoted form to ASCII identifiers: a conservative
        # subset that is valid both as a Python and a JavaScript identifier.
        if key.isidentifier() and all(ch < "\x80" for ch in key):
            return key
        return encoder.encode(key)
    # Non-string keys (numbers, booleans, None): encode the value first, then
    # quote that text, mirroring how JSON turns such keys into strings.
    return encoder.encode(encoder.encode(key))


def js_list(encoder, data):
    """Render a list as a JavaScript array literal.

    :param encoder: object with an ``encode(value) -> str`` method
    :param data: list of values; each element is rendered with ``js_val``
    :return: string of the form ``[v1, v2, ...]``
    """
    return "[" + ", ".join(js_val(encoder, item) for item in data) + "]"


def js_dict(encoder, data):
    """Render a dict as a JavaScript object literal.

    Keys that are plain identifiers are written unquoted; other keys are
    quoted so the output stays valid JavaScript. Values are rendered with
    ``js_val``.

    :param encoder: object with an ``encode(value) -> str`` method
    :param data: dict to render
    :return: string of the form ``{key1: v1, key2: v2, ...}``
    """
    pairs = [
        _js_key(encoder, key) + ": " + js_val(encoder, value)
        for key, value in data.items()
    ]
    return "{" + ", ".join(pairs) + "}"


def js_val(encoder, data):
    """Render a Python value as a JavaScript expression.

    Dicts and lists are rendered recursively by ``js_dict`` and ``js_list``;
    every other value is delegated to ``encoder.encode``.

    :param encoder: object with an ``encode(value) -> str`` method
    :param data: the value to render
    :return: JavaScript source text for ``data``
    """
    if isinstance(data, dict):
        return js_dict(encoder, data)
    if isinstance(data, list):
        return js_list(encoder, data)
    return encoder.encode(data)


# Register the `json_plot` template filter: it renders a value through js_val
# using a JSON encoder that leaves non-ASCII characters unescaped.
_TEMPLATES_ENV.filters["json_plot"] = lambda x: js_val(
json.JSONEncoder(ensure_ascii=False), x
)


def _resource(resource_type, name: str) -> str:
"""Return the full path filename of a resource.
Expand Down
7 changes: 0 additions & 7 deletions popmon/visualization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,6 @@
TrafficLightSectionGenerator,
)

# set matplotlib backend to batch mode when running in shell
# need to do this *before* matplotlib.pyplot gets imported
from ..visualization.backend import set_matplotlib_backend

set_matplotlib_backend()


__all__ = [
"SectionGenerator",
"HistogramSection",
Expand Down
6 changes: 5 additions & 1 deletion popmon/visualization/alert_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,11 @@ def transform(
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])}
{
"name": feature,
"plot_type_layouts": {"traffic_lights": ""},
"plots": sorted(plots, key=lambda plot: plot["name"]),
}
)

sections.append(
Expand Down
152 changes: 0 additions & 152 deletions popmon/visualization/backend.py

This file was deleted.

46 changes: 35 additions & 11 deletions popmon/visualization/histogram_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,33 @@ def transform(self, data_obj: dict, sections: Optional[list] = None):
]
plots = parallel(_plot_histograms, args)

plot_type_layouts = {}

# filter out potential empty plots
plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])
if len(plots) > 0:
plot_type_layouts["histogram"] = plots[0]["layout"]

# filter out potential empty heatmap plots, then prepend them to the sorted histograms
hplots = [h for h in heatmaps if isinstance(h, dict) and len(h["plot"])]

plots = hplots + plots
hplots = []
for h in heatmaps:
if isinstance(h, dict):
if len(h["plot"]):
hplots.append(h)

features_w_metrics.append({"name": feature, "plots": plots})
if len(hplots) > 0:
plot_type_layouts["heatmap"] = hplots[0]["layout"]

plots = hplots + plots
# print(plot_types,layouts)
features_w_metrics.append(
{
"name": feature,
"plot_type_layouts": plot_type_layouts,
"plots": plots,
}
)
sections.append(
{
"section_title": self.section_name,
Expand Down Expand Up @@ -230,11 +246,17 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000):
hists, feature, hist_names, y_label, is_num, is_ts
)
elif hc_list[0].n_dim == 2:
plot = ""
plot = {}
else:
plot = ""
plot = {}

return {"name": date, "description": "", "plot": plot}
return {
"name": date,
"type": "histogram",
"description": "",
"plot": plot.get("data", ""),
"layout": plot.get("layout", ""),
}


def _plot_heatmap(
Expand Down Expand Up @@ -321,13 +343,15 @@ def _plot_heatmap(
if isinstance(heatmaps, list):
plot = [hist_lookup(heatmaps, hist_name) for hist_name in hist_names]
elif isinstance(heatmaps, dict):
plot = [heatmaps["plot"]]
plot = [heatmaps]

plots = [
{
"name": hist_names_formatted[hist_name],
"description": descriptions[hist_name],
"plot": pl,
"type": "heatmap",
"description": "",
"plot": pl["plot"],
"layout": pl["layout"],
"full_width": True,
}
for pl, hist_name in zip(plot, hist_names)
Expand Down Expand Up @@ -364,4 +388,4 @@ def get_top_categories(entries_list, bins, top_n):
def hist_lookup(plot, hist_name):
for pl in plot:
if pl["name"] == hist_name:
return pl["plot"]
return pl
1 change: 1 addition & 0 deletions popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def _plot_metrics(

return {
"name": "Alert frequency per Feature",
"type": "alert",
"description": "",
"plot": plot,
"full_width": True,
Expand Down
Loading

0 comments on commit 2c2395c

Please sign in to comment.