diff --git a/README.rst b/README.rst index 0c826865..31a57238 100644 --- a/README.rst +++ b/README.rst @@ -2,7 +2,7 @@ Population Shift Monitoring =========================== -|build| |docs| |release| |release_date| +|build| |docs| |release| |release_date| |downloads| |logo| @@ -128,6 +128,37 @@ These examples also work with spark dataframes. You can see the output of such example notebook code `here `_. For all available examples, please see the `tutorials `_ at read-the-docs. +Resources +========= + +Presentations +------------- + ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| Title | Host | Date | Speaker | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| Popmon - population monitoring made easy | `Data Lunch @ Eneco `_ | October 29, 2020 | Max Baak, Simon Brugman | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| Popmon - population monitoring made easy | `Data Science Summit 2020 `_ | October 16, 2020 | Max Baak | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| `Population Shift Monitoring Made Easy: the popmon package `_ | `Online Data Science Meetup @ ING WBAA `_ | July 8 2020 | Tomas Sostak | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| `Popmon: Population Shift Monitoring Made Easy `_ | `PyData Fest Amsterdam 2020 `_ | June 16, 2020 | Tomas Sostak | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ +| Popmon: Population Shift Monitoring Made Easy | `Amundsen Community Meetup `_ | June 4, 2020 | Max Baak | ++------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------+------------------+-------------------------+ + + +Articles +-------- + ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+----------------+ +| Title | Date | Author | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+----------------+ +| `Popmon Open Source Package — Population Shift Monitoring Made Easy `_ | May 20, 2020 | Nicole Mpozika | ++---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------+----------------+ + + Project contributors ==================== @@ -171,3 +202,6 @@ Copyright ING WBAA. `popmon` is completely free, open-source and licensed under .. |notebook_incremental_data_colab| image:: https://colab.research.google.com/assets/colab-badge.svg :alt: Open in Colab :target: https://colab.research.google.com/github/ing-bank/popmon/blob/master/popmon/notebooks/popmon_tutorial_incremental_data.ipynb +.. |downloads| image:: https://pepy.tech/badge/popmon + :alt: PyPi downloads + :target: https://pepy.tech/project/popmon diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 6a75fd67..6ecbbfbb 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -44,7 +44,7 @@ def __init__( :param str read_key: key of input data to read from datastore. :param str store_key: key of output data to store in datastore (optional). - :param str combined_variable: name of artifical variable that combines all alerts. default is '_AGGREGATE_'. + :param str combined_variable: name of artificial variable that combines all alerts. default is '_AGGREGATE_'. :param list features: features of data frames to pick up from input data (optional). :param list ignore_features: list of features to ignore (optional). """ @@ -77,7 +77,7 @@ def transform(self, datastore): df = (self.get_datastore_object(data, feature, dtype=pd.DataFrame)).copy( deep=False ) - df.columns = [feature + "_" + c for c in df.columns] + df.columns = [f"{feature}_{c}" for c in df.columns] df_list.append(df) # the different features could technically have different indices. @@ -99,8 +99,8 @@ def transform(self, datastore): dfc["worst"] = tlv[cols].values.max(axis=1) if len(cols) else 0 # colors of traffic lights for color in ["green", "yellow", "red"]: - cols = fnmatch.filter(tlv.columns, "*_n_{}".format(color)) - dfc["n_{}".format(color)] = tlv[cols].values.sum(axis=1) if len(cols) else 0 + cols = fnmatch.filter(tlv.columns, f"*_n_{color}") + dfc[f"n_{color}"] = tlv[cols].values.sum(axis=1) if len(cols) else 0 # store combination of traffic alerts data[self.combined_variable] = dfc diff --git a/popmon/hist/histogram.py b/popmon/hist/histogram.py index 550d4f56..adf71724 100644 --- a/popmon/hist/histogram.py +++ b/popmon/hist/histogram.py @@ -211,7 +211,7 @@ def __repr__(self): return f"HistogramContainer(dtype={self.npdtype}, n_dims={self.n_dim})" def __str__(self): - return str(self) + return repr(self) def _edit_name(self, axis_name, xname, yname, convert_time_index, short_keys): if convert_time_index and self.is_ts: diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb index fe0ea75a..61ab3f2e 100644 --- a/popmon/notebooks/popmon_tutorial_advanced.ipynb +++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb @@ -4,7 +4,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -26,10 +25,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# install popmon (if not installed yet)\n", "import sys\n", "\n", - "!{sys.executable} -m pip install popmon" + "!\"{sys.executable}\" -m pip install popmon" ] }, { @@ -145,11 +145,13 @@ "outputs": [], "source": [ "# download histogrammar jar files if not already installed, used for histogramming of spark dataframe\n", - "from pyspark.sql import SparkSession\n", + "try:\n", + " from pyspark.sql import SparkSession\n", "\n", - "spark = SparkSession.builder.config(\n", - " \"spark.jars.packages\", \"org.diana-hep:histogrammar-sparksql_2.11:1.0.4\"\n", - ").getOrCreate()" + " pyspark_installed = True\n", + "except ImportError:\n", + " print(\"pyspark needs to be installed for this example\")\n", + " pyspark_installed = False" ] }, { @@ -158,18 +160,19 @@ "metadata": {}, "outputs": [], "source": [ - "sdf = spark.createDataFrame(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sdf.pm_stability_report(\n", - " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\", extended_report=False\n", - ")" + "if pyspark_installed:\n", + " spark = SparkSession.builder.config(\n", + " \"spark.jars.packages\", \"org.diana-hep:histogrammar-sparksql_2.11:1.0.4\"\n", + " ).getOrCreate()\n", + "\n", + " sdf = spark.createDataFrame(df)\n", + "\n", + " sdf.pm_stability_report(\n", + " time_axis=\"DATE\",\n", + " time_width=\"1w\",\n", + " time_offset=\"2015-07-02\",\n", + " extended_report=False,\n", + " )" ] }, { @@ -287,7 +290,7 @@ "outputs": [], "source": [ "split_hist = split_hists.query(\"date == '2015-07-05 12:00:00'\")\n", - "split_hist.histogram[0].hist.plot.matplotlib();" + "split_hist.histogram[0].hist.plot.matplotlib()" ] }, { @@ -303,7 +306,7 @@ "metadata": {}, "outputs": [], "source": [ - "split_hist.histogram_ref[0].hist.plot.matplotlib();" + "split_hist.histogram_ref[0].hist.plot.matplotlib()" ] }, { @@ -320,11 +323,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pickle\n", + "# As HTML report\n", + "report.to_file(\"report.html\")\n", "\n", - "with open(\"report.pkl\", \"wb\") as f:\n", - " pickle.dump(report, f)\n", - "report.to_file(\"report.html\")" + "# Alternatively, as serialized Python object\n", + "# import pickle\n", + "\n", + "# with open(\"report.pkl\", \"wb\") as f:\n", + "# pickle.dump(report, f)" ] }, { @@ -473,7 +479,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.7" + "version": "3.8.6" }, "nteract": { "version": "0.15.0" @@ -481,10 +487,10 @@ "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, diff --git a/popmon/notebooks/popmon_tutorial_basic.ipynb b/popmon/notebooks/popmon_tutorial_basic.ipynb index c1efff56..13ac619c 100644 --- a/popmon/notebooks/popmon_tutorial_basic.ipynb +++ b/popmon/notebooks/popmon_tutorial_basic.ipynb @@ -4,7 +4,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -36,10 +35,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# install popmon (if not installed yet)\n", "import sys\n", "\n", - "!{sys.executable} -m pip install popmon" + "!\"{sys.executable}\" -m pip install popmon" ] }, { diff --git a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb index a2d5b97c..719571ff 100644 --- a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb +++ b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb @@ -28,10 +28,11 @@ "metadata": {}, "outputs": [], "source": [ + "%%capture\n", "# install popmon (if not installed yet)\n", "import sys\n", "\n", - "!{sys.executable} -m pip install popmon" + "!\"{sys.executable}\" -m pip install popmon" ] }, { diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index 483bcfdb..a4a67829 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -18,7 +18,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from pathlib import PosixPath +from pathlib import Path from ..base import Pipeline from ..config import config @@ -30,6 +30,7 @@ metrics_self_reference, ) from ..visualization import ( + AlertSectionGenerator, HistogramSection, ReportGenerator, SectionGenerator, @@ -46,7 +47,7 @@ def self_reference( features=None, skip_empty_plots=True, last_n=0, - plot_hist_n=2, + plot_hist_n=6, report_filepath=None, show_stats=None, **kwargs, @@ -160,7 +161,7 @@ def rolling_reference( features=None, skip_empty_plots=True, last_n=0, - plot_hist_n=2, + plot_hist_n=6, report_filepath=None, show_stats=None, **kwargs, @@ -218,7 +219,7 @@ def expanding_reference( features=None, skip_empty_plots=True, last_n=0, - plot_hist_n=2, + plot_hist_n=6, report_filepath=None, show_stats=None, **kwargs, @@ -284,7 +285,7 @@ def __init__( last_n=0, skip_first_n=0, skip_last_n=0, - plot_hist_n=2, + plot_hist_n=6, ): """Initialize an instance of Report. @@ -329,13 +330,18 @@ def sg_kws(read_key): # - a section showing all traffic light alerts of monitored statistics # - a section with a summary of traffic light alerts # --- o generate report - SectionGenerator( - dynamic_bounds="dynamic_bounds", - section_name=profiles_section, - static_bounds="static_bounds", - ignore_stat_endswith=["_mean", "_std", "_pull"], - **sg_kws("profiles"), + HistogramSection( + read_key="split_hists", + store_key=sections_key, + section_name=histograms_section, + hist_name_starts_with="histogram", + last_n=plot_hist_n, + description=descs.get("histograms", ""), ), + TrafficLightSectionGenerator( + section_name=traffic_lights_section, **sg_kws("traffic_lights") + ), + AlertSectionGenerator(section_name=alerts_section, **sg_kws("alerts")), SectionGenerator( dynamic_bounds="dynamic_bounds_comparisons", static_bounds="static_bounds_comparisons", @@ -343,21 +349,16 @@ def sg_kws(read_key): ignore_stat_endswith=["_mean", "_std", "_pull"], **sg_kws("comparisons"), ), - TrafficLightSectionGenerator( - section_name=traffic_lights_section, **sg_kws("traffic_lights") - ), - SectionGenerator(section_name=alerts_section, **sg_kws("alerts")), - HistogramSection( - read_key="split_hists", - store_key=sections_key, - section_name=histograms_section, - hist_name_starts_with="histogram", - last_n=plot_hist_n, - description=descs.get("histograms", ""), + SectionGenerator( + dynamic_bounds="dynamic_bounds", + section_name=profiles_section, + static_bounds="static_bounds", + ignore_stat_endswith=["_mean", "_std", "_pull"], + **sg_kws("profiles"), ), ReportGenerator(read_key=sections_key, store_key=store_key), ] - if isinstance(report_filepath, (str, PosixPath)) and len(report_filepath) > 0: + if isinstance(report_filepath, (str, Path)) and len(report_filepath) > 0: self.modules.append(FileWriter(store_key, file_path=report_filepath)) def transform(self, datastore): diff --git a/popmon/version.py b/popmon/version.py index d06dc7ab..7a8c255a 100644 --- a/popmon/version.py +++ b/popmon/version.py @@ -1,6 +1,6 @@ """THIS FILE IS AUTO-GENERATED BY SETUP.PY.""" name = "popmon" -version = "0.3.10" -full_version = "0.3.10" +version = "0.3.11" +full_version = "0.3.11" release = True diff --git a/popmon/visualization/__init__.py b/popmon/visualization/__init__.py index 379b81b8..2026687d 100644 --- a/popmon/visualization/__init__.py +++ b/popmon/visualization/__init__.py @@ -20,6 +20,7 @@ # flake8: noqa +from popmon.visualization.alert_section_generator import AlertSectionGenerator from popmon.visualization.histogram_section import HistogramSection from popmon.visualization.report_generator import ReportGenerator from popmon.visualization.section_generator import SectionGenerator @@ -27,7 +28,7 @@ TrafficLightSectionGenerator, ) -# set matplotlib backend to batchmode when running in shell +# set matplotlib backend to batch mode when running in shell # need to do this *before* matplotlib.pyplot gets imported from ..visualization.backend import set_matplotlib_backend @@ -39,4 +40,5 @@ "HistogramSection", "ReportGenerator", "TrafficLightSectionGenerator", + "AlertSectionGenerator", ] diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py new file mode 100644 index 00000000..7cab2af4 --- /dev/null +++ b/popmon/visualization/alert_section_generator.py @@ -0,0 +1,243 @@ +# Copyright (c) 2020 ING Wholesale Banking Advanced Analytics +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of +# this software and associated documentation files (the "Software"), to deal in +# the Software without restriction, including without limitation the rights to +# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +# the Software, and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +import fnmatch +import multiprocessing + +import numpy as np +import pandas as pd +from joblib import Parallel, delayed +from tqdm import tqdm + +from ..base import Module +from ..config import get_stat_description +from ..visualization.utils import _prune, plot_bars_b64 +from .traffic_light_section_generator import _plot_metrics + + +class AlertSectionGenerator(Module): + """This module takes the time-series data of already computed statistics, plots the data and + combines all the plots into a list which is stored together with the section name in a dictionary + which later will be used for the report generation. + """ + + def __init__( + self, + read_key, + store_key, + section_name, + features=None, + ignore_features=None, + last_n=0, + skip_first_n=0, + skip_last_n=0, + static_bounds=None, + dynamic_bounds=None, + prefix="traffic_light_", + suffices=["_red_high", "_yellow_high", "_yellow_low", "_red_low"], + ignore_stat_endswith=None, + skip_empty_plots=True, + description="", + show_stats=None, + ): + """Initialize an instance of SectionGenerator. + + :param str read_key: key of input data to read from the datastore and use for plotting + :param str store_key: key for output data to be stored in the datastore + :param str section_name: key of output data to store in the datastore + :param list features: list of features to pick up from input data (optional) + :param list ignore_features: ignore list of features, if present (optional) + :param int last_n: plot statistic data for last 'n' periods (optional) + :param int skip_first_n: when plotting data skip first 'n' periods. last_n takes precedence (optional) + :param int skip_last_n: in plot skip last 'n' periods. last_n takes precedence (optional) + :param str static_bounds: key to static traffic light bounds key in datastore (optional) + :param str dynamic_bounds: key to dynamic traffic light bounds key in datastore (optional) + :param str prefix: dynamic traffic light prefix. default is ``'traffic_light_'`` (optional) + :param str suffices: dynamic traffic light suffices. (optional) + :param list ignore_stat_endswith: ignore stats ending with any of list of suffices. (optional) + :param bool skip_empty_plots: if false, also show empty plots in report with only nans or zeroes (optional) + :param str description: description of the section. default is empty (optional) + :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) + """ + super().__init__() + self.read_key = read_key + self.store_key = store_key + self.features = features or [] + self.ignore_features = ignore_features or [] + self.section_name = section_name + self.last_n = last_n + self.skip_first_n = skip_first_n + self.skip_last_n = skip_last_n + self.dynamic_bounds = dynamic_bounds + self.static_bounds = static_bounds + self.prefix = prefix + self.suffices = suffices + self.ignore_stat_endswith = ignore_stat_endswith or [] + self.skip_empty_plots = skip_empty_plots + self.description = description + self.show_stats = show_stats + self.plot_overview = True + self.plot_metrics = True + + def transform(self, datastore): + data_obj = self.get_datastore_object(datastore, self.read_key, dtype=dict) + + static_bounds = self.get_datastore_object( + datastore, self.static_bounds, dtype=dict, default={} + ) + dynamic_bounds = self.get_datastore_object( + datastore, self.dynamic_bounds, dtype=dict, default={} + ) + + features = self.get_features(data_obj.keys()) + features_w_metrics = [] + + num_cores = multiprocessing.cpu_count() + + self.logger.info( + f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}' + ) + + def short_date(date): + return date if len(date) <= 22 else date[:22] + + for feature in tqdm(features, ncols=100): + df = data_obj.get(feature, pd.DataFrame()) + fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index)) + + assert all(df.index == fdbounds.index) + + # prepare date labels + df.drop( + columns=["histogram", "reference_histogram"], + inplace=True, + errors="ignore", + ) + dates = [short_date(str(date)) for date in df.index.tolist()] + + # get base64 encoded plot for each metric; do parallel processing to speed up. + metrics = [ + m + for m in df.columns + if not any([m.endswith(s) for s in self.ignore_stat_endswith]) + ] + if self.show_stats is not None: + metrics = [ + m + for m in metrics + if any(fnmatch.fnmatch(m, pattern) for pattern in self.show_stats) + ] + + plots = [] + if self.plot_overview: + plots.append( + _plot_metrics( + [m for m in metrics if not m.endswith("worst")], + dates, + df, + 0, + 0, + 0, + 0, + style="alerts", + ) + ) + if self.plot_metrics: + plots += Parallel(n_jobs=num_cores)( + delayed(_plot_metric)( + feature, + metric, + dates, + df[metric], + static_bounds, + fdbounds, + self.prefix, + self.suffices, + self.last_n, + self.skip_first_n, + self.skip_last_n, + self.skip_empty_plots, + ) + for metric in metrics + ) + # filter out potential empty plots (from skip empty plots) + if self.skip_empty_plots: + plots = [e for e in plots if len(e["plot"])] + features_w_metrics.append( + dict(name=feature, plots=sorted(plots, key=lambda plot: plot["name"])) + ) + + params = { + "section_title": self.section_name, + "section_description": self.description, + "features": features_w_metrics, + } + + if self.store_key in datastore: + datastore[self.store_key].append(params) + else: + datastore[self.store_key] = [params] + + return datastore + + +def _plot_metric( + feature, + metric, + dates, + values, + static_bounds, + fdbounds, + prefix, + suffices, + last_n, + skip_first_n, + skip_last_n, + skip_empty, +): + """Split off plot histogram generation to allow for parallel processing""" + # pick up static traffic light boundaries + name = feature + ":" + metric + sbounds = static_bounds.get(name, ()) + # pick up dynamic traffic light boundaries + names = [prefix + metric + suffix for suffix in suffices] + dbounds = tuple( + [ + _prune(fdbounds[n].tolist(), last_n, skip_first_n, skip_last_n) + for n in names + if n in fdbounds.columns + ] + ) + # choose dynamic bounds if present + bounds = dbounds if len(dbounds) > 0 else sbounds + # prune dates and values + dates = _prune(dates, last_n, skip_first_n, skip_last_n) + values = _prune(values, last_n, skip_first_n, skip_last_n) + + # make plot. note: slow! + plot = plot_bars_b64( + data=np.array(values), + labels=dates, + ylim=True, + bounds=bounds, + skip_empty=skip_empty, + ) + + return dict(name=metric, description=get_stat_description(metric), plot=plot) diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index dda21ede..9f81ab56 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -171,10 +171,10 @@ def short_date(date): "features": features_w_metrics, } - if self.store_key in datastore: - datastore[self.store_key].append(params) - else: - datastore[self.store_key] = [params] + if self.store_key not in datastore: + datastore[self.store_key] = [] + + datastore[self.store_key].append(params) return datastore diff --git a/popmon/visualization/templates/assets/js/custom-script.js b/popmon/visualization/templates/assets/js/custom-script.js index fdbe64c3..523e2478 100644 --- a/popmon/visualization/templates/assets/js/custom-script.js +++ b/popmon/visualization/templates/assets/js/custom-script.js @@ -5,9 +5,15 @@ $( "section" ).each(function() { // show corresponding feature's data based on the filter $(document).on("click", "button.dropdown-item", function() { obj = $(this) - obj.closest("section").find("div.section_feature").hide() - obj.closest("section").find("div[data-section-feature='" + obj.attr("data-feature") + "']").show() - obj.parent().siblings("button").text("Feature: " + obj.text()) + +// obj.closest("section").find("div.section_feature").hide() +// obj.closest("section").find("div[data-section-feature='" + obj.attr("data-feature") + "']").show() +// obj.parent().siblings("button").text("Feature: " + obj.text()) + + // Linked dropdowns + $("div.section_feature").hide() + $("div[data-section-feature='" + obj.attr("data-feature") + "']").show() + $("button.dropdown-toggle").text("Feature: " + obj.text()) }); // making navigation work: after clicking a nav link scrolling to the corresponding section's position $(document).on("click", "a.nav-link,a.navbar-brand", function(e) { diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 64032af2..22cd6e1f 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -30,6 +30,7 @@ from ..config import get_stat_description from ..visualization.utils import ( _prune, + plot_traffic_lights_alerts_b64, plot_traffic_lights_b64, plot_traffic_lights_heatmap_b64, ) @@ -60,7 +61,7 @@ def __init__( description="", show_stats=None, plot_overview=True, - plot_metrics=True, + plot_metrics=False, ): """Initialize an instance of SectionGenerator. @@ -152,7 +153,7 @@ def short_date(date): plots = [] if self.plot_overview: plots.append( - _plot_heatmap( + _plot_metrics( metrics, dates, df, @@ -213,7 +214,9 @@ def _plot_metric(metric, dates, values, last_n, skip_first_n, skip_last_n, skip_ return dict(name=metric, description=get_stat_description(metric), plot=plot) -def _plot_heatmap(metrics, dates, df, last_n, skip_first_n, skip_last_n, skip_empty): +def _plot_metrics( + metrics, dates, df, last_n, skip_first_n, skip_last_n, skip_empty, style="heatmap" +): # prune dates and values dates = _prune(dates, last_n, skip_first_n, skip_last_n) @@ -230,9 +233,18 @@ def _plot_heatmap(metrics, dates, df, last_n, skip_first_n, skip_last_n, skip_em values = np.stack(values) # make plot. note: slow! - plot = plot_traffic_lights_heatmap_b64( - values, metrics=nonempty_metrics, labels=dates - ) + if style == "heatmap": + plot = plot_traffic_lights_heatmap_b64( + values, metrics=nonempty_metrics, labels=dates + ) + elif style == "alerts": + plot = plot_traffic_lights_alerts_b64( + values, + metrics=nonempty_metrics, + labels=dates, + ) + else: + raise ValueError("style must be either 'heatmap' or 'alerts'") else: plot = "" diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py index 4785820b..7f9a4368 100644 --- a/popmon/visualization/utils.py +++ b/popmon/visualization/utils.py @@ -20,7 +20,7 @@ import logging import math -from io import BytesIO +from io import BytesIO, StringIO import numpy as np import pandas as pd @@ -35,18 +35,29 @@ mpl_style(dark=False) -def plt_to_base64(): - """Outputting plot as a base64 encoded string. +def plt_to_str(format="png"): + """Outputting plot as a base64 encoded string or as svg image. - :return: base64 encoded plot image + :return: base64 encoded plot image or svg image :rtype: str """ - tmpfile = BytesIO() - plt.savefig(tmpfile, format="png") - plt.close() + if format == "png": + tmpfile = BytesIO() + + plt.savefig(tmpfile, format="png") + plt.close() + + return pybase64.b64encode(tmpfile.getvalue()).decode("utf-8") + elif format == "svg": + tmpfile = StringIO() - return pybase64.b64encode(tmpfile.getvalue()).decode("utf-8") + plt.savefig(tmpfile, format="svg") + plt.close() + + return tmpfile.getvalue().encode("utf-8") + else: + raise ValueError("Format should be png or svg.") def plot_bars_b64(data, labels=None, bounds=None, ylim=False, skip_empty=True): @@ -139,7 +150,7 @@ def plot_bars_b64(data, labels=None, bounds=None, ylim=False, skip_empty=True): ax.grid(True, linestyle=":") fig.tight_layout() - return plt_to_base64() + return plt_to_str() def plot_traffic_lights_heatmap_b64(data, metrics=None, labels=None): @@ -170,7 +181,67 @@ def plot_traffic_lights_heatmap_b64(data, metrics=None, labels=None): fig.tight_layout() - return plt_to_base64() + return plt_to_str() + + +def plot_traffic_lights_alerts_b64(data, metrics=None, labels=None): + assert data.shape[0] == 3 + + # Reorder metrics if needed + pos_green = metrics.index("n_green") + pos_yellow = metrics.index("n_yellow") + pos_red = metrics.index("n_red") + + if [pos_green, pos_yellow, pos_red] != [2, 1, 0]: + data[[0, 1, 2]] = data[[pos_green, pos_yellow, pos_red]] + + metrics = ["n_green", "n_yellow", "n_red"] + + fig, ax = plt.subplots(figsize=(14, 4.5)) + + N = 256 + yellow = np.ones((N, 4)) + yellow[:, 0] = np.linspace(1, 255 / 256, N) + yellow[:, 1] = np.linspace(1, 232 / 256, N) + yellow[:, 2] = np.linspace(1, 11 / 256, N) + yellow_cmp = ListedColormap(yellow) + + cmaps = reversed(["Reds", yellow_cmp, "Greens"]) + # https://stackoverflow.com/questions/60325792/seaborn-heatmap-color-by-row + for idx, cmap in enumerate(cmaps): + _ = ax.imshow( + np.vstack([data[idx, :], data[idx, :]]), + aspect="equal", + cmap=cmap, + extent=[-0.5, data.shape[1] - 0.5, idx - 0.5, idx + 0.5], + ) + + # Major ticks + ax.set_xticks(np.arange(0, len(labels), 1)) + ax.set_yticks(np.arange(0, len(metrics), 1)) + + # Labels for major ticks + ax.set_xticklabels(labels) + ax.set_yticklabels(metrics) + + # Minor ticks + ax.set_xticks(np.arange(-0.50, len(labels), 1), minor=True) + ax.set_yticks(np.arange(-0.50, len(metrics), 1), minor=True) + + plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor") + + # Annotations + for i in range(len(metrics)): + for j in range(len(labels)): + ax.text(j, i, f"{data[i, j]:.0f}", ha="center", va="center", color="black") + + # Gridlines based on minor ticks + ax.grid(False) + ax.grid(which="minor", color="#333333", linestyle="-", linewidth=1, alpha=1) + + fig.tight_layout() + + return plt_to_str() def plot_traffic_lights_b64(data, labels=None, skip_empty=True): @@ -234,7 +305,7 @@ def plot_traffic_lights_b64(data, labels=None, skip_empty=True): fig.tight_layout() - return plt_to_base64() + return plt_to_str() def grouped_bar_chart_b64(data, labels, legend): @@ -242,7 +313,7 @@ def grouped_bar_chart_b64(data, labels, legend): :param numpy.ndarray data: bin values of histograms :param list labels: common bin labels for all histograms - :param list legend: corresponing names of histograms we want to represent + :param list legend: corresponding names of histograms we want to represent :return: base64 encoded plot image (grouped bar chart) :rtype: str """ @@ -274,7 +345,7 @@ def grouped_bar_chart_b64(data, labels, legend): fig.tight_layout() - return plt_to_base64() + return plt_to_str() def plot_overlay_1d_histogram_b64( @@ -408,7 +479,7 @@ def xtick(lab): plt.grid() plt.legend() - return plt_to_base64() + return plt_to_str() def _prune(values, last_n=0, skip_first_n=0, skip_last_n=0): diff --git a/requirements-test.txt b/requirements-test.txt index d935bc45..5a48b749 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,7 +1,8 @@ flake8>=3.7.8 pytest>=4.0.2 -nbconvert>=5.3.1 +pytest-notebook>=0.6.1 +nbconvert~=5.6.1 jupyter_client>=5.2.3 ipykernel>=5.1.3 black>=19.10b0 -isort>=5.0.7 \ No newline at end of file +isort>=5.0.7 diff --git a/setup.py b/setup.py index a5714385..74972ad3 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ MAJOR = 0 REVISION = 3 -PATCH = 10 +PATCH = 11 DEV = False # NOTE: also update version at: README.rst diff --git a/tests/popmon/notebooks/test_notebooks.py b/tests/popmon/notebooks/test_notebooks.py index 0004d234..6d1bce4b 100644 --- a/tests/popmon/notebooks/test_notebooks.py +++ b/tests/popmon/notebooks/test_notebooks.py @@ -1,55 +1,30 @@ -import os -import unittest - -import nbformat import pytest -from jupyter_client.kernelspec import KernelSpecManager, NoSuchKernel -from nbconvert.preprocessors import ExecutePreprocessor -from nbconvert.preprocessors.execute import CellExecutionError - -from popmon import resources - -kernel_name = "python3" - -# check if jupyter python3 kernel can be opened. if kernel not found, skip unit tests below. -try: - km = KernelSpecManager() - km.get_kernel_spec(kernel_name) - kernel_found = True -except NoSuchKernel: - kernel_found = False +from pytest_notebook.nb_regression import NBRegressionFixture +from popmon.resources import notebook -class NotebookTest(unittest.TestCase): - """Unit test notebook""" - def run_notebook(self, notebook): - """ Test notebook """ +@pytest.fixture(scope="module") +def nb_tester(): + """Test notebooks using pytest-notebook""" + nb_regression = NBRegressionFixture( + diff_ignore=( + "/metadata/language_info", + "/cells/*/execution_count", + "/cells/*/outputs/*", + ), + exec_timeout=1800, + ) + return nb_regression - # load notebook - with open(notebook) as f: - nb = nbformat.read(f, as_version=4) - # execute notebook - ep = ExecutePreprocessor(timeout=600, kernel_name=kernel_name) - try: - ep.preprocess(nb, {}) - status = True - except CellExecutionError: - # store if failed - status = False - executed_notebook = os.getcwd() + "/" + notebook.split("/")[-1] - with open(executed_notebook, mode="wt") as f: - nbformat.write(nb, f) +def test_notebook_basic(nb_tester): + nb_tester.check(notebook("popmon_tutorial_basic.ipynb")) - # check status - self.assertTrue(status, "Notebook execution failed (%s)" % notebook) +def test_notebook_advanced(nb_tester): + nb_tester.check(notebook("popmon_tutorial_advanced.ipynb")) -@pytest.mark.filterwarnings("ignore:Session._key_changed is deprecated") -@pytest.mark.skipif(not kernel_found, reason=f"{kernel_name} kernel not found.") -class PipelineNotebookTest(NotebookTest): - """Unit test notebook""" - def test_basic_tutorial(self): - self.run_notebook(resources.notebook("popmon_tutorial_basic.ipynb")) +def test_notebook_incremental_data(nb_tester): + nb_tester.check(notebook("popmon_tutorial_incremental_data.ipynb"))