Skip to content

Commit

Permalink
feat: remove skip_empty_plots
Browse files Browse the repository at this point in the history
The default behaviour is now to skip plots that contain only NaN or Inf values.
Unlike before, plots that contain only zeros are now retained.
  • Loading branch information
sbrugman committed Sep 1, 2022
1 parent 695ee17 commit bd3ea29
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 84 deletions.
21 changes: 1 addition & 20 deletions popmon/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import warnings
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import pandas as pd
from histogrammar.dfinterface.make_histograms import get_time_axes
from pydantic import BaseModel, BaseSettings
from pydantic.class_validators import validator
from typing_extensions import Literal

# Global configuration for the joblib parallelization. Could be used to change the number of jobs, and/or change
Expand All @@ -32,12 +30,6 @@
parallel_args = {"n_jobs": 1}


class ValidatedBaseModel(BaseModel):
class Config:
validate_all = True
validate_assignment = True


class ValidatedSettings(BaseSettings):
class Config:
validate_all = True
Expand Down Expand Up @@ -174,23 +166,12 @@ class Section(BaseModel):
"""Configuration related to the traffic lights section"""


class Report(ValidatedBaseModel):
class Report(BaseModel):
"""Report-specific configuration"""

title: str = "POPMON Report"
"""Report title in browser and navbar. May contain HTML."""

skip_empty_plots: bool = False
"""(deprecated) if false, also show empty plots in report with only nans or zeroes (optional)"""

@validator("skip_empty_plots")
def skip_empty_plots_deprecated(cls, v):
if v:
warnings.warn(
"The 'skip_empty_plots' parameter is deprecated and will be removed in the next release."
)
return v

last_n: int = 0
"""plot statistic data for last 'n' periods (optional)"""

Expand Down
3 changes: 1 addition & 2 deletions popmon/notebooks/popmon_tutorial_advanced.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -416,8 +416,7 @@
"report_settings.report.last_n = 0\n",
"report_settings.report.skip_first_n = 0\n",
"report_settings.report.skip_last_n = 0\n",
"report_settings.report.section.histograms.plot_hist_n = 2\n",
"report_settings.report.skip_empty_plots = True\n",
"report_settings.report.section.histograms.plot_hist_n = 0\n",
"report_settings.report.report_filepath = None\n",
"\n",
"report.regenerate(\n",
Expand Down
9 changes: 2 additions & 7 deletions popmon/visualization/alert_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ def __init__(
self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.alerts.name
Expand Down Expand Up @@ -110,9 +109,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -141,15 +138,13 @@ def transform(
0,
0,
0,
0,
self.tl_colors,
style="alerts",
)
]

# filter out entries whose plot is empty
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{
Expand Down
23 changes: 5 additions & 18 deletions popmon/visualization/overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ def __init__(
self.last_n = settings.last_n
self.skip_first_n = settings.skip_first_n
self.skip_last_n = settings.skip_last_n
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None
self.section_name = settings.section.overview.name
self.description = settings.section.overview.description
Expand All @@ -104,9 +103,7 @@ def transform(

features = self.get_features(list(data_obj.keys()))

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

values = {}
for feature in tqdm(features, ncols=100):
Expand All @@ -132,15 +129,11 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
)

plots = [_plot_metrics(values)]

# filter out entries whose plot is empty
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]

plots = [e for e in plots if len(e["plot"])]
plots = sorted(plots, key=lambda plot: plot["name"])

sections.append(
Expand Down Expand Up @@ -190,16 +183,10 @@ def _get_metrics(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
):
values = []
nonempty_metrics = []
for metric in metrics:
value = _prune(df[metric], last_n, skip_first_n, skip_last_n)

if not skip_empty or np.sum(value) > 0:
values.append(value)
nonempty_metrics.append(metric)
values = [
_prune(df[metric], last_n, skip_first_n, skip_last_n) for metric in metrics
]

empty = {0: 0, 1: 0, 2: 0}
if len(values) > 0:
Expand Down
11 changes: 2 additions & 9 deletions popmon/visualization/section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ def __init__(
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.description = description
self.show_stats = settings.show_stats if not settings.extended_report else None
self.primary_color = settings.primary_color
Expand All @@ -158,9 +157,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -193,7 +190,6 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
self.primary_color,
self.tl_colors,
)
Expand All @@ -202,8 +198,7 @@ def transform(
plots = parallel(_plot_metric, args)

# filter out entries whose plot is empty
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

layouts = ""
if len(plots) > 0:
Expand Down Expand Up @@ -262,7 +257,6 @@ def _plot_metric(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
primary_color,
zline_color,
):
Expand Down Expand Up @@ -290,7 +284,6 @@ def _plot_metric(
labels=dates,
ylim=True,
bounds=bounds,
skip_empty=skip_empty,
primary_color=primary_color,
tl_colors=zline_color,
metric=metric,
Expand Down
26 changes: 8 additions & 18 deletions popmon/visualization/traffic_light_section_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def __init__(
self.prefix = prefix
self.suffices = suffices
self.ignore_stat_endswith = ignore_stat_endswith or []
self.skip_empty_plots = settings.skip_empty_plots
self.show_stats = settings.show_stats if not settings.extended_report else None

self.section_name = settings.section.traffic_lights.name
Expand All @@ -109,9 +108,7 @@ def transform(
features = self.get_features(list(data_obj.keys()))
features_w_metrics = []

self.logger.info(
f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}'
)
self.logger.info(f'Generating section "{self.section_name}"')

for feature in tqdm(features, ncols=100):
df = data_obj.get(feature, pd.DataFrame())
Expand Down Expand Up @@ -140,14 +137,13 @@ def transform(
self.last_n,
self.skip_first_n,
self.skip_last_n,
self.skip_empty_plots,
tl_colors=self.tl_colors,
)
]

# filter out entries whose plot is empty
if self.skip_empty_plots:
plots = [e for e in plots if len(e["plot"])]
plots = [e for e in plots if len(e["plot"])]

features_w_metrics.append(
{
"name": feature,
Expand All @@ -174,34 +170,28 @@ def _plot_metrics(
last_n,
skip_first_n,
skip_last_n,
skip_empty,
tl_colors,
style="heatmap",
):
# prune dates and values
dates = _prune(dates, last_n, skip_first_n, skip_last_n)

values = []
nonempty_metrics = []
for metric in metrics:
value = _prune(df[metric], last_n, skip_first_n, skip_last_n)

if not skip_empty or np.sum(value) > 0:
values.append(value)
nonempty_metrics.append(metric)
values = [
_prune(df[metric], last_n, skip_first_n, skip_last_n) for metric in metrics
]

if len(values) > 0:
values = np.stack(values)

if style == "heatmap":
plot = plot_traffic_lights_overview(
feature, values, metrics=nonempty_metrics, labels=dates
feature, values, metrics=metrics, labels=dates
)
elif style == "alerts":
plot = plot_traffic_lights_alerts_aggregate(
feature,
values,
metrics=nonempty_metrics,
metrics=metrics,
labels=dates,
tl_colors=tl_colors,
)
Expand Down
16 changes: 6 additions & 10 deletions popmon/visualization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def plot_bars(
labels: List[str],
bounds: tuple,
ylim: bool,
skip_empty: bool,
primary_color: str,
tl_colors: Dict[str, str],
metric: str,
Expand All @@ -62,7 +61,6 @@ def plot_bars(
:param labels: common bin labels for all histograms. default is None.
:param bounds: traffic light bounds (y-coordinates). default is None.
:param ylim: place y-axis limits for zooming into the data. default is False.
:param skip_empty: if false, also plot empty plots with only nans or only zeroes. default is True.
:return: JSON plot image
:rtype: str
"""
Expand All @@ -72,14 +70,12 @@ def plot_bars(
raise ValueError("shape mismatch: x-axis labels do not match the data shape")

# skip plot generation for empty datasets
if skip_empty:
n_data = len(data)
n_zero = n_data - np.count_nonzero(data)
n_nan = pd.isnull(data).sum()
n_inf = np.sum([np.isinf(x) for x in data if isinstance(x, float)])
if n_nan + n_zero + n_inf == n_data:
logger.debug("skipping plot with empty data.")
return ""
n_data = len(data)
n_nan = pd.isnull(data).sum()
n_inf = np.sum([np.isinf(x) for x in data if isinstance(x, float)])
if n_nan + n_inf == n_data:
logger.debug("skipping plot with empty data.")
return ""

# plot bar
fig = go.Figure(
Expand Down

0 comments on commit bd3ea29

Please sign in to comment.