diff --git a/docs/source/conf.py b/docs/source/conf.py index ab2d5091..aafdde14 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -57,8 +57,8 @@ # General information about the project. project = "Population Shift Monitoring" -copyright = "2022, ING Bank N.V." -author = "ING Wholesale Banking Advanced Analytics" +copyright = "2023, ING Bank N.V." +author = "ING Analytics Wholesale Banking" version = popmon.version.version # The language for content autogenerated by Sphinx. Refer to documentation @@ -143,7 +143,7 @@ master_doc, "popmon.tex", "POPMON Documentation", - "ING Wholesale Banking Advanced Analytics", + "ING Analytics Wholesale Banking", "manual", ) ] diff --git a/examples/integrations/kibana/popmon_to_elastic.py b/examples/integrations/kibana/popmon_to_elastic.py index fc2ae100..869f78ac 100644 --- a/examples/integrations/kibana/popmon_to_elastic.py +++ b/examples/integrations/kibana/popmon_to_elastic.py @@ -4,7 +4,7 @@ import pandas as pd from elastic_connector import ElasticConnector -import popmon # noqa +import popmon # noqa: F401 from popmon import resources if __name__ == "__main__": diff --git a/examples/synthetic_data.py b/examples/synthetic_data.py index 5a6977d0..c3b4a18f 100644 --- a/examples/synthetic_data.py +++ b/examples/synthetic_data.py @@ -1,6 +1,6 @@ import pandas as pd -import popmon # noqa +import popmon # noqa: F401 from popmon import Settings, resources # open synthetic data diff --git a/popmon/__init__.py b/popmon/__init__.py index 2e770a6f..70df0a27 100644 --- a/popmon/__init__.py +++ b/popmon/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/alerting/__init__.py b/popmon/alerting/__init__.py index 80d35733..87c13e16 100644 --- a/popmon/alerting/__init__.py +++ b/popmon/alerting/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,8 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..alerting.alerts_summary import AlertsSummary -from ..alerting.compute_tl_bounds import ( +from popmon.alerting.alerts_summary import AlertsSummary +from popmon.alerting.compute_tl_bounds import ( ComputeTLBounds, DynamicBounds, StaticBounds, diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 053c0c4e..4a21f66b 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -24,7 +24,7 @@ import numpy as np import pandas as pd -from ..base import Module +from popmon.base import Module class AlertsSummary(Module): diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index 6d0a4379..cbd1ccf5 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -26,8 +26,8 @@ import numpy as np import pandas as pd -from ..analysis.apply_func import ApplyFunc -from ..base import Module, Pipeline +from popmon.analysis.apply_func import ApplyFunc +from popmon.base import Module, Pipeline def traffic_light_summary(row, cols=None, prefix=""): @@ -95,7 +95,7 @@ def collect_traffic_light_bounds(monitoring_rules): """ metrics_per_feature = defaultdict(list) metrics = [] - for pattern in monitoring_rules.keys(): + for pattern in monitoring_rules: psplit = pattern.split(":") feature = ":".join(psplit[:-1]) metric = psplit[-1] diff --git a/popmon/analysis/__init__.py b/popmon/analysis/__init__.py index efe44800..6f6bb683 100644 --- a/popmon/analysis/__init__.py +++ b/popmon/analysis/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,7 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..analysis.apply_func import ApplyFunc +from popmon.analysis.apply_func import ApplyFunc + from .comparison import Comparisons from .profiling import Profiles diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index d4b9650b..fc24d6f7 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -24,8 +24,8 @@ import numpy as np import pandas as pd -from ..base import Module -from ..utils import parallel +from popmon.base import Module +from popmon.utils import parallel class ApplyFunc(Module): @@ -97,8 +97,8 @@ def add_apply_func( func, suffix=None, prefix=None, - metrics=[], - features=[], + metrics=None, + features=None, entire=None, *args, **kwargs, @@ -117,6 +117,10 @@ def add_apply_func( :param kwargs: (dict, optional) kwargs for 'func' """ # check inputs + if features is None: + features = [] + if metrics is None: + metrics = [] if not callable(func): raise TypeError("functions in ApplyFunc must be callable objects") if suffix is not None and not isinstance(suffix, str): @@ -252,9 +256,12 @@ def apply_func(feature, selected_metrics, df, arr): :return: dictionary with outputs of applied-to metric pd.Series """ # basic checks of feature - if "features" in arr and len(arr["features"]) > 0: - if feature not in arr["features"]: - return {} + if ( + "features" in arr + and len(arr["features"]) > 0 + and feature not in arr["features"] + ): + return {} # get func input keys = list(arr.keys()) @@ -343,20 +350,15 @@ def apply_func(feature, selected_metrics, df, arr): and all(obj.index == df.index) ): obj = {"_".join(df.columns): obj} + # e.g. output of normalized_hist_mean_cov: a dataframe with one column, actually a series elif ( isinstance(obj, pd.DataFrame) and len(obj.columns) == 1 - and len(obj.index) != len(df.index) - ): - # e.g. output of normalized_hist_mean_cov: a dataframe with one column, actually a series - obj = obj[obj.columns[0]].to_dict() - elif ( - isinstance(obj, pd.DataFrame) - and len(obj.columns) == 1 - and len(obj.index) == len(df.index) - and (obj.index != df.index).any() + and ( + len(obj.index) != len(df.index) + or (len(obj.index) == len(df.index) and (obj.index != df.index).any()) + ) ): - # e.g. output of normalized_hist_mean_cov: a dataframe with one column, actually a series obj = obj[obj.columns[0]].to_dict() elif isinstance(obj, pd.Series): # e.g. output of np.mean of np.std: results in one number per column when applied to a dataframe diff --git a/popmon/analysis/comparison/__init__.py b/popmon/analysis/comparison/__init__.py index 388ec3ef..0a190acc 100644 --- a/popmon/analysis/comparison/__init__.py +++ b/popmon/analysis/comparison/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -16,7 +16,7 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ...analysis.comparison.hist_comparer import ( +from popmon.analysis.comparison.hist_comparer import ( ExpandingHistComparer, ExpandingNormHistComparer, ReferenceHistComparer, @@ -24,6 +24,7 @@ RollingHistComparer, RollingNormHistComparer, ) + from .comparisons import Comparisons __all__ = [ diff --git a/popmon/analysis/comparison/comparisons.py b/popmon/analysis/comparison/comparisons.py index 4d58b0c7..29cbb83b 100644 --- a/popmon/analysis/comparison/comparisons.py +++ b/popmon/analysis/comparison/comparisons.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -82,7 +82,7 @@ def ks_test(hist_1, hist_2): Formulas translated from c++ to python, but formulas otherwise not modified. Reference: link: https://root.cern.ch/doc/master/classTH1.html#TH1:KolmogorovTest GNU license: https://root.cern.ch/license - All modifications copyright ING WBAA. + All modifications copyright INGA WB. :param hist_1: 1D array with bin counts of the histogram_1 :param hist_2: 1D array with bin counts of the histogram_2 @@ -115,7 +115,7 @@ def ks_prob(testscore): Formulas translated from c++ to python, but formulas otherwise not modified. Reference: https://root.cern.ch/doc/master/classTH1.html#TH1:KolmogorovTest GNU license: https://root.cern.ch/license - All modifications copyright ING WBAA. + All modifications copyright INGA WB. :param float testscore: Kolmogorov-Smirnov test score @@ -154,7 +154,7 @@ def ks_prob(testscore): dim=1, htype="num", ) -def ks(p, q, *args): +def ks(p, q, *_): # KS-test only properly defined for (ordered) 1D interval variables ks_testscore = ks_test(p, q) ks_pvalue = ks_prob(ks_testscore) @@ -182,7 +182,7 @@ def unknown_labels(hist1, hist2): dim=(2,), htype="all", ) -def pearson(p, q, *args): +def pearson(p, q, *_): # calculate pearson coefficient pearson_coeff = np.nan if len(p) >= 2: @@ -201,7 +201,7 @@ def uu_chi2(n, m): Formulas translated from c++ to python, but formulas otherwise not modified. Reference: https://root.cern.ch/doc/master/classTH1.html#a6c281eebc0c0a848e7a0d620425090a5 GNU License: https://root.cern.ch/license - All modifications copyright ING WBAA. + All modifications copyright INGA WB. :param n: 1d array with bin counts of the reference set :param m: 1d array with bin counts of the test set @@ -218,8 +218,8 @@ def _not_finite_to_zero(x): if len(n) != len(m): raise ValueError("Input histograms have unequal size.") - N = np.sum(n) - M = np.sum(m) + N = np.sum(n) # noqa: N806 + M = np.sum(m) # noqa: N806 if N == 0 or M == 0: return np.nan, np.nan, np.nan, np.nan, [0] * len(n) diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index 9dc25b31..ba069a9f 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,8 +21,8 @@ import numpy as np import pandas as pd -from ...analysis.apply_func import ApplyFunc -from ...analysis.functions import ( +from popmon.analysis.apply_func import ApplyFunc +from popmon.analysis.functions import ( expand_norm_hist_mean_cov, expanding_hist, hist_sum, @@ -31,13 +31,13 @@ roll_norm_hist_mean_cov, rolling_hist, ) -from ...analysis.hist_numpy import ( +from popmon.analysis.hist_numpy import ( check_similar_hists, get_consistent_numpy_entries, get_consistent_numpy_ndgrids, ) -from ...base import Pipeline -from ...hist.hist_utils import COMMON_HIST_TYPES, is_numeric +from popmon.base import Pipeline +from popmon.hist.hist_utils import COMMON_HIST_TYPES, is_numeric def hist_compare(row, hist_name1="", hist_name2=""): @@ -132,7 +132,7 @@ def __init__( assign_to_key=assign_to_key, ) hist_collector.add_apply_func( - func=func_hist_collector, entire=True, suffix=suffix, *args, **kwargs + *args, func=func_hist_collector, entire=True, suffix=suffix, **kwargs ) # do histogram comparison hist_comparer = ApplyFunc( @@ -326,7 +326,7 @@ def __init__( # make reference histogram(s) hist_collector = ApplyFunc(apply_to_key=read_key, assign_to_key=assign_to_key) hist_collector.add_apply_func( - func=func_hist_collector, hist_name=hist_col, suffix="", *args, **kwargs + *args, func=func_hist_collector, hist_name=hist_col, suffix="", **kwargs ) # do histogram comparison diff --git a/popmon/analysis/functions.py b/popmon/analysis/functions.py index d5506db8..e0370b44 100644 --- a/popmon/analysis/functions.py +++ b/popmon/analysis/functions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -24,14 +24,14 @@ from scipy import linalg, stats from scipy.stats import linregress, norm -from ..analysis.hist_numpy import ( +from popmon.analysis.hist_numpy import ( check_similar_hists, get_consistent_numpy_2dgrids, get_consistent_numpy_entries, set_2dgrid, ) -from ..hist.hist_utils import COMMON_HIST_TYPES, is_numeric -from ..stats.numpy import probability_distribution_mean_covariance +from popmon.hist.hist_utils import COMMON_HIST_TYPES, is_numeric +from popmon.stats.numpy import probability_distribution_mean_covariance def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): @@ -57,15 +57,17 @@ def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): ] x = { - m: np.nan - if ( - any( - r not in rdict or pd.isnull(rdict[r]) - for r in [m, m + suffix_mean, m + suffix_std] + m: ( + np.nan + if ( + any( + r not in rdict or pd.isnull(rdict[r]) + for r in [m, m + suffix_mean, m + suffix_std] + ) + or rdict[m + suffix_std] == 0.0 ) - or rdict[m + suffix_std] == 0.0 + else (rdict[m] - rdict[m + suffix_mean]) / rdict[m + suffix_std] ) - else (rdict[m] - rdict[m + suffix_mean]) / rdict[m + suffix_std] for m in cols } @@ -183,6 +185,7 @@ def rolling_lr_zscore(df, window, shift=0): :param int shift: size of shift. default is 0. :return: df with rolling z-score results of lin_regress() function applied to all columns """ + # MB 20200420: turn original df.rolling off, it doesn't accept timestamps. # raw=True suppresses Future warning # return df.shift(shift).rolling(window).apply(func, raw=True) diff --git a/popmon/analysis/hist_numpy.py b/popmon/analysis/hist_numpy.py index 31967330..4df4e1ac 100644 --- a/popmon/analysis/hist_numpy.py +++ b/popmon/analysis/hist_numpy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -24,8 +24,8 @@ import numpy as np from histogrammar.util import get_hist_props -from ..hist.hist_utils import get_bin_centers, is_numeric -from ..stats.numpy import quantile +from popmon.hist.hist_utils import get_bin_centers, is_numeric +from popmon.stats.numpy import quantile used_hist_types = (histogrammar.Bin, histogrammar.SparselyBin, histogrammar.Categorize) @@ -51,7 +51,7 @@ def get_hist_keys(h): elif hasattr(h, "values"): return set(range(len(h.values))) else: - raise TypeError() + raise TypeError # SparselyBin or Categorize def keys_recursive(hist, hist_keys, idx): @@ -64,7 +64,7 @@ def keys_recursive(hist, hist_keys, idx): for h in hist.values: hist_keys = keys_recursive(h, hist_keys, idx + 1) else: - raise TypeError() + raise TypeError return hist_keys keys = [set() for _ in range(n_dim)] @@ -114,19 +114,19 @@ def flatten(histogram, keys, grid, dim=0, prefix=None): if k not in keys[dim]: continue i = keys[dim].index(k) - flatten(h, keys, grid, dim + 1, [i] + prefix) + flatten(h, keys, grid, dim + 1, [i, *prefix]) elif hasattr(histogram, "values"): for i, h in enumerate(histogram.values): - flatten(h, keys, grid, dim + 1, [i] + prefix) + flatten(h, keys, grid, dim + 1, [i, *prefix]) else: - raise TypeError() + raise TypeError flatten(hist, keys, grid) return grid def set_2dgrid(hist, keys): - """Set 2d grid of first two dimenstions of input histogram + """Set 2d grid of first two dimensions of input histogram Used as input by get_2dgrid(hist). @@ -142,7 +142,7 @@ def get_ndgrid(hist, get_bin_labels=False, n_dim=2): """Get filled n-d grid of first n dimensions of input histogram :param hist: input histogrammar histogram - :return: grid of first n dimenstions of input histogram + :return: grid of first n dimensions of input histogram """ if hist.n_dim < n_dim: warnings.warn( @@ -163,12 +163,12 @@ def get_2dgrid(hist, get_bin_labels=False): """Get filled x,y grid of first two dimensions of input histogram :param hist: input histogrammar histogram - :return: x,y grid of first two dimenstions of input histogram + :return: x,y grid of first two dimensions of input histogram """ return get_ndgrid(hist, get_bin_labels, n_dim=2) -def get_consistent_numpy_ndgrids(hist_list=[], get_bin_labels=False, dim=3): +def get_consistent_numpy_ndgrids(hist_list=None, get_bin_labels=False, dim=3): """Get list of consistent x,y grids of first n dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms @@ -177,7 +177,7 @@ def get_consistent_numpy_ndgrids(hist_list=[], get_bin_labels=False, dim=3): :return: list of consistent x,y grids of first two dimensions of each input histogram in list """ # --- basic checks - if len(hist_list) == 0: + if hist_list is None or len(hist_list) == 0: raise ValueError("Input histogram list has zero length.") if hist_list[0].n_dim < dim: raise ValueError( @@ -200,13 +200,15 @@ def get_consistent_numpy_ndgrids(hist_list=[], get_bin_labels=False, dim=3): return gridnd_list -def get_consistent_numpy_2dgrids(hist_list=[], get_bin_labels=False): +def get_consistent_numpy_2dgrids(hist_list=None, get_bin_labels=False): """Get list of consistent x,y grids of first two dimensions of (sparse) input histograms :param list hist_list: list of input histogrammar histograms :param bool get_bin_labels: if true, return x-keys and y-keys describing binnings of 2d-grid. :return: list of consistent x,y grids of first two dimensions of each input histogram in list """ + if hist_list is None: + hist_list = [] return get_consistent_numpy_ndgrids(hist_list, get_bin_labels, dim=2) @@ -389,11 +391,11 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) # Check generic attributes - filled histograms only n_d = [hist.n_dim for hist in hist_list] - if not n_d.count(n_d[0]) == len(n_d): + if n_d.count(n_d[0]) != len(n_d): warnings.warn("Input histograms have inconsistent dimensions.") return False dts = [hist.datatype for hist in hist_list] - if not dts.count(dts[0]) == len(dts): + if dts.count(dts[0]) != len(dts): warnings.warn(f"Input histograms have inconsistent datatypes: {dts}") return False # Check generic attributes @@ -402,7 +404,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) # histogrammar.primitives.categorize.Categorize are both of type hg.Categorize # Make this consistent first. types = [get_contentType(hist) for hist in hist_list] - if not types.count(types[0]) == len(types): + if types.count(types[0]) != len(types): warnings.warn( "Input histograms have inconsistent class types: {types}".format( types=types @@ -413,7 +415,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) # Check Bin attributes if isinstance(hist_list[0], histogrammar.Bin): nums = [hist.num for hist in hist_list] - if not nums.count(nums[0]) == len(nums): + if nums.count(nums[0]) != len(nums): warnings.warn( "Input Bin histograms have inconsistent num attributes: {types}".format( types=nums @@ -421,7 +423,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) ) return False lows = [hist.low for hist in hist_list] - if not lows.count(lows[0]) == len(lows): + if lows.count(lows[0]) != len(lows): warnings.warn( "Input Bin histograms have inconsistent low attributes: {types}".format( types=lows @@ -429,7 +431,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) ) return False highs = [hist.high for hist in hist_list] - if not highs.count(highs[0]) == len(highs): + if highs.count(highs[0]) != len(highs): warnings.warn( "Input histograms have inconsistent high attributes: {types}".format( types=highs @@ -440,7 +442,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) # Check SparselyBin attributes if isinstance(hist_list[0], histogrammar.SparselyBin): origins = [hist.origin for hist in hist_list] - if not origins.count(origins[0]) == len(origins): + if origins.count(origins[0]) != len(origins): warnings.warn( "Input SparselyBin histograms have inconsistent origin attributes: {types}".format( types=origins @@ -448,7 +450,7 @@ def check_similar_hists(hist_list, check_type=True, assert_type=used_hist_types) ) return False bws = [hist.binWidth for hist in hist_list] - if not bws.count(bws[0]) == len(bws): + if bws.count(bws[0]) != len(bws): warnings.warn( "Input SparselyBin histograms have inconsistent binWidth attributes: {types}".format( types=bws diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 74ef9694..366e2835 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -22,7 +22,7 @@ import pandas as pd -from ..base import Module +from popmon.base import Module class MergeStatistics(Module): diff --git a/popmon/analysis/profiling/__init__.py b/popmon/analysis/profiling/__init__.py index 5229425e..42ecd367 100644 --- a/popmon/analysis/profiling/__init__.py +++ b/popmon/analysis/profiling/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -16,12 +16,13 @@ # COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ...analysis.profiling.hist_profiler import HistProfiler -from ...analysis.profiling.pull_calculator import ( +from popmon.analysis.profiling.hist_profiler import HistProfiler +from popmon.analysis.profiling.pull_calculator import ( ExpandingPullCalculator, ReferencePullCalculator, RollingPullCalculator, ) + from .profiles import Profiles __all__ = [ diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index e2db9148..63f78496 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,8 +21,8 @@ import numpy as np import pandas as pd -from ...base import Module -from ...hist.hist_utils import get_bin_centers, is_numeric, is_timestamp +from popmon.base import Module +from popmon.hist.hist_utils import get_bin_centers, is_numeric, is_timestamp class HistProfiler(Module): @@ -71,7 +71,7 @@ def __init__( self.index_col = index_col if stats_functions is not None: - raise NotImplementedError() + raise NotImplementedError def _profile_1d_histogram(self, name, hist): from popmon.analysis import Profiles diff --git a/popmon/analysis/profiling/profiles.py b/popmon/analysis/profiling/profiles.py index 1a348b1c..43e5a5fc 100644 --- a/popmon/analysis/profiling/profiles.py +++ b/popmon/analysis/profiling/profiles.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -20,11 +20,10 @@ import numpy as np +from popmon.analysis.hist_numpy import get_2dgrid from popmon.base.registry import Registry - -from ...analysis.hist_numpy import get_2dgrid -from ...hist.hist_utils import sum_entries -from ...stats import numpy as pm_np +from popmon.hist.hist_utils import sum_entries +from popmon.stats import numpy as pm_np Profiles = Registry() @@ -45,7 +44,7 @@ dim=1, htype="num", ) -def profile_quantiles(x, w, bin_width): +def profile_quantiles(x, w, _): return tuple( pm_np.quantile( x, q=[0.0, 1.0, 0.01, 0.05, 0.16, 0.50, 0.84, 0.95, 0.99], weights=w @@ -54,12 +53,12 @@ def profile_quantiles(x, w, bin_width): @Profiles.register(key="mean", description="Mean value", dim=1, htype="num") -def profile_mean(x, w, bin_width): +def profile_mean(x, w, _): return pm_np.mean(x, w) @Profiles.register(key="std", description="Standard deviation", dim=1, htype="num") -def profile_std(x, w, bin_width): +def profile_std(x, w, _): return pm_np.std(x, w) diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index f250fc37..6795559e 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,16 +21,16 @@ import numpy as np import pandas as pd -from ...analysis.apply_func import ApplyFunc -from ...analysis.functions import ( +from popmon.analysis.apply_func import ApplyFunc +from popmon.analysis.functions import ( expanding_mean, expanding_std, pull, rolling_mean, rolling_std, ) -from ...base import Pipeline -from ...stats.numpy import mad +from popmon.base import Pipeline +from popmon.stats.numpy import mad class PullCalculator(Pipeline): @@ -82,10 +82,10 @@ def __init__( apply_to_key=apply_to_key, assign_to_key=assign_to_key, features=features ) calc_mean_std.add_apply_func( - func_std, suffix=suffix_std, entire=True, *args, **kwargs + func_std, *args, suffix=suffix_std, entire=True, **kwargs ) calc_mean_std.add_apply_func( - func_mean, suffix=suffix_mean, entire=True, *args, **kwargs + func_mean, *args, suffix=suffix_mean, entire=True, **kwargs ) calc_pull = ApplyFunc( diff --git a/popmon/base/__init__.py b/popmon/base/__init__.py index 1e5812be..5b699d20 100644 --- a/popmon/base/__init__.py +++ b/popmon/base/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,7 +18,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..base.module import Module -from ..base.pipeline import Pipeline +from popmon.base.module import Module +from popmon.base.pipeline import Pipeline __all__ = ["Module", "Pipeline"] diff --git a/popmon/base/module.py b/popmon/base/module.py index 0f524a93..3c27d415 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index 292f3ea8..ccad7b1a 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,13 +18,13 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. import logging -from abc import ABC +from typing import Optional -class Pipeline(ABC): - """Abstract base class used for to run modules in a pipeline.""" +class Pipeline: + """Base class used for to run modules in a pipeline.""" - def __init__(self, modules, logger=None): + def __init__(self, modules, logger: Optional[logging.Logger] = None): """Initialization of the pipeline :param list modules: modules of the pipeline. @@ -33,14 +33,12 @@ def __init__(self, modules, logger=None): self.modules = modules self.set_logger(logger) - def set_logger(self, logger): + def set_logger(self, logger: Optional[logging.Logger]): """Set the logger to be used by each module :param logger: input logger """ - self.logger = logger - if self.logger is None: - self.logger = logging.getLogger() + self.logger = logger or logging.getLogger() for module in self.modules: module.set_logger(self.logger) @@ -56,7 +54,7 @@ def transform(self, datastore): """Central function of the pipeline. Calls transform() of each module in the pipeline. - Typically transform() of a module takes something from the datastore, does something to it, + Typically, transform() of a module takes something from the datastore, does something to it, and puts the results back into the datastore again, to be passed on to the next module in the pipeline. :param dict datastore: input datastore diff --git a/popmon/base/registry.py b/popmon/base/registry.py index ccb7e509..10b0d26a 100644 --- a/popmon/base/registry.py +++ b/popmon/base/registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -103,9 +103,7 @@ def get_keys(self) -> List[str]: def get_keys_by_dim_and_htype(self, dim, htype) -> List[str]: """Flat list of keys for a provided dimension and histogram type""" - return [ - v for values in self._properties_to_func[dim][htype].keys() for v in values - ] + return [v for values in self._properties_to_func[dim][htype] for v in values] def get_descriptions(self) -> Dict[str, str]: """Dictionary of key->description associated with registered functions""" diff --git a/popmon/config.py b/popmon/config.py index 545957ba..a85feba1 100644 --- a/popmon/config.py +++ b/popmon/config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/decorators/__init__.py b/popmon/decorators/__init__.py index 4064f6a9..3fa7d054 100644 --- a/popmon/decorators/__init__.py +++ b/popmon/decorators/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,7 +18,5 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# flake8: noqa - # import pandas and spark DataFrame decorators -from ..decorators import pandas, spark +from popmon.decorators import pandas, spark # noqa: F401 diff --git a/popmon/decorators/pandas.py b/popmon/decorators/pandas.py index 5e2fa169..a80ed88d 100644 --- a/popmon/decorators/pandas.py +++ b/popmon/decorators/pandas.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,8 +21,8 @@ from histogrammar.dfinterface.make_histograms import make_histograms from pandas import DataFrame -from ..pipeline.metrics import df_stability_metrics -from ..pipeline.report import df_stability_report +from popmon.pipeline.metrics import df_stability_metrics +from popmon.pipeline.report import df_stability_report # add function to create histogrammar histograms. # pm_make_histograms is kept for bkw compatibility. diff --git a/popmon/decorators/spark.py b/popmon/decorators/spark.py index 5e467b16..04247c76 100644 --- a/popmon/decorators/spark.py +++ b/popmon/decorators/spark.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/extensions/__init__.py b/popmon/extensions/__init__.py index cef93965..0dd04665 100644 --- a/popmon/extensions/__init__.py +++ b/popmon/extensions/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/extensions/extension.py b/popmon/extensions/extension.py index c7892e91..89151448 100644 --- a/popmon/extensions/extension.py +++ b/popmon/extensions/extension.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/extensions/profile_diptest.py b/popmon/extensions/profile_diptest.py index 22ec8460..66619430 100644 --- a/popmon/extensions/profile_diptest.py +++ b/popmon/extensions/profile_diptest.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/extensions/utils.py b/popmon/extensions/utils.py index fb5ea54a..2778c6b7 100644 --- a/popmon/extensions/utils.py +++ b/popmon/extensions/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/hist/__init__.py b/popmon/hist/__init__.py index f4b27dc9..f467235a 100644 --- a/popmon/hist/__init__.py +++ b/popmon/hist/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,6 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..hist.hist_splitter import HistSplitter +from popmon.hist.hist_splitter import HistSplitter __all__ = ["HistSplitter"] diff --git a/popmon/hist/filling/__init__.py b/popmon/hist/filling/__init__.py index 1078e963..939e371a 100644 --- a/popmon/hist/filling/__init__.py +++ b/popmon/hist/filling/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -17,7 +17,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# MB 20210323: histogrammming code hade been moved to histogrammar v1.0.20+ +# MB 20210323: histogrammming code has been moved to histogrammar v1.0.20+ # these imports are kept for backwards compatibility. from histogrammar.dfinterface.make_histograms import ( diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index f56ee794..168a729f 100644 --- a/popmon/hist/hist_splitter.py +++ b/popmon/hist/hist_splitter.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -20,8 +20,8 @@ import pandas as pd -from ..base import Module -from ..hist.hist_utils import ( +from popmon.base import Module +from popmon.hist.hist_utils import ( get_histogram, is_timestamp, split_hist_along_first_dimension, diff --git a/popmon/hist/hist_utils.py b/popmon/hist/hist_utils.py index 6a6141b6..a4a20b78 100644 --- a/popmon/hist/hist_utils.py +++ b/popmon/hist/hist_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/io/__init__.py b/popmon/io/__init__.py index ed31c714..d06c9467 100644 --- a/popmon/io/__init__.py +++ b/popmon/io/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,8 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..io.file_reader import FileReader -from ..io.file_writer import FileWriter -from ..io.json_reader import JsonReader +from popmon.io.file_reader import FileReader +from popmon.io.file_writer import FileWriter +from popmon.io.json_reader import JsonReader __all__ = ["FileWriter", "FileReader", "JsonReader"] diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index eb8e154d..2f5a17eb 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,7 +21,7 @@ from pathlib import Path from typing import Callable, Optional, Union -from ..base import Module +from popmon.base import Module class FileReader(Module): diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 1615a564..ba12e544 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -22,7 +22,7 @@ from pathlib import Path from typing import Callable, Optional, Union -from ..base import Module +from popmon.base import Module class FileWriter(Module): diff --git a/popmon/io/json_reader.py b/popmon/io/json_reader.py index 41404824..1aad052a 100644 --- a/popmon/io/json_reader.py +++ b/popmon/io/json_reader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -22,7 +22,7 @@ from pathlib import Path from typing import Union -from ..io import FileReader +from popmon.io import FileReader class JsonReader(FileReader): diff --git a/popmon/pipeline/__init__.py b/popmon/pipeline/__init__.py index 0cb6dea0..bc081466 100644 --- a/popmon/pipeline/__init__.py +++ b/popmon/pipeline/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/pipeline/amazing_pipeline.py b/popmon/pipeline/amazing_pipeline.py index edf4571a..8c26ab06 100644 --- a/popmon/pipeline/amazing_pipeline.py +++ b/popmon/pipeline/amazing_pipeline.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in diff --git a/popmon/pipeline/dataset_splitter.py b/popmon/pipeline/dataset_splitter.py index c8b0d0d2..ad904077 100644 --- a/popmon/pipeline/dataset_splitter.py +++ b/popmon/pipeline/dataset_splitter.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -67,7 +67,7 @@ def _split_dataset_spark( :param partition_cols: cols to partition by (for performance) :param persist: persist or not, enabled by default since we are checking for empty dataframes """ - from pyspark.sql import functions as F + from pyspark.sql import functions as F # noqa: N812 from pyspark.sql.window import Window if split_type in ["n_instances", "fraction"]: diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py index 410f022f..0a8b6e18 100644 --- a/popmon/pipeline/metrics.py +++ b/popmon/pipeline/metrics.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -22,8 +22,8 @@ from histogrammar.dfinterface.make_histograms import get_bin_specs, make_histograms -from ..config import Settings -from ..pipeline.metrics_pipelines import create_metrics_pipeline +from popmon.config import Settings +from popmon.pipeline.metrics_pipelines import create_metrics_pipeline logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s [%(module)s]: %(message)s" diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index c1c5e6b9..faef770c 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,31 +18,32 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from typing import List, Union -from ..alerting import ( +from popmon.alerting import ( AlertsSummary, DynamicBounds, StaticBounds, TrafficLightAlerts, traffic_light_summary, ) -from ..analysis.apply_func import ApplyFunc -from ..analysis.comparison.hist_comparer import ( +from popmon.analysis.apply_func import ApplyFunc +from popmon.analysis.comparison.hist_comparer import ( ExpandingHistComparer, PreviousHistComparer, ReferenceHistComparer, RollingHistComparer, ) -from ..analysis.functions import rolling_lr_zscore -from ..analysis.profiling import HistProfiler -from ..analysis.profiling.pull_calculator import ( +from popmon.analysis.functions import rolling_lr_zscore +from popmon.analysis.profiling import HistProfiler +from popmon.analysis.profiling.pull_calculator import ( ExpandingPullCalculator, ReferencePullCalculator, RefMedianMadPullCalculator, RollingPullCalculator, ) -from ..base import Module, Pipeline -from ..config import Settings -from ..hist.hist_splitter import HistSplitter +from popmon.base import Module, Pipeline +from popmon.config import Settings +from popmon.hist.hist_splitter import HistSplitter + from .timing import Timing diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index a69f843e..6f09fe33 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -23,10 +23,10 @@ from histogrammar.dfinterface.make_histograms import get_bin_specs, make_histograms -from ..config import Settings -from ..pipeline.dataset_splitter import split_dataset -from ..pipeline.report_pipelines import ReportPipe, get_report_pipeline_class -from ..resources import templates_env +from popmon.config import Settings +from popmon.pipeline.dataset_splitter import split_dataset +from popmon.pipeline.report_pipelines import ReportPipe, get_report_pipeline_class +from popmon.resources import templates_env logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s [%(module)s]: %(message)s" @@ -129,7 +129,7 @@ def df_stability_report( and settings.time_axis not in reference.columns ): raise ValueError( - f'time_axis "{settings.time_axis}" not found in columns of reference dataframe.' + f'time_axis "{settings.time_axis}" not found in columns of reference dataframe.' ) if settings.features is not None: diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index 36f3c787..59164316 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -22,23 +22,23 @@ from typing_extensions import Literal -from ..base import Pipeline -from ..config import Settings -from ..io import FileWriter -from ..pipeline.metrics_pipelines import ( +from popmon.base import Pipeline +from popmon.config import Settings +from popmon.io import FileWriter +from popmon.pipeline.metrics_pipelines import ( ExpandingReferenceMetricsPipeline, ExternalReferenceMetricsPipeline, RollingReferenceMetricsPipeline, SelfReferenceMetricsPipeline, ) -from ..visualization import ( +from popmon.visualization import ( AlertSectionGenerator, HistogramSection, ReportGenerator, SectionGenerator, TrafficLightSectionGenerator, ) -from ..visualization.overview_section import OverviewSectionGenerator +from popmon.visualization.overview_section import OverviewSectionGenerator def get_report_pipeline_class( diff --git a/popmon/pipeline/timing.py b/popmon/pipeline/timing.py index 60c7f5a6..95079214 100644 --- a/popmon/pipeline/timing.py +++ b/popmon/pipeline/timing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,7 +18,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. from datetime import datetime -from ..base import Module +from popmon.base import Module class Timing(Module): diff --git a/popmon/resources.py b/popmon/resources.py index 9ac02219..da82d22d 100644 --- a/popmon/resources.py +++ b/popmon/resources.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -48,7 +48,8 @@ _TEMPLATES_ENV = Environment( loader=FileSystemLoader( resource_filename(popmon.__name__, "visualization/templates") - ) + ), + autoescape=True, ) _TEMPLATES_ENV.filters["fmt_metric"] = lambda x: x.replace("_", " ") diff --git a/popmon/stats/__init__.py b/popmon/stats/__init__.py index 5761f1b7..31639424 100644 --- a/popmon/stats/__init__.py +++ b/popmon/stats/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,6 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..stats.numpy import mean, median, quantile, std +from popmon.stats.numpy import mean, median, quantile, std __all__ = ["mean", "std", "median", "quantile"] diff --git a/popmon/stats/numpy.py b/popmon/stats/numpy.py index 16c81529..cc41eea1 100644 --- a/popmon/stats/numpy.py +++ b/popmon/stats/numpy.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -123,8 +123,8 @@ def quantile(a, q, weights=None, axis=None, keepdims: bool = False): idx = np.argsort(raveled_data) sorted_data = raveled_data[idx] sorted_weights = np.ravel(weights)[idx] - Sn = np.cumsum(sorted_weights) - Pn = (Sn - 0.5 * sorted_weights) / Sn[-1] + Sn = np.cumsum(sorted_weights) # noqa: N806 + Pn = (Sn - 0.5 * sorted_weights) / Sn[-1] # noqa: N806 y = np.interp(q, Pn, sorted_data) if keepdims: y = y.reshape((*y.shape, *(1,) * np.ndim(a))) @@ -233,7 +233,7 @@ def mad(a, c=0.6745, axis=0): Kindly taken from statsmodels package and then modified to work with dataframes as well. Reference: https://www.statsmodels.org/dev/_modules/statsmodels/robust/scale.html#mad License: https://github.com/statsmodels/statsmodels/blob/master/LICENSE.txt - All modifications copyright ING WBAA. + All modifications copyright INGA WB. :param a: array_like Input array. :param float c: optional. The normalization constant. Defined as scipy.stats.norm.ppf(3/4.), diff --git a/popmon/stitching/__init__.py b/popmon/stitching/__init__.py index 829dbe33..70eee546 100644 --- a/popmon/stitching/__init__.py +++ b/popmon/stitching/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,6 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -from ..stitching.hist_stitcher import HistStitcher, stitch_histograms +from popmon.stitching.hist_stitcher import HistStitcher, stitch_histograms __all__ = ["HistStitcher", "stitch_histograms"] diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index 423e096e..be511a1b 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -21,8 +21,8 @@ import histogrammar as hg import numpy as np -from ..analysis.hist_numpy import assert_similar_hists -from ..base import Module +from popmon.analysis.hist_numpy import assert_similar_hists +from popmon.base import Module class HistStitcher(Module): @@ -341,7 +341,7 @@ def _insert_hists(self, hbasis, hdelta_list, time_bin_idx, mode): ) if len(hbasis.bins) > 0: hbk0 = list(hbasis.bins.values())[0] - assert_similar_hists([hbk0] + hdelta_list) + assert_similar_hists([hbk0, *hdelta_list]) else: assert_similar_hists(hdelta_list) @@ -349,9 +349,10 @@ def _insert_hists(self, hbasis, hdelta_list, time_bin_idx, mode): if isinstance(time_bin_idx[0], str): if not isinstance(hbasis, hg.Categorize): raise TypeError("hbasis does not accept string time-values.") - elif isinstance(time_bin_idx[0], (int, np.integer)): - if not isinstance(hbasis, hg.SparselyBin): - raise TypeError("hbasis does not accept integer time-values.") + elif isinstance(time_bin_idx[0], (int, np.integer)) and not isinstance( + hbasis, hg.SparselyBin + ): + raise TypeError("hbasis does not accept integer time-values.") # stitch all the hdeltas into hbasis hsum = hbasis.copy() @@ -391,7 +392,7 @@ def _create_hist_with_time_axis(self, hist, time_bin_idx): hg.SparselyBin(binWidth=1.0, origin=0.0, quantity=lambda x: x) if isinstance(time_bin_idx, int) else hg.Categorize(quantity=lambda x: x) - ) # noqa + ) ht.bins[time_bin_idx] = hist ht.entries = hist.entries return ht diff --git a/popmon/utils.py b/popmon/utils.py index 20995c76..eab48b6d 100644 --- a/popmon/utils.py +++ b/popmon/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -31,10 +31,11 @@ def short_date(date): """ if isinstance(date, pd.Timestamp): # Drop the time of day when midnight or noon - if date.hour in [0, 12] and date.minute == 0 and date.second == 0: - d = str(date).split(" ")[0] - else: - d = str(date) + d = ( + str(date).split(" ")[0] + if date.hour in [0, 12] and date.minute == 0 and date.second == 0 + else str(date) + ) else: d = str(date) diff --git a/popmon/visualization/__init__.py b/popmon/visualization/__init__.py index acbdca5d..d455e180 100644 --- a/popmon/visualization/__init__.py +++ b/popmon/visualization/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -18,8 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# flake8: noqa - from popmon.visualization.alert_section_generator import AlertSectionGenerator from popmon.visualization.histogram_section import HistogramSection from popmon.visualization.report_generator import ReportGenerator diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index db6965d6..c189a83f 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -23,9 +23,10 @@ import pandas as pd from tqdm import tqdm -from ..base import Module -from ..config import Report -from ..utils import filter_metrics, short_date +from popmon.base import Module +from popmon.config import Report +from popmon.utils import filter_metrics, short_date + from .traffic_light_section_generator import _plot_metrics @@ -48,7 +49,7 @@ def __init__( static_bounds=None, dynamic_bounds=None, prefix="traffic_light_", - suffices=["_red_high", "_yellow_high", "_yellow_low", "_red_low"], + suffices=None, ignore_stat_endswith=None, ): """Initialize an instance of SectionGenerator. @@ -72,7 +73,12 @@ def __init__( self.features = features or [] self.ignore_features = ignore_features or [] self.prefix = prefix - self.suffices = suffices + self.suffices = suffices or [ + "_red_high", + "_yellow_high", + "_yellow_low", + "_red_low", + ] self.ignore_stat_endswith = ignore_stat_endswith or [] self.last_n = settings.last_n @@ -118,9 +124,8 @@ def transform( assert all(df.index == fdbounds.index) # prepare date labels - df.drop( + df = df.drop( columns=["histogram", "reference_histogram"], - inplace=True, errors="ignore", ) dates = [short_date(date) for date in df.index.tolist()] diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index 0dd6fb6a..9f8b94ec 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -25,15 +25,15 @@ from histogrammar.util import get_hist_props from tqdm import tqdm -from ..analysis.hist_numpy import ( +from popmon.analysis.hist_numpy import ( assert_similar_hists, get_consistent_numpy_1dhists, get_consistent_numpy_entries, ) -from ..base import Module -from ..config import HistogramSectionModel -from ..utils import parallel, short_date -from ..visualization.utils import ( +from popmon.base import Module +from popmon.config import HistogramSectionModel +from popmon.utils import parallel, short_date +from popmon.visualization.utils import ( histogram_basic_checks, plot_heatmap, plot_histogram_overlay, @@ -191,9 +191,8 @@ def transform(self, data_obj: dict, sections: Optional[list] = None): # filter out potential empty heatmap plots, then prepend them to the sorted histograms hplots = [] for h in heatmaps: - if isinstance(h, dict): - if len(h["plot"]): - hplots.append(h) + if isinstance(h, dict) and len(h["plot"]): + hplots.append(h) if len(hplots) > 0: plot_type_layouts["heatmap"] = hplots[0]["layout"] @@ -274,8 +273,8 @@ def _plot_histograms(feature, date, hc_list, hist_names, top_n, max_nbins=1000): for el, hc in zip(entries_list, hc_list) ] # if categorical + # get top_n categories for histogram if not is_num: - # get top_n categories for histogram if len(bins) > top_n: entries_list = np.stack(entries_list, axis=1) entries_list, bins = get_top_categories(entries_list, bins, top_n) @@ -430,3 +429,4 @@ def hist_lookup(plot, hist_name): for pl in plot: if pl["name"] == hist_name: return pl + return None diff --git a/popmon/visualization/overview_section.py b/popmon/visualization/overview_section.py index 6a4d150b..b40ad85f 100644 --- a/popmon/visualization/overview_section.py +++ b/popmon/visualization/overview_section.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -25,12 +25,12 @@ import pandas as pd from tqdm import tqdm -from ..base import Module -from ..config import Report -from ..resources import templates_env -from ..utils import filter_metrics -from ..version import version as __version__ -from ..visualization.utils import _prune, get_reproduction_table, get_summary_table +from popmon.base import Module +from popmon.config import Report +from popmon.resources import templates_env +from popmon.utils import filter_metrics +from popmon.version import version as __version__ +from popmon.visualization.utils import _prune, get_reproduction_table, get_summary_table class OverviewSectionGenerator(Module): @@ -55,7 +55,7 @@ def __init__( static_bounds=None, dynamic_bounds=None, prefix="traffic_light_", - suffices=["_red_high", "_yellow_high", "_yellow_low", "_red_low"], + suffices=None, ignore_stat_endswith=None, ): """Initialize an instance of SectionGenerator. @@ -81,7 +81,12 @@ def __init__( self.features = features or [] self.ignore_features = ignore_features or [] self.prefix = prefix - self.suffices = suffices + self.suffices = suffices or [ + "_red_high", + "_yellow_high", + "_yellow_low", + "_red_low", + ] self.ignore_stat_endswith = ignore_stat_endswith or [] self.reference_type = reference_type self.time_axis = time_axis @@ -130,9 +135,8 @@ def transform( assert all(df.index == fdbounds.index) # prepare date labels - df.drop( + df = df.drop( columns=["histogram", "reference_histogram"], - inplace=True, errors="ignore", ) @@ -152,12 +156,12 @@ def transform( tables = [] bin_width = ( self.bin_specs[self.time_axis]["bin_width"] - if self.time_axis in self.bin_specs.keys() + if self.time_axis in self.bin_specs else 0 ) if ( - self.time_axis in self.bin_specs.keys() + self.time_axis in self.bin_specs and self.bin_specs[self.time_axis]["bin_offset"] > 0 ): offset = datetime.utcfromtimestamp( diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index a31a0e7e..f9b8c791 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -20,10 +20,10 @@ import htmlmin -from ..base import Module -from ..config import Report -from ..resources import templates_env -from ..version import version +from popmon.base import Module +from popmon.config import Report +from popmon.resources import templates_env +from popmon.version import version class ReportGenerator(Module): diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 3fb6ba03..f25263b6 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -25,11 +25,10 @@ from popmon.analysis.comparison import Comparisons from popmon.analysis.profiling import Profiles - -from ..base import Module -from ..config import Report -from ..utils import filter_metrics, parallel, short_date -from ..visualization.utils import _prune, plot_bars +from popmon.base import Module +from popmon.config import Report +from popmon.utils import filter_metrics, parallel, short_date +from popmon.visualization.utils import _prune, plot_bars profiles = Profiles.get_descriptions() @@ -99,7 +98,7 @@ def __init__( static_bounds=None, dynamic_bounds=None, prefix="traffic_light_", - suffices=["_red_high", "_yellow_high", "_yellow_low", "_red_low"], + suffices=None, ignore_stat_endswith=None, description="", ): @@ -130,7 +129,12 @@ def __init__( self.skip_first_n = settings.skip_first_n self.skip_last_n = settings.skip_last_n self.prefix = prefix - self.suffices = suffices + self.suffices = suffices or [ + "_red_high", + "_yellow_high", + "_yellow_low", + "_red_low", + ] self.ignore_stat_endswith = ignore_stat_endswith or [] self.description = description self.show_stats = settings.show_stats if not settings.extended_report else None @@ -166,9 +170,8 @@ def transform( assert all(df.index == fdbounds.index) # prepare date labels - df.drop( + df = df.drop( columns=["histogram", "reference_histogram"], - inplace=True, errors="ignore", ) dates = np.array([short_date(date) for date in df.index.tolist()]) @@ -304,10 +307,10 @@ def _plot_metric( "description": get_stat_description(metric), "plot": plot["data"], "shapes": plot["layout"]["shapes"] if "shapes" in plot["layout"] else "", - "yaxis_range": [ - "null" if r is None else r for r in plot["layout"]["yaxis"]["range"] - ] - if "range" in plot["layout"]["yaxis"] - else "", + "yaxis_range": ( + ["null" if r is None else r for r in plot["layout"]["yaxis"]["range"]] + if "range" in plot["layout"]["yaxis"] + else "" + ), "layout": plot["layout"], } diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 86d7618f..4fc55678 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -24,10 +24,10 @@ import pandas as pd from tqdm import tqdm -from ..base import Module -from ..config import Report -from ..utils import filter_metrics, short_date -from ..visualization.utils import ( +from popmon.base import Module +from popmon.config import Report +from popmon.utils import filter_metrics, short_date +from popmon.visualization.utils import ( _prune, plot_traffic_lights_alerts_aggregate, plot_traffic_lights_overview, @@ -53,7 +53,7 @@ def __init__( static_bounds=None, dynamic_bounds=None, prefix="traffic_light_", - suffices=["_red_high", "_yellow_high", "_yellow_low", "_red_low"], + suffices=None, ignore_stat_endswith=None, ): """Initialize an instance of SectionGenerator. @@ -80,7 +80,12 @@ def __init__( self.skip_first_n = settings.skip_first_n self.skip_last_n = settings.skip_last_n self.prefix = prefix - self.suffices = suffices + self.suffices = suffices or [ + "_red_high", + "_yellow_high", + "_yellow_low", + "_red_low", + ] self.ignore_stat_endswith = ignore_stat_endswith or [] self.show_stats = settings.show_stats if not settings.extended_report else None @@ -117,9 +122,8 @@ def transform( assert all(df.index == fdbounds.index) # prepare date labels - df.drop( + df = df.drop( columns=["histogram", "reference_histogram"], - inplace=True, errors="ignore", ) dates = [short_date(date) for date in df.index.tolist()] diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py index d2640e32..6e8ec810 100644 --- a/popmon/visualization/utils.py +++ b/popmon/visualization/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics +# Copyright (c) 2023 ING Analytics Wholesale Banking # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in @@ -391,18 +391,17 @@ def plot_traffic_lights_alerts_aggregate( # basic checks for histograms -def histogram_basic_checks(plots={}): - if len(plots) == 0: +def histogram_basic_checks(plots=None): + if plots is None or len(plots) == 0: return for plot in plots: if len(plot["hist_names"]) == 0: plot["hist_names"] = [f"hist{i}" for i in range(len(plot["hists"]))] - if plot["hist_names"]: - if len(plot["hists"]) != len(plot["hist_names"]): - raise ValueError("length of hist and hist_names are different") + if plot["hist_names"] and len(plot["hists"]) != len(plot["hist_names"]): + raise ValueError("length of hist and hist_names are different") - for i, hist in enumerate(plot["hists"]): + for hist in plot["hists"]: try: hist_values, hist_bins = hist except BaseException as e: @@ -437,7 +436,7 @@ def histogram_basic_checks(plots={}): def plot_histogram_overlay( - plots=[], + plots=None, is_num=True, is_ts=False, is_static_reference=True, @@ -450,7 +449,7 @@ def plot_histogram_overlay( Kindly taken from Eskapade package and then modified. Reference link: https://github.com/KaveIO/Eskapade/blob/master/python/eskapade/visualization/vis_utils.py#L397 License: https://github.com/KaveIO/Eskapade-Core/blob/master/LICENSE - Modifications copyright ING WBAA. + Modifications copyright INGA WB. :param list plots: list of dicts containing histograms for all timestamps :param bool is_num: True if observable to plot is numeric. default is True. @@ -462,6 +461,8 @@ def plot_histogram_overlay( :rtype: str """ + if plots is None: + plots = [] fig = go.Figure() alpha = 0.4 @@ -469,7 +470,7 @@ def plot_histogram_overlay( # check number of plots if len(plots) < 2: warnings.warn("insufficient plots for histogram inspection") - return + return None base_plot = plots[0] @@ -479,7 +480,6 @@ def plot_histogram_overlay( # plot numeric and time stamps if is_num: - # plot histogram for index in range(n_choices): bin_edges = plots[index]["hists"][0][1] @@ -513,12 +513,16 @@ def plot_histogram_overlay( y=bin_values, opacity=alpha, showlegend=True, - name="no_ref" - if len(plots[index]["hists"]) < 2 - else "Reference" - if is_static_reference - else (plots[index]["date"] + "-") - + plots[index]["hist_names"][1].split("_")[-1], + name=( + "no_ref" + if len(plots[index]["hists"]) < 2 + else ( + "Reference" + if is_static_reference + else (plots[index]["date"] + "-") + + plots[index]["hist_names"][1].split("_")[-1] + ) + ), meta=index + 2, ) ) @@ -529,7 +533,6 @@ def plot_histogram_overlay( # plot categories else: - # plot histogram for first 'n_choices' timestamps for index in range(n_choices): labels = plots[index]["hists"][0][1] @@ -563,13 +566,17 @@ def plot_histogram_overlay( y=values, opacity=alpha, showlegend=True, - name="no_ref" - if len(plots[index]["hists"]) < 2 - else "Reference" - if is_static_reference - else plots[index]["date"] - + " " - + plots[index]["hist_names"][1].split("_")[-1], + name=( + "no_ref" + if len(plots[index]["hists"]) < 2 + else ( + "Reference" + if is_static_reference + else plots[index]["date"] + + " " + + plots[index]["hist_names"][1].split("_")[-1] + ) + ), meta=index + n_choices, ) ) @@ -619,19 +626,25 @@ def plot_histogram_overlay( { "y": [ plot["hists"][0][0], - [0 for _ in range(len(plot["hists"][0][0]))] - if len(plot["hists"]) < 2 - else plot["hists"][1][0], + ( + [0 for _ in range(len(plot["hists"][0][0]))] + if len(plot["hists"]) < 2 + else plot["hists"][1][0] + ), ], "name": [ plot["date"], - "no_ref" - if len(plot["hist_names"]) < 2 - else "Reference" - if is_static_reference - else plots[index]["date"] - + " " - + plot["hist_names"][1].split("_")[-1], + ( + "no_ref" + if len(plot["hist_names"]) < 2 + else ( + "Reference" + if is_static_reference + else plots[index]["date"] + + " " + + plot["hist_names"][1].split("_")[-1] + ) + ), ], }, [b, b + 2], @@ -705,7 +718,7 @@ def plot_heatmap( Kindly taken from Eskapade package and then modified. Reference link: https://github.com/KaveIO/Eskapade/blob/master/python/eskapade/visualization/vis_utils.py#L397 License: https://github.com/KaveIO/Eskapade-Core/blob/master/LICENSE - Modifications copyright ING WBAA. + Modifications copyright INGA WB. :param list hist_values: values of heatmap in a 2d numpy array = :param list hist_bins: bin labels/edges on y-axis @@ -720,9 +733,8 @@ def plot_heatmap( :return: base64 encoded plot image :rtype: str """ - if hist_name: - if len(hist_name) == 0: - raise ValueError("length of heatmap names is zero") + if hist_name and len(hist_name) == 0: + raise ValueError("length of heatmap names is zero") assert hist_values is not None and len( hist_values diff --git a/setup.py b/setup.py index 8a4df8c5..9de0f04f 100644 --- a/setup.py +++ b/setup.py @@ -26,12 +26,12 @@ def setup_package() -> None: version=__version__, url="https://github.com/ing-bank/popmon", license="MIT", - author="ING Wholesale Banking Advanced Analytics", + author="ING Analytics Wholesale Banking", description="Monitor the stability of a pandas or spark dataset", keywords="pandas spark data-science data-analysis monitoring statistics python jupyter ipython", long_description=long_description, long_description_content_type="text/x-rst", - python_requires=">=3.6", + python_requires=">=3.7", packages=find_packages(), install_requires=REQUIREMENTS, extras_require=EXTRAS, diff --git a/tests/popmon/alerting/test_compute_tl_bounds.py b/tests/popmon/alerting/test_compute_tl_bounds.py index 9e97ded3..e3fe58b2 100644 --- a/tests/popmon/alerting/test_compute_tl_bounds.py +++ b/tests/popmon/alerting/test_compute_tl_bounds.py @@ -15,7 +15,6 @@ def test_collect_traffic_light_bounds(): def test_compute_traffic_light_bounds(): - datastore = {"test_data": pytest.test_comparer_df} conf = { diff --git a/tests/popmon/alerting/test_integration.py b/tests/popmon/alerting/test_integration.py index 5d9c9d51..0d3efb5b 100644 --- a/tests/popmon/alerting/test_integration.py +++ b/tests/popmon/alerting/test_integration.py @@ -1,5 +1,5 @@ import pandas as pd -from pytest import test_comparer_df +import pytest from popmon.alerting import AlertsSummary, ComputeTLBounds, traffic_light_summary from popmon.analysis.apply_func import ApplyFunc @@ -7,7 +7,7 @@ def test_integration_alerting(): - datastore = {"test_data": test_comparer_df} + datastore = {"test_data": pytest.test_comparer_df} conf = { "monitoring_rules": { @@ -50,7 +50,7 @@ def test_integration_alerting(): def test_traffic_light_summary(): - datastore = {"test_data": test_comparer_df} + datastore = {"test_data": pytest.test_comparer_df} conf = { "monitoring_rules": { @@ -94,7 +94,7 @@ def test_traffic_light_summary(): def test_traffic_light_summary_combination(): - datastore = {"test_data": test_comparer_df} + datastore = {"test_data": pytest.test_comparer_df} conf = { "monitoring_rules": { diff --git a/tests/popmon/analysis/comparison/test_hist_comparer.py b/tests/popmon/analysis/comparison/test_hist_comparer.py index 0afb31a3..c49dcfcb 100644 --- a/tests/popmon/analysis/comparison/test_hist_comparer.py +++ b/tests/popmon/analysis/comparison/test_hist_comparer.py @@ -200,7 +200,6 @@ def test_expanding_hist_comparer(): @pytest.mark.filterwarnings("ignore:An input array is constant") @pytest.mark.filterwarnings("ignore:invalid value encountered in true_divide") def test_rolling_hist_comparer(): - hist_list = ["date:country", "date:bankrupt", "date:num_employees", "date:A_score"] features = ["country", "bankrupt", "num_employees", "A_score"] diff --git a/tests/popmon/analysis/profiling/test_apply_func.py b/tests/popmon/analysis/profiling/test_apply_func.py index 8a53e87e..556a0e7d 100644 --- a/tests/popmon/analysis/profiling/test_apply_func.py +++ b/tests/popmon/analysis/profiling/test_apply_func.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from pytest import test_comparer_df +import pytest from popmon.analysis.apply_func import ApplyFunc, apply_func, apply_func_array from popmon.analysis.functions import pull @@ -71,7 +71,7 @@ def func(x): def test_variance_comparer(): - datastore = {"to_profile": test_comparer_df} + datastore = {"to_profile": pytest.test_comparer_df} module1 = ApplyFunc( apply_to_key="to_profile", features=["the_feature", "dummy_feature"] @@ -98,7 +98,7 @@ def test_variance_comparer(): def test_reference_pull_comparer(): - datastore = {"to_profile": test_comparer_df} + datastore = {"to_profile": pytest.test_comparer_df} mod = ReferencePullCalculator( reference_key="to_profile", @@ -116,7 +116,7 @@ def test_reference_pull_comparer(): def test_median_mad_pull_comparer(): - datastore = {"to_profile": test_comparer_df} + datastore = {"to_profile": pytest.test_comparer_df} mod = RefMedianMadPullCalculator( reference_key="to_profile", @@ -134,7 +134,7 @@ def test_median_mad_pull_comparer(): def test_rolling_pull_comparer(): - datastore = {"to_profile": test_comparer_df} + datastore = {"to_profile": pytest.test_comparer_df} mod = RollingPullCalculator( read_key="to_profile", features=["the_feature", "dummy_feature"], window=3 @@ -151,7 +151,7 @@ def test_rolling_pull_comparer(): def test_expanding_pull_comparer(): - datastore = {"to_profile": test_comparer_df} + datastore = {"to_profile": pytest.test_comparer_df} mod = ExpandingPullCalculator( read_key="to_profile", features=["the_feature", "dummy_feature"] diff --git a/tests/popmon/analysis/profiling/test_hist_profiler.py b/tests/popmon/analysis/profiling/test_hist_profiler.py index 2a233349..6550d01f 100644 --- a/tests/popmon/analysis/profiling/test_hist_profiler.py +++ b/tests/popmon/analysis/profiling/test_hist_profiler.py @@ -14,7 +14,7 @@ def test_profile_hist1d(): split = [] np.random.seed(0) - for i in range(split_len): + for _ in range(split_len): h = hg.Bin(num_bins, 0, 1, lambda x: x) h.fill.numpy(np.random.uniform(0, 1, num_entries)) split.append({"date": pd.Timestamp("2019 - 1 - 1"), hist_name: h}) diff --git a/tests/popmon/analysis/profiling/test_profiles.py b/tests/popmon/analysis/profiling/test_profiles.py index 1f91242a..d0be3a56 100644 --- a/tests/popmon/analysis/profiling/test_profiles.py +++ b/tests/popmon/analysis/profiling/test_profiles.py @@ -1,30 +1,28 @@ import numpy as np -from popmon.analysis.profiling.profiles import ( - profile_fraction_of_true as fraction_of_true, -) +from popmon.analysis.profiling.profiles import profile_fraction_of_true def test_fraction_of_true(): - res = fraction_of_true([], []) + res = profile_fraction_of_true([], []) assert np.isnan(res) - res = fraction_of_true(["a"], [10]) + res = profile_fraction_of_true(["a"], [10]) assert np.isnan(res) - res = fraction_of_true(["a", "b", "c"], [10, 10, 10]) + res = profile_fraction_of_true(["a", "b", "c"], [10, 10, 10]) assert np.isnan(res) - res = fraction_of_true(np.array(["True", "False"]), np.array([0, 0])) + res = profile_fraction_of_true(np.array(["True", "False"]), np.array([0, 0])) assert np.isnan(res) - res = fraction_of_true(np.array(["True", "False"]), np.array([10, 10])) + res = profile_fraction_of_true(np.array(["True", "False"]), np.array([10, 10])) assert res == 0.5 - res = fraction_of_true(np.array([True, False]), [10, 10]) + res = profile_fraction_of_true(np.array([True, False]), [10, 10]) assert res == 0.5 - res = fraction_of_true(np.array(["True"]), np.array([10])) + res = profile_fraction_of_true(np.array(["True"]), np.array([10])) assert res == 1.0 - res = fraction_of_true(np.array([True]), np.array([10])) + res = profile_fraction_of_true(np.array([True]), np.array([10])) assert res == 1.0 - res = fraction_of_true(np.array(["False"]), np.array([10])) + res = profile_fraction_of_true(np.array(["False"]), np.array([10])) assert res == 0.0 - res = fraction_of_true(np.array([False]), np.array([10])) + res = profile_fraction_of_true(np.array([False]), np.array([10])) assert res == 0.0 diff --git a/tests/popmon/base/test_module.py b/tests/popmon/base/test_module.py index abd91728..ac9bfd19 100644 --- a/tests/popmon/base/test_module.py +++ b/tests/popmon/base/test_module.py @@ -23,7 +23,7 @@ def transform(self, input_array: np.ndarray): return res -@pytest.fixture +@pytest.fixture() def test_module(): return Scaler(input_key="x", output_key="scaled_x", mean=2.0, std=0.3) diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index b3e06c01..75453d90 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -66,7 +66,7 @@ def transform(self, input_array: np.ndarray, weights: np.ndarray): return result -@pytest.fixture +@pytest.fixture() def test_pipeline(): logger = logging.getLogger() logger.addHandler(logging.StreamHandler()) diff --git a/tests/popmon/base/test_registry.py b/tests/popmon/base/test_registry.py index 33a2c209..15a1561f 100644 --- a/tests/popmon/base/test_registry.py +++ b/tests/popmon/base/test_registry.py @@ -54,7 +54,7 @@ def func1(): with pytest.raises(ValueError) as e: - @DuplicatedRegistry.register(key="another", description="value") # noqa: F811 + @DuplicatedRegistry.register(key="another", description="value") def func1(): # noqa: F811 pass diff --git a/tests/popmon/conftest.py b/tests/popmon/conftest.py index 619a9b38..b6b50b8b 100644 --- a/tests/popmon/conftest.py +++ b/tests/popmon/conftest.py @@ -17,7 +17,7 @@ def get_comparer_data(): "date": [2000, 2001, 2002, 2003, 2004], } ) - df.set_index("date", inplace=True) + df = df.set_index("date") test_comparer_df["the_feature"] = df df = pd.DataFrame( @@ -26,7 +26,7 @@ def get_comparer_data(): "date": [2000, 2001, 2002, 2003, 2004], } ) - df.set_index("date", inplace=True) + df = df.set_index("date") test_comparer_df["dummy_feature"] = df return test_comparer_df diff --git a/tests/popmon/hist/test_hist_splitter.py b/tests/popmon/hist/test_hist_splitter.py index dd5546fd..7e199622 100644 --- a/tests/popmon/hist/test_hist_splitter.py +++ b/tests/popmon/hist/test_hist_splitter.py @@ -9,7 +9,6 @@ def test_hist_splitter(): - hist_list = [ "date:country", "date:bankrupt", diff --git a/tests/popmon/pipeline/test_split_dataset.py b/tests/popmon/pipeline/test_split_dataset.py index 5d95d34e..33d3639f 100644 --- a/tests/popmon/pipeline/test_split_dataset.py +++ b/tests/popmon/pipeline/test_split_dataset.py @@ -6,7 +6,7 @@ from popmon.pipeline.dataset_splitter import split_dataset -@pytest.fixture +@pytest.fixture() def test_dataframe_pandas(): n_samples = 1000 start = datetime.today() diff --git a/tests/popmon/spark/test_spark.py b/tests/popmon/spark/test_spark.py index fa6b7458..6409d6fe 100644 --- a/tests/popmon/spark/test_spark.py +++ b/tests/popmon/spark/test_spark.py @@ -16,7 +16,7 @@ spark_found = False -@pytest.fixture +@pytest.fixture() def spark_context(): if not spark_found: return None @@ -37,7 +37,7 @@ def spark_context(): return spark -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -67,7 +67,7 @@ def test_spark_stability_metrics(spark_context): assert c in list(ds.keys()) -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -121,4 +121,4 @@ def test_spark_make_histograms(spark_context): v2["data"]["name"] = f"b'{name}'" output = current_hists[name].toJson() - assert output == v1 or output == v2 + assert output in (v1, v2) diff --git a/tests/popmon/spark/test_split_dataset_spark.py b/tests/popmon/spark/test_split_dataset_spark.py index 10d8baaf..1c1b9308 100644 --- a/tests/popmon/spark/test_split_dataset_spark.py +++ b/tests/popmon/spark/test_split_dataset_spark.py @@ -15,7 +15,7 @@ spark_found = False -@pytest.fixture +@pytest.fixture() def spark_context(): if not spark_found: return None @@ -36,7 +36,7 @@ def spark_context(): return spark -@pytest.fixture +@pytest.fixture() def test_dataframe_spark(spark_context): n_samples = 1000 start = datetime.today() @@ -51,7 +51,7 @@ def test_dataframe_spark(spark_context): return spark_df -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -65,7 +65,7 @@ def test_split_dataset_spark_int(test_dataframe_spark): assert df.columns == ["date", "f1", "f2"] -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -77,7 +77,7 @@ def test_split_dataset_spark_int_underflow(test_dataframe_spark): assert e.value.args[0] == "Number of instances should be greater than 0" -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -92,7 +92,7 @@ def test_split_dataset_spark_int_overflow(test_dataframe_spark): ) -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -106,7 +106,7 @@ def test_split_dataset_spark_float(test_dataframe_spark): assert df.columns == ["date", "f1", "f2"] -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -120,7 +120,7 @@ def test_split_dataset_spark_float_round(test_dataframe_spark): assert df.columns == ["date", "f1", "f2"] -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -137,7 +137,7 @@ def test_split_dataset_spark_float_underflow(test_dataframe_spark): assert e.value.args[0] == "Fraction should be 0 > fraction > 1" -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -154,7 +154,7 @@ def test_split_dataset_spark_float_overflow(test_dataframe_spark): assert e.value.args[0] == "Fraction should be 0 > fraction > 1" -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -162,7 +162,10 @@ def test_split_dataset_spark_float_overflow(test_dataframe_spark): def test_split_dataset_spark_condition(test_dataframe_spark): reference, df = split_dataset( test_dataframe_spark, - split=f"date < '{(datetime.today() + timedelta(days=50, hours=5)).strftime('%Y-%m-%d %H:%M:%S')}'", + split=( + "date <" + f" '{(datetime.today() + timedelta(days=50, hours=5)).strftime('%Y-%m-%d %H:%M:%S')}'" + ), time_axis="date", ) @@ -172,7 +175,7 @@ def test_split_dataset_spark_condition(test_dataframe_spark): assert df.columns == ["date", "f1", "f2"] -@pytest.mark.spark +@pytest.mark.spark() @pytest.mark.xfail( not spark_found, reason="spark not found - install spark or exclude spark from tests (`pytest -m 'not spark'`)", @@ -181,7 +184,10 @@ def test_split_dataset_spark_condition_false(test_dataframe_spark): with pytest.raises(ValueError) as e: split_dataset( test_dataframe_spark, - split=f"date < '{(datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')}'", + split=( + "date <" + f" '{(datetime.today() - timedelta(days=1)).strftime('%Y-%m-%d %H:%M:%S')}'" + ), time_axis="date", ) diff --git a/tests/popmon/stats/test_numpy.py b/tests/popmon/stats/test_numpy.py index 0e8a9945..6587ea4c 100644 --- a/tests/popmon/stats/test_numpy.py +++ b/tests/popmon/stats/test_numpy.py @@ -199,7 +199,7 @@ def test_probability_distribution_mean_covariance(): max_hist_entries = 10000 rel_error = 0.1 bin_entries = [] - for k in range(n_histos): + for _ in range(n_histos): bin_probs = np.random.normal(1.0, rel_error, size=n_bins) # + basic bin_probs /= np.sum(bin_probs) bin_entries.append(np.random.multinomial(max_hist_entries, bin_probs)) diff --git a/tests/popmon/stitching/test_histogram_stitching.py b/tests/popmon/stitching/test_histogram_stitching.py index d2ef0d52..58a155bb 100644 --- a/tests/popmon/stitching/test_histogram_stitching.py +++ b/tests/popmon/stitching/test_histogram_stitching.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - import numpy as np import pytest @@ -28,7 +26,7 @@ def test_histogram_stitching(): hists_basis=hists2, hists_delta=hists2, time_axis="date", time_bin_idx=[50, 51] ) np.testing.assert_array_equal( - sorted(hists3.keys()), sorted(features1 + ["date:age"]) + sorted(hists3.keys()), sorted([*features1, "date:age"]) ) assert hists3["date:age"].entries == 800 assert hists3["date:age"].bins[50].entries == 400 @@ -37,7 +35,7 @@ def test_histogram_stitching(): # add 'date' axis to hists2 and hists2 and stitch at auto-bins 0, 1 hists3 = stitch_histograms(hists_basis=hists2, hists_delta=hists2, time_axis="date") np.testing.assert_array_equal( - sorted(hists3.keys()), sorted(features1 + ["date:age"]) + sorted(hists3.keys()), sorted([*features1, "date:age"]) ) assert hists3["date:age"].entries == 800 assert 0 in hists3["date:age"].bins @@ -48,7 +46,7 @@ def test_histogram_stitching(): hists_basis=hists2, hists_delta=hists2, time_axis="date", time_bin_idx=50 ) np.testing.assert_array_equal( - sorted(hists3.keys()), sorted(features1 + ["date:age"]) + sorted(hists3.keys()), sorted([*features1, "date:age"]) ) assert hists3["date:age"].entries == 800 assert 51 in hists3["date:age"].bins diff --git a/tests/popmon/visualization/test_report_generator.py b/tests/popmon/visualization/test_report_generator.py index 13a96cf7..2ba0485d 100644 --- a/tests/popmon/visualization/test_report_generator.py +++ b/tests/popmon/visualization/test_report_generator.py @@ -14,7 +14,6 @@ @pytest.mark.filterwarnings("ignore:invalid value encountered in true_divide") @pytest.mark.filterwarnings("ignore:All-NaN slice encountered") def test_report_generator(): - hist_list = ["date:country", "date:bankrupt", "date:num_employees", "date:A_score"] features = ["country", "bankrupt", "num_employees", "A_score"]