From 3b238c73bf2a8f4bc543f9c65aa026ad7b5b9e9b Mon Sep 17 00:00:00 2001 From: Panagiotis Papaemmanouil Date: Fri, 20 Jan 2023 15:47:43 +0200 Subject: [PATCH] set seed for numpy --- luminaire/exploration/data_exploration.py | 19 +++++++++++++++++++ luminaire/model/lad_filtering.py | 6 +++++- luminaire/model/lad_structural.py | 10 +++++++++- luminaire/model/window_density.py | 7 +++++++ .../hyperparameter_optimization.py | 3 +++ luminaire/tests/conftest.py | 1 + luminaire/tests/test_exploration.py | 1 + luminaire/tests/test_models.py | 1 + 8 files changed, 46 insertions(+), 2 deletions(-) diff --git a/luminaire/exploration/data_exploration.py b/luminaire/exploration/data_exploration.py index 08fb589..c71ea5c 100644 --- a/luminaire/exploration/data_exploration.py +++ b/luminaire/exploration/data_exploration.py @@ -166,6 +166,7 @@ def _kalman_smoothing_imputation(self, df=None, target_metric=None, imputed_metr length requirement for Kalman smoothing """ import numpy as np + np.random.seed(42) from pykalman import KalmanFilter time_series = np.array(df[target_metric], dtype=np.float64) missing_idx = np.where(np.isnan(time_series))[0] @@ -206,6 +207,7 @@ def _moving_average(self, series=None, window_length=None, train_subwindow_len=N :rtype: list """ import numpy as np + np.random.seed(42) moving_averages = [] iter_length = len(series) - window_length @@ -261,6 +263,7 @@ def _stationarizer(cls, endog=None, diff_min=1, diff_max=2, significance_level=0 """ import numpy as np + np.random.seed(42) from statsmodels.tsa.stattools import adfuller endog_diff = np.array(endog) @@ -342,6 +345,7 @@ def _detrender(self, training_data_sliced=None, detrend_order_max=2, significanc """ import numpy as np + np.random.seed(42) import pandas as pd from itertools import chain from statsmodels.tsa.stattools import adfuller @@ -434,6 +438,7 @@ def _ma_detrender(self, series=None, padded_series=None, ma_window_length=None): """ import numpy as np + 
np.random.seed(42) moving_averages = [] @@ -455,6 +460,7 @@ def _detect_window_size(self, series=None, streaming=False): :rtype: int """ import numpy as np + np.random.seed(42) n = len(series) @@ -489,6 +495,8 @@ def _local_minima(self, input_dict=None, window_length=None): :rtype: list """ import numpy as np + np.random.seed(42) + import collections ordered_dict = collections.OrderedDict(sorted(input_dict.items())) @@ -519,6 +527,7 @@ def _shift_intensity(self, change_points=None, df=None, metric=None): :rtype: list """ import numpy as np + np.random.seed(42) min_changepoint_padding_length = self.min_changepoint_padding_length @@ -600,6 +609,8 @@ def _pelt_change_point_detection(self, df=None, metric=None, min_ts_length=None, [1021 rows x 2 columns], ['2016-12-26 00:00:00', '2018-09-10 00:00:00']) """ import numpy as np + np.random.seed(42) + import pandas as pd from changepy import pelt from changepy.costs import normal_var @@ -698,6 +709,8 @@ def _trend_changes(self, input_df=None, value_column=None): """ import numpy as np + np.random.seed(42) + from scipy import stats from statsmodels.tsa.stattools import acf @@ -810,6 +823,8 @@ def kf_naive_outlier_detection(self, input_series, idx_position): False """ import numpy as np + np.random.seed(42) + from pykalman import KalmanFilter kf = KalmanFilter() @@ -841,6 +856,7 @@ def _truncate_by_data_gaps(self, df, target_metric): """ import numpy as np + np.random.seed(42) max_data_gap = abs(self.min_ts_length / 3.0) @@ -968,6 +984,7 @@ def profile(self, df, impute_only=False, **kwargs): """ import numpy as np + np.random.seed(42) min_ts_length = self.min_ts_length max_ts_length = self.max_ts_length @@ -1056,6 +1073,8 @@ def stream_profile(self, df, impute_only=False, **kwargs): from random import sample import datetime import numpy as np + np.random.seed(42) + import pandas as pd from scipy import stats diff --git a/luminaire/model/lad_filtering.py 
b/luminaire/model/lad_filtering.py index 0fc7389..0db3310 100644 --- a/luminaire/model/lad_filtering.py +++ b/luminaire/model/lad_filtering.py @@ -103,6 +103,7 @@ def _prediction_summary(cls, state_mean, state_covariance, observation_covarianc kalman gain """ import numpy as np + np.random.seed(42) try: @@ -139,6 +140,7 @@ def _training(self, data, **kwargs): """ import numpy as np + np.random.seed(42) from pykalman import KalmanFilter from numpy.linalg import LinAlgError @@ -264,6 +266,7 @@ def _scoring(cls, raw_actual=None, synthetic_actual=None, model=None, state_mean """ import numpy as np + np.random.seed(42) import scipy.stats as st from numpy.linalg import LinAlgError is_anomaly = False @@ -414,7 +417,8 @@ def score(self, observed_value, pred_date, synthetic_actual=None, **kwargs): import pandas as pd import numpy as np - + np.random.seed(42) + pred_date = pd.Timestamp(pred_date) result, model = self._scoring(raw_actual=observed_value, synthetic_actual=synthetic_actual, diff --git a/luminaire/model/lad_structural.py b/luminaire/model/lad_structural.py index 4d38a00..120f5f7 100644 --- a/luminaire/model/lad_structural.py +++ b/luminaire/model/lad_structural.py @@ -124,6 +124,7 @@ def _signals(cls, idx, m, n): :return: A numpy array containing the sinusoids corresponding to the significant frequencies """ import numpy as np + np.random.seed(42) signal = [] # Generating all the frequencies from a time series of length n @@ -150,6 +151,7 @@ def _inv_fft(cls, n_extp, n, idx, a): time series """ import numpy as np + np.random.seed(42) ts = [] for i in range(0, n_extp): # Sinusoid for the ith frequency @@ -173,6 +175,7 @@ def _fourier_extp(cls, series=None, max_trun=None, forecast_period=None): many frequencies """ import numpy as np + np.random.seed(42) import copy n = len(series) @@ -237,6 +240,7 @@ def _seasonal_arima(self, endog=None, exog=None, p=None, d=None, q=None, imodels """ import numpy as np + 
np.random.seed(42) import statsmodels.tsa.arima.model as arima # Extract the exogenous variable generated based on (imodels * 2) number of most significant @@ -290,6 +294,7 @@ def _fit(self, endog, endog_end, min_ts_mean, min_ts_mean_window, include_holida lower and upper bound of the confidence interval, flag whether holidays are included in the model as exogenous """ import numpy as np + np.random.seed(42) from pykalman import KalmanFilter import warnings warnings.filterwarnings('ignore') @@ -451,6 +456,7 @@ def _validate_model(self, data, hyper_params, result): """ import numpy as np + np.random.seed(42) import scipy.stats as st levene_alpha = 0.05 @@ -588,6 +594,7 @@ def _predict(cls, model, is_log_transformed, """ import numpy as np + np.random.seed(42) import pandas as pd import scipy.stats as st from numpy.linalg import LinAlgError @@ -773,7 +780,8 @@ def _scoring(cls, model, observed_value, pred_date, training_end=None, import pandas as pd import numpy as np - + np.random.seed(42) + # Date to predict pred_date = pd.Timestamp(pred_date) diff --git a/luminaire/model/window_density.py b/luminaire/model/window_density.py index 92f6ff5..db258e5 100644 --- a/luminaire/model/window_density.py +++ b/luminaire/model/window_density.py @@ -102,6 +102,7 @@ def _volume_shift_detection(self, mean_list=None, sd_list=None, probability_thre :rtype: int """ import numpy as np + np.random.seed(42) from bayesian_changepoint_detection.priors import const_prior from bayesian_changepoint_detection.bayesian_models import offline_changepoint_detection import bayesian_changepoint_detection.offline_likelihoods as offline_ll @@ -148,6 +149,7 @@ def _distance_function(self, data=None, called_for=None, baseline=None): :rtype: float """ import numpy as np + np.random.seed(42) import scipy.stats as stats float_min = 1e-50 float_max = 1e50 @@ -207,6 +209,7 @@ def _training_data_truncation(self, sliced_training_data=None): :rtype: list """ 
import numpy as np + np.random.seed(42) # Change point detection is performed over the means and standard deviations of the sub windows window_means = [] @@ -298,6 +301,7 @@ def _get_model(self, input_df=None, window_length=None, value_column=None, detre :rtype: tuple(list, float, float, float, int, list, luminaire.model, float) """ import numpy as np + np.random.seed(42) import pandas as pd from itertools import chain import scipy.stats as st @@ -403,6 +407,7 @@ def train(self, data, **kwargs): (True, "2018-10-10 23:00:00", ) """ import numpy as np + np.random.seed(42) import pandas as pd freq = pd.Timedelta(self._params['freq']) if self._params['freq'] not in ['S', 'T', '15T', 'H', 'D'] \ @@ -525,6 +530,7 @@ def _get_result(self, input_df=None, detrend_order=None, agg_data_model=None, va """ import numpy as np + np.random.seed(42) import pandas as pd import copy import scipy.stats as st @@ -677,6 +683,7 @@ def score(self, data, **kwargs): """ import numpy as np + np.random.seed(42) import pandas as pd is_log_transformed = self._params['is_log_transformed'] diff --git a/luminaire/optimization/hyperparameter_optimization.py b/luminaire/optimization/hyperparameter_optimization.py index b14d66d..3356b09 100644 --- a/luminaire/optimization/hyperparameter_optimization.py +++ b/luminaire/optimization/hyperparameter_optimization.py @@ -59,6 +59,7 @@ def _mape(self, actuals, predictions): :rtype: numpy.nanmean """ import numpy as np + np.random.seed(42) actuals = np.array(actuals) predictions = np.array(predictions) @@ -85,6 +86,7 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error): """ import numpy as np + np.random.seed(42) import scipy.stats as st float_min = 1e-10 @@ -137,6 +139,7 @@ def _objective_part(self, data, smoothed_series, args): """ import numpy as np + np.random.seed(42) import pandas as pd from sklearn.metrics import log_loss import copy diff --git a/luminaire/tests/conftest.py 
b/luminaire/tests/conftest.py index dc2863c..4fd9c4b 100644 --- a/luminaire/tests/conftest.py +++ b/luminaire/tests/conftest.py @@ -3,6 +3,7 @@ import pytest import pandas as pd import numpy as np +np.random.seed(42) def get_data_path(path): luminaire_test_dir = up(os.path.realpath(path)) diff --git a/luminaire/tests/test_exploration.py b/luminaire/tests/test_exploration.py index b305a06..79e265a 100644 --- a/luminaire/tests/test_exploration.py +++ b/luminaire/tests/test_exploration.py @@ -1,5 +1,6 @@ from luminaire.exploration.data_exploration import * import numpy as np +np.random.seed(42) import pandas as pd class TestDataExploration(object): diff --git a/luminaire/tests/test_models.py b/luminaire/tests/test_models.py index 2774776..a751c48 100644 --- a/luminaire/tests/test_models.py +++ b/luminaire/tests/test_models.py @@ -196,6 +196,7 @@ def test_low_freq_window_density_scoring_aggregated(self, window_density_model_d def test_lad_filtering_scoring_diff_order(self, scoring_test_data, lad_filtering_model): import numpy as np + np.random.seed(42) # check to see if scoring yields AdjustedActual with correct order of differences pred_date_normal = scoring_test_data.index[0] value_normal = scoring_test_data['raw'][0]