set seed for numpy

zillow · Jan 20, 2023 · 3b238c7
1 parent 9471eb3
commit 3b238c7
Show file tree

Hide file tree

Showing 8 changed files with 46 additions and 2 deletions.
diff --git a/luminaire/exploration/data_exploration.py b/luminaire/exploration/data_exploration.py
@@ -166,6 +166,7 @@ def _kalman_smoothing_imputation(self, df=None, target_metric=None, imputed_metr
         length requirement for Kalman smoothing
         """
         import numpy as np
+        np.random.RandomState(42)
         from pykalman import KalmanFilter
         time_series = np.array(df[target_metric], dtype=np.float64)
         missing_idx = np.where(np.isnan(time_series))[0]
@@ -206,6 +207,7 @@ def _moving_average(self, series=None, window_length=None, train_subwindow_len=N
         :rtype: list
         """
         import numpy as np
+        np.random.RandomState(42)
 
         moving_averages = []
         iter_length = len(series) - window_length
@@ -261,6 +263,7 @@ def _stationarizer(cls, endog=None, diff_min=1, diff_max=2, significance_level=0
         """
 
         import numpy as np
+        np.random.RandomState(42)
         from statsmodels.tsa.stattools import adfuller
 
         endog_diff = np.array(endog)
@@ -342,6 +345,7 @@ def _detrender(self, training_data_sliced=None, detrend_order_max=2, significanc
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
         from itertools import chain
         from statsmodels.tsa.stattools import adfuller
@@ -434,6 +438,7 @@ def _ma_detrender(self, series=None, padded_series=None, ma_window_length=None):
         """
 
         import numpy as np
+        np.random.RandomState(42)
 
         moving_averages = []
 
@@ -455,6 +460,7 @@ def _detect_window_size(self, series=None, streaming=False):
         :rtype: int
         """
         import numpy as np
+        np.random.RandomState(42)
 
         n = len(series)
 
@@ -489,6 +495,8 @@ def _local_minima(self, input_dict=None, window_length=None):
         :rtype: list
         """
         import numpy as np
+        np.random.RandomState(42)
+
         import collections
 
         ordered_dict = collections.OrderedDict(sorted(input_dict.items()))
@@ -519,6 +527,7 @@ def _shift_intensity(self, change_points=None, df=None, metric=None):
         :rtype: list
         """
         import numpy as np
+        np.random.RandomState(42)
 
         min_changepoint_padding_length = self.min_changepoint_padding_length
 
@@ -600,6 +609,8 @@ def _pelt_change_point_detection(self, df=None, metric=None, min_ts_length=None,
         [1021 rows x 2 columns], ['2016-12-26 00:00:00', '2018-09-10 00:00:00'])
         """
         import numpy as np
+        np.random.RandomState(42)
+
         import pandas as pd
         from changepy import pelt
         from changepy.costs import normal_var
@@ -698,6 +709,8 @@ def _trend_changes(self, input_df=None, value_column=None):
 
         """
         import numpy as np
+        np.random.RandomState(42)
+
         from scipy import stats
         from statsmodels.tsa.stattools import acf
 
@@ -810,6 +823,8 @@ def kf_naive_outlier_detection(self, input_series, idx_position):
         False
         """
         import numpy as np
+        np.random.RandomState(42)
+
         from pykalman import KalmanFilter
 
         kf = KalmanFilter()
@@ -841,6 +856,7 @@ def _truncate_by_data_gaps(self, df, target_metric):
         """
 
         import numpy as np
+        np.random.RandomState(42)
 
         max_data_gap = abs(self.min_ts_length / 3.0)
 
@@ -968,6 +984,7 @@ def profile(self, df, impute_only=False, **kwargs):
         """
 
         import numpy as np
+        np.random.RandomState(42)
 
         min_ts_length = self.min_ts_length
         max_ts_length = self.max_ts_length
@@ -1056,6 +1073,8 @@ def stream_profile(self, df, impute_only=False, **kwargs):
         from random import sample
         import datetime
         import numpy as np
+        np.random.RandomState(42)
+
         import pandas as pd
         from scipy import stats
 

diff --git a/luminaire/model/lad_filtering.py b/luminaire/model/lad_filtering.py
@@ -103,6 +103,7 @@ def _prediction_summary(cls, state_mean, state_covariance, observation_covarianc
         kalman gain
         """
         import numpy as np
+        np.random.RandomState(42)
 
         try:
 
@@ -139,6 +140,7 @@ def _training(self, data, **kwargs):
         """
 
         import numpy as np
+        np.random.RandomState(42)
         from pykalman import KalmanFilter
         from numpy.linalg import LinAlgError
 
@@ -264,6 +266,7 @@ def _scoring(cls, raw_actual=None, synthetic_actual=None, model=None, state_mean
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import scipy.stats as st
         from numpy.linalg import LinAlgError
         is_anomaly = False
@@ -414,7 +417,8 @@ def score(self, observed_value, pred_date, synthetic_actual=None, **kwargs):
 
         import pandas as pd
         import numpy as np
-
+        np.random.RandomState(42)
+
         pred_date = pd.Timestamp(pred_date)
 
         result, model = self._scoring(raw_actual=observed_value, synthetic_actual=synthetic_actual,

diff --git a/luminaire/model/lad_structural.py b/luminaire/model/lad_structural.py
@@ -124,6 +124,7 @@ def _signals(cls, idx, m, n):
         :return: A numpy array containing the sinusoids corresponding to the significant frequencies
         """
         import numpy as np
+        np.random.RandomState(42)
         signal = []
 
         # Generating all the frequencies from a time series of length n
@@ -150,6 +151,7 @@ def _inv_fft(cls, n_extp, n, idx, a):
         time series
         """
         import numpy as np
+        np.random.RandomState(42)
         ts = []
         for i in range(0, n_extp):
             # Sinusoid for the ith frequency
@@ -173,6 +175,7 @@ def _fourier_extp(cls, series=None, max_trun=None, forecast_period=None):
         many frequencies
         """
         import numpy as np
+        np.random.RandomState(42)
         import copy
         n = len(series)
 
@@ -237,6 +240,7 @@ def _seasonal_arima(self, endog=None, exog=None, p=None, d=None, q=None, imodels
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import statsmodels.tsa.arima.model as arima
 
         # Extract the exogenous variable generated based on (imodels * 2) number of most significant
@@ -290,6 +294,7 @@ def _fit(self, endog, endog_end, min_ts_mean, min_ts_mean_window, include_holida
         lower and upper bound of the confidence interval, flag whether holidays are included in the model as exogenous
         """
         import numpy as np
+        np.random.RandomState(42)
         from pykalman import KalmanFilter
         import warnings
         warnings.filterwarnings('ignore')
@@ -451,6 +456,7 @@ def _validate_model(self, data, hyper_params, result):
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import scipy.stats as st
 
         levene_alpha = 0.05
@@ -588,6 +594,7 @@ def _predict(cls, model, is_log_transformed,
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
         import scipy.stats as st
         from numpy.linalg import LinAlgError
@@ -773,7 +780,8 @@ def _scoring(cls, model, observed_value, pred_date, training_end=None,
 
         import pandas as pd
         import numpy as np
-
+        np.random.RandomState(42)
+
         # Date to predict
         pred_date = pd.Timestamp(pred_date)
 

diff --git a/luminaire/model/window_density.py b/luminaire/model/window_density.py
@@ -102,6 +102,7 @@ def _volume_shift_detection(self, mean_list=None, sd_list=None, probability_thre
         :rtype: int
         """
         import numpy as np
+        np.random.RandomState(42)
         from bayesian_changepoint_detection.priors import const_prior
         from bayesian_changepoint_detection.bayesian_models import offline_changepoint_detection
         import bayesian_changepoint_detection.offline_likelihoods as offline_ll
@@ -148,6 +149,7 @@ def _distance_function(self, data=None, called_for=None, baseline=None):
         :rtype: float
         """
         import numpy as np
+        np.random.RandomState(42)
         import scipy.stats as stats
         float_min = 1e-50
         float_max = 1e50
@@ -207,6 +209,7 @@ def _training_data_truncation(self, sliced_training_data=None):
         :rtype: list
         """
         import numpy as np
+        np.random.RandomState(42)
 
         # Change point detection is performed over the means and standard deviations of the sub windows
         window_means = []
@@ -298,6 +301,7 @@ def _get_model(self, input_df=None, window_length=None, value_column=None, detre
         :rtype: tuple(list, float, float, float, int, list, luminaire.model, float)
         """
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
         from itertools import chain
         import scipy.stats as st
@@ -403,6 +407,7 @@ def train(self, data, **kwargs):
         (True, "2018-10-10 23:00:00", <luminaire.model.window_density.WindowDensityModel object at 0x7fd7c5a34e80>)
         """
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
 
         freq = pd.Timedelta(self._params['freq']) if self._params['freq'] not in ['S', 'T', '15T', 'H', 'D'] \
@@ -525,6 +530,7 @@ def _get_result(self, input_df=None, detrend_order=None, agg_data_model=None, va
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
         import copy
         import scipy.stats as st
@@ -677,6 +683,7 @@ def score(self, data, **kwargs):
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
 
         is_log_transformed = self._params['is_log_transformed']

diff --git a/luminaire/optimization/hyperparameter_optimization.py b/luminaire/optimization/hyperparameter_optimization.py
@@ -59,6 +59,7 @@ def _mape(self, actuals, predictions):
         :rtype: numpy.nanmean
         """
         import numpy as np
+        np.random.RandomState(42)
 
         actuals = np.array(actuals)
         predictions = np.array(predictions)
@@ -85,6 +86,7 @@ def _synthetic_anomaly_check(self, observation, prediction, std_error):
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import scipy.stats as st
         float_min = 1e-10
 
@@ -137,6 +139,7 @@ def _objective_part(self, data, smoothed_series, args):
         """
 
         import numpy as np
+        np.random.RandomState(42)
         import pandas as pd
         from sklearn.metrics import log_loss
         import copy

diff --git a/luminaire/tests/conftest.py b/luminaire/tests/conftest.py
@@ -3,6 +3,7 @@
 import pytest
 import pandas as pd
 import numpy as np
+np.random.seed(42)  # seeds the global NumPy RNG; a bare RandomState(42) is created and discarded
 
 def get_data_path(path):
     luminaire_test_dir = up(os.path.realpath(path))

diff --git a/luminaire/tests/test_exploration.py b/luminaire/tests/test_exploration.py
@@ -1,5 +1,6 @@
 from luminaire.exploration.data_exploration import *
 import numpy as np
+np.random.seed(42)  # seeds the global NumPy RNG; a bare RandomState(42) is created and discarded
 import pandas as pd
 
 class TestDataExploration(object):

diff --git a/luminaire/tests/test_models.py b/luminaire/tests/test_models.py
@@ -196,6 +196,7 @@ def test_low_freq_window_density_scoring_aggregated(self, window_density_model_d
 
     def test_lad_filtering_scoring_diff_order(self, scoring_test_data, lad_filtering_model):
         import numpy as np
+        np.random.RandomState(42)
         # check to see if scoring yields AdjustedActual with correct order of differences
         pred_date_normal = scoring_test_data.index[0]
         value_normal = scoring_test_data['raw'][0]