Merge pull request slacgismo#68 from slacgismo/bug-fix

Bug fix
abhisheksparikh · Sep 28, 2021 · 36a4ec2 · 36a4ec2
2 parents 2372d22 + 3d5617f
commit 36a4ec2
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 36 deletions.
diff --git a/solardatatools/algorithms/time_shifts.py b/solardatatools/algorithms/time_shifts.py
@@ -17,6 +17,7 @@
 
 import numpy as np
 from scipy.stats import mode
+from sklearn.model_selection import train_test_split
 import matplotlib.pyplot as plt
 from solardatatools.solar_noon import energy_com, avg_sunrise_sunset
 from solardatatools.signal_decompositions import l2_l1d1_l2d2p365
@@ -143,29 +144,34 @@ def run(
         self.__recursion_depth = 0
 
     def optimize_c1(self, metric, c1s, use_ixs, c2, periodic_detector, solver=None):
-        n = np.sum(use_ixs)
-        select = np.random.uniform(size=n) <= 0.75  # random holdout selection
-        train = np.copy(use_ixs)
-        test = np.copy(use_ixs)
-        train[use_ixs] = select
-        test[use_ixs] = ~select
+        # set up train/test split with sklearn
+        ixs = np.arange(len(metric))
+        ixs = ixs[use_ixs]
+        train_ixs, test_ixs = train_test_split(ixs, test_size=0.75)
+        train = np.zeros(len(metric), dtype=bool)
+        test = np.zeros(len(metric), dtype=bool)
+        train[train_ixs] = True
+        test[test_ixs] = True
+        # initialize results objects
         train_r = np.zeros_like(c1s)
         test_r = np.zeros_like(c1s)
         tv_metric = np.zeros_like(c1s)
         jpy = np.zeros_like(c1s)
+        # iterate over possible values of c1 parameter
         for i, v in enumerate(c1s):
             s1, s2 = self.estimate_components(
                 metric, v, c2, train, periodic_detector, n_iter=5, solver=solver
             )
             y = metric
+            # collect results
             train_r[i] = np.average(np.power((y - s1 - s2)[train], 2))
             test_r[i] = np.average(np.power((y - s1 - s2)[test], 2))
             tv_metric[i] = np.average(np.abs(np.diff(s1, n=1)))
             count_jumps = np.sum(~np.isclose(np.diff(s1), 0, atol=1e-4))
             jumps_per_year = count_jumps / (len(metric) / 365)
             jpy[i] = jumps_per_year
         zero_one_scale = lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))
-        hn = zero_one_scale(test_r)
+        hn = zero_one_scale(test_r) # holdout error metrix
         rn = zero_one_scale(train_r)
         ixs = np.arange(len(c1s))
         # Detecting more than 5 time shifts per year is extremely uncommon,
@@ -203,39 +209,29 @@ def estimate_components(
             w = 1 / (eps + np.abs(np.diff(s1, n=1)))
         return s1, s2
 
-    def plot_optimization(self):
+    def plot_optimization(self, figsize=None):
         if self.best_ix is not None:
             c1s = self.c1_vals
             hn = self.normalized_holdout_error
             rn = self.normalized_train_error
             best_c1 = self.best_c1
             import matplotlib.pyplot as plt
-
-            plt.plot(c1s, hn, marker=".")
-            plt.axvline(best_c1, ls="--", color="red")
-            plt.xscale("log")
-            plt.title("holdout validation")
-            plt.show()
-            plt.plot(c1s, self.jumps_per_year, marker=".")
-            plt.axvline(best_c1, ls="--", color="red")
-            plt.xscale("log")
-            plt.title("jumps per year")
-            plt.show()
-            plt.plot(c1s, rn, marker=".")
-            plt.axvline(best_c1, ls="--", color="red")
-            plt.xscale("log")
-            plt.title("training residuals")
-            plt.show()
-            # plt.plot(c1s, hn * rn, marker='.')
-            # plt.axvline(best_c1, ls='--', color='red')
-            # plt.xscale('log')
-            # plt.title('holdout error times training error')
-            # plt.show()
-            plt.plot(c1s, self.tv_metric, marker=".")
-            plt.axvline(best_c1, ls="--", color="red")
-            plt.xscale("log")
-            plt.title("Total variation metric")
-            plt.show()
+            fig, ax = plt.subplots(nrows=4, sharex=True, figsize=figsize)
+            ax[0].plot(c1s, hn, marker=".")
+            ax[0].axvline(best_c1, ls="--", color="red")
+            ax[0].set_title("holdout validation")
+            ax[1].plot(c1s, self.jumps_per_year, marker=".")
+            ax[1].axvline(best_c1, ls="--", color="red")
+            ax[1].set_title("jumps per year")
+            ax[2].plot(c1s, rn, marker=".")
+            ax[2].axvline(best_c1, ls="--", color="red")
+            ax[2].set_title("training residuals")
+            ax[3].plot(c1s, self.tv_metric, marker=".")
+            ax[3].axvline(best_c1, ls="--", color="red")
+            ax[3].set_xscale("log")
+            ax[3].set_title("Total variation metric")
+            plt.tight_layout()
+            return fig
 
     def apply_corrections(self, data):
         roll_by_index = self.roll_by_index

diff --git a/solardatatools/clear_day_detection.py b/solardatatools/clear_day_detection.py
@@ -41,7 +41,7 @@ def find_clear_days(
     # Shift this metric so the median is at zero
     # tc = np.percentile(tc, 50) - tc
     # Normalize such that the maximum value is equal to one
-    tc /= np.max(tc)
+    tc /= np.nanmax(tc)
     tc = 1 - tc
     # Seasonal renormalization: estimate a "baseline smoothness" based on local
     # 90th percentile of smoothness signal. This has the effect of increasing

diff --git a/solardatatools/signal_decompositions.py b/solardatatools/signal_decompositions.py
@@ -161,7 +161,7 @@ def tl1_l2d2p365(
     :return: median fit with seasonal baseline removed
     """
     if use_ixs is None:
-        use_ixs = np.arange(len(signal))
+        use_ixs = ~np.isnan(signal)
     x = cvx.Variable(len(signal))
     r = signal[use_ixs] - x[use_ixs]
     objective = cvx.Minimize(