Skip to content

Commit

Permalink
Merge pull request slacgismo#68 from slacgismo/bug-fix
Browse files Browse the repository at this point in the history
Bug fix
  • Loading branch information
bmeyers authored Sep 28, 2021
2 parents 2372d22 + 3d5617f commit 36a4ec2
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 36 deletions.
64 changes: 30 additions & 34 deletions solardatatools/algorithms/time_shifts.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import numpy as np
from scipy.stats import mode
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from solardatatools.solar_noon import energy_com, avg_sunrise_sunset
from solardatatools.signal_decompositions import l2_l1d1_l2d2p365
Expand Down Expand Up @@ -143,29 +144,34 @@ def run(
self.__recursion_depth = 0

def optimize_c1(self, metric, c1s, use_ixs, c2, periodic_detector, solver=None):
n = np.sum(use_ixs)
select = np.random.uniform(size=n) <= 0.75 # random holdout selection
train = np.copy(use_ixs)
test = np.copy(use_ixs)
train[use_ixs] = select
test[use_ixs] = ~select
# set up train/test split with sklearn
ixs = np.arange(len(metric))
ixs = ixs[use_ixs]
train_ixs, test_ixs = train_test_split(ixs, test_size=0.75)
train = np.zeros(len(metric), dtype=bool)
test = np.zeros(len(metric), dtype=bool)
train[train_ixs] = True
test[test_ixs] = True
# initialize results objects
train_r = np.zeros_like(c1s)
test_r = np.zeros_like(c1s)
tv_metric = np.zeros_like(c1s)
jpy = np.zeros_like(c1s)
# iterate over possible values of c1 parameter
for i, v in enumerate(c1s):
s1, s2 = self.estimate_components(
metric, v, c2, train, periodic_detector, n_iter=5, solver=solver
)
y = metric
# collect results
train_r[i] = np.average(np.power((y - s1 - s2)[train], 2))
test_r[i] = np.average(np.power((y - s1 - s2)[test], 2))
tv_metric[i] = np.average(np.abs(np.diff(s1, n=1)))
count_jumps = np.sum(~np.isclose(np.diff(s1), 0, atol=1e-4))
jumps_per_year = count_jumps / (len(metric) / 365)
jpy[i] = jumps_per_year
zero_one_scale = lambda x: (x - np.min(x)) / (np.max(x) - np.min(x))
hn = zero_one_scale(test_r)
hn = zero_one_scale(test_r) # holdout error metrix
rn = zero_one_scale(train_r)
ixs = np.arange(len(c1s))
# Detecting more than 5 time shifts per year is extremely uncommon,
Expand Down Expand Up @@ -203,39 +209,29 @@ def estimate_components(
w = 1 / (eps + np.abs(np.diff(s1, n=1)))
return s1, s2

def plot_optimization(self):
def plot_optimization(self, figsize=None):
if self.best_ix is not None:
c1s = self.c1_vals
hn = self.normalized_holdout_error
rn = self.normalized_train_error
best_c1 = self.best_c1
import matplotlib.pyplot as plt

plt.plot(c1s, hn, marker=".")
plt.axvline(best_c1, ls="--", color="red")
plt.xscale("log")
plt.title("holdout validation")
plt.show()
plt.plot(c1s, self.jumps_per_year, marker=".")
plt.axvline(best_c1, ls="--", color="red")
plt.xscale("log")
plt.title("jumps per year")
plt.show()
plt.plot(c1s, rn, marker=".")
plt.axvline(best_c1, ls="--", color="red")
plt.xscale("log")
plt.title("training residuals")
plt.show()
# plt.plot(c1s, hn * rn, marker='.')
# plt.axvline(best_c1, ls='--', color='red')
# plt.xscale('log')
# plt.title('holdout error times training error')
# plt.show()
plt.plot(c1s, self.tv_metric, marker=".")
plt.axvline(best_c1, ls="--", color="red")
plt.xscale("log")
plt.title("Total variation metric")
plt.show()
fig, ax = plt.subplots(nrows=4, sharex=True, figsize=figsize)
ax[0].plot(c1s, hn, marker=".")
ax[0].axvline(best_c1, ls="--", color="red")
ax[0].set_title("holdout validation")
ax[1].plot(c1s, self.jumps_per_year, marker=".")
ax[1].axvline(best_c1, ls="--", color="red")
ax[1].set_title("jumps per year")
ax[2].plot(c1s, rn, marker=".")
ax[2].axvline(best_c1, ls="--", color="red")
ax[2].set_title("training residuals")
ax[3].plot(c1s, self.tv_metric, marker=".")
ax[3].axvline(best_c1, ls="--", color="red")
ax[3].set_xscale("log")
ax[3].set_title("Total variation metric")
plt.tight_layout()
return fig

def apply_corrections(self, data):
roll_by_index = self.roll_by_index
Expand Down
2 changes: 1 addition & 1 deletion solardatatools/clear_day_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def find_clear_days(
# Shift this metric so the median is at zero
# tc = np.percentile(tc, 50) - tc
# Normalize such that the maximum value is equal to one
tc /= np.max(tc)
tc /= np.nanmax(tc)
tc = 1 - tc
# Seasonal renormalization: estimate a "baseline smoothness" based on local
# 90th percentile of smoothness signal. This has the effect of increasing
Expand Down
2 changes: 1 addition & 1 deletion solardatatools/signal_decompositions.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def tl1_l2d2p365(
:return: median fit with seasonal baseline removed
"""
if use_ixs is None:
use_ixs = np.arange(len(signal))
use_ixs = ~np.isnan(signal)
x = cvx.Variable(len(signal))
r = signal[use_ixs] - x[use_ixs]
objective = cvx.Minimize(
Expand Down

0 comments on commit 36a4ec2

Please sign in to comment.