Skip to content

Commit a5c18a1

Browse files
committed
Update smoother imputer:
* separate out the smoother's polynomial fit degree from the imputer's
* default the imputer's fit degree to 2
* add tests
1 parent 647e05d commit a5c18a1

File tree

2 files changed

+44
-31
lines changed

2 files changed

+44
-31
lines changed

_delphi_utils_python/delphi_utils/smooth.py

Lines changed: 20 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ def __init__(
150150
else:
151151
self.coeffs = None
152152

153-
def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.Series]:
153+
def smooth(self, signal: Union[np.ndarray, pd.Series], impute_order=2) -> Union[np.ndarray, pd.Series]:
154154
"""Apply a smoother to a signal.
155155
156156
The major workhorse smoothing function. Imputes the nans and then applies
@@ -160,6 +160,9 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
160160
----------
161161
signal: np.ndarray or pd.Series
162162
A 1D signal to be smoothed.
163+
impute_order: int
164+
The polynomial order of the fit used for imputation. By default, this is set to
165+
2.
163166
164167
Returns
165168
----------
@@ -184,7 +187,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
184187
signal_smoothed = signal.copy()
185188
else:
186189
# Impute
187-
signal = self.impute(signal)
190+
signal = self.impute(signal, impute_order=impute_order)
188191

189192
# Smooth
190193
if self.smoother_name == "savgol":
@@ -204,7 +207,7 @@ def smooth(self, signal: Union[np.ndarray, pd.Series]) -> Union[np.ndarray, pd.S
204207
signal_smoothed.index = pandas_index
205208
return signal_smoothed
206209

207-
def impute(self, signal):
210+
def impute(self, signal, impute_order=2):
208211
"""Impute the nan values in the signal.
209212
210213
See the class docstring for an explanation of the impute methods.
@@ -213,6 +216,8 @@ def impute(self, signal):
213216
----------
214217
signal: np.ndarray
215218
1D signal to be imputed.
219+
impute_order: int
220+
The polynomial order of the fit used for imputation.
216221
217222
Returns
218223
-------
@@ -224,7 +229,7 @@ def impute(self, signal):
224229
# To preserve input-output array lengths, this util will not drop NaNs for you.
225230
if np.isnan(signal[0]):
226231
raise ValueError("The signal should not begin with a nan value.")
227-
imputed_signal = self.savgol_impute(signal)
232+
imputed_signal = self.savgol_impute(signal, impute_order)
228233
elif self.impute_method == "zeros":
229234
imputed_signal = np.nan_to_num(signal)
230235
elif self.impute_method is None:
@@ -428,10 +433,10 @@ def savgol_smoother(self, signal):
428433
elif self.boundary_method == "nan":
429434
return signal_smoothed
430435

431-
def savgol_impute(self, signal):
436+
def savgol_impute(self, signal, impute_order):
432437
"""Impute the nan values in signal using savgol.
433438
434-
This method fills the nan values in the signal with a quadratic polynomial fit
439+
This method fills the nan values in the signal with polynomial interpolation
435440
on a rolling window of the immediate past up to window_length data points.
436441
437442
A number of boundary cases are handled involving nan filling close to the boundary.
@@ -443,34 +448,35 @@ def savgol_impute(self, signal):
443448
----------
444449
signal: np.ndarray
445450
A 1D signal to be imputed.
451+
impute_order: int
452+
The polynomial order of the fit used for imputation.
446453
447454
Returns
448455
----------
449456
signal_imputed: np.ndarray
450457
An imputed 1D signal.
451458
"""
459+
if impute_order > self.window_length:
460+
raise ValueError("Impute order must be smaller than window length.")
461+
452462
signal_imputed = np.copy(signal)
453463
for ix in np.where(np.isnan(signal))[0]:
454464
# Boundary cases
455465
if ix < self.window_length:
456466
# At the boundary, a single value should just be extended
457467
if ix == 1:
458468
signal_imputed[ix] = signal_imputed[ix - 1]
459-
# Reduce the polynomial degree if needed
460-
elif ix == 2:
461-
signal_imputed[ix] = self.savgol_predict(
462-
signal_imputed[:ix], 1, -1
463-
)
464-
# Otherwise, use savgol fitting on the largest window prior
469+
# Otherwise, use savgol fitting on the largest window prior,
470+
# reduce the polynomial degree if needed
465471
else:
466472
signal_imputed[ix] = self.savgol_predict(
467-
signal_imputed[:ix], self.poly_fit_degree, -1
473+
signal_imputed[:ix], min(ix-1, impute_order), -1
468474
)
469475
# Away from the boundary, use savgol fitting on a fixed window
470476
else:
471477
signal_imputed[ix] = self.savgol_predict(
472478
signal_imputed[ix - self.window_length : ix],
473-
self.poly_fit_degree,
479+
impute_order,
474480
-1,
475481
)
476482
return signal_imputed

_delphi_utils_python/tests/test_smooth.py

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,12 @@ def test_causal_savgol_smoother(self):
143143
smoothed_signal = smoother.smooth(signal)
144144
assert np.allclose(smoothed_signal, signal, equal_nan=True)
145145

146+
# test window_length > len(signal) and boundary_method="identity"
147+
signal = np.arange(20)
148+
smoother = Smoother(boundary_method="identity", window_length=30)
149+
smoothed_signal = smoother.smooth(signal)
150+
assert np.allclose(signal, smoothed_signal)
151+
146152
def test_impute(self):
147153
# test the nan imputer
148154
signal = np.array([i if i % 3 else np.nan for i in range(1, 40)])
@@ -159,7 +165,7 @@ def test_impute(self):
159165
signal = np.array([i if i % 3 else np.nan for i in range(1, 40)])
160166
# test that the non-nan values are unchanged
161167
not_nans_ixs = np.bitwise_xor(np.isnan(signal, where=True), np.full(len(signal), True))
162-
smoothed_signal = Smoother().savgol_impute(signal)
168+
smoothed_signal = Smoother().impute(signal)
163169
assert np.allclose(signal[not_nans_ixs], smoothed_signal[not_nans_ixs])
164170
# test that the imputer is close to the true line
165171
assert np.allclose(range(1, 40), smoothed_signal, atol=0.5)
@@ -168,49 +174,50 @@ def test_impute(self):
168174
signal = np.hstack([np.arange(10), [np.nan], np.arange(10)])
169175
window_length = 10
170176
smoother = Smoother(
171-
smoother_name="savgol", window_length=window_length, poly_fit_degree=1
177+
window_length=window_length, poly_fit_degree=1
172178
)
173-
imputed_signal = smoother.savgol_impute(signal)
179+
imputed_signal = smoother.impute(signal)
174180
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
175181
smoother = Smoother(
176-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
182+
window_length=window_length, poly_fit_degree=2
177183
)
178-
imputed_signal = smoother.savgol_impute(signal)
184+
imputed_signal = smoother.impute(signal)
179185
assert np.allclose(imputed_signal, np.hstack([np.arange(11), np.arange(10)]))
180186

181187
# if there are nans on the boundary, should dynamically change window
182188
signal = np.hstack(
183189
[np.arange(5), [np.nan], np.arange(20), [np.nan], np.arange(5)]
184190
)
185191
smoother = Smoother(
186-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
192+
window_length=window_length, poly_fit_degree=2
187193
)
188-
imputed_signal = smoother.savgol_impute(signal)
194+
imputed_signal = smoother.impute(signal)
189195
assert np.allclose(
190196
imputed_signal, np.hstack([np.arange(6), np.arange(21), np.arange(5)]),
191197
)
192198

193199
# if the array begins with np.nan, we should tell the user to peel it off before sending
194200
signal = np.hstack([[np.nan], np.arange(20), [np.nan], np.arange(5)])
195201
smoother = Smoother(
196-
smoother_name="savgol", window_length=window_length, poly_fit_degree=2
202+
window_length=window_length, poly_fit_degree=2
197203
)
198204
with pytest.raises(ValueError):
199-
imputed_signal = smoother.savgol_impute(signal)
200-
201-
# test window_length > len(signal) and boundary_method="identity"
202-
signal = np.arange(20)
203-
smoother = Smoother(smoother_name="savgol", boundary_method="identity", window_length=30)
204-
smoothed_signal = smoother.smooth(signal)
205-
assert np.allclose(signal, smoothed_signal)
205+
imputed_signal = smoother.impute(signal)
206206

207207
# test the boundary methods
208208
signal = np.arange(20)
209-
smoother = Smoother(smoother_name="savgol", poly_fit_degree=0,
209+
smoother = Smoother(poly_fit_degree=0,
210210
boundary_method="identity", window_length=10)
211-
smoothed_signal = smoother.savgol_impute(signal)
211+
smoothed_signal = smoother.impute(signal)
212212
assert np.allclose(smoothed_signal, signal)
213213

214+
# test the impute_order argument
215+
signal = np.hstack([[1, np.nan, np.nan, 2], np.arange(5)])
216+
smoother = Smoother()
217+
smoothed_signal = smoother.impute(signal, impute_order=1)
218+
assert np.allclose(smoothed_signal, np.hstack([[1, 1, 1, 2], np.arange(5)]))
219+
220+
214221
def test_pandas_series_input(self):
215222
# The savgol method should match the linear regression method on the first
216223
# window_length-many values of the signal, if the savgol_weighting is set to true,

0 commit comments

Comments
 (0)