From cc1d60debc770dee5e2f5f4023d6c1be1919480f Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 10:53:13 -0800 Subject: [PATCH 1/7] Refactor to use smoothing utils --- changehc/delphi_changehc/__init__.py | 1 - changehc/delphi_changehc/sensor.py | 12 ++++++---- changehc/delphi_changehc/smooth.py | 36 ---------------------------- changehc/tests/test_sensor.py | 7 +++--- changehc/tests/test_smooth.py | 17 ------------- 5 files changed, 10 insertions(+), 63 deletions(-) delete mode 100644 changehc/delphi_changehc/smooth.py delete mode 100644 changehc/tests/test_smooth.py diff --git a/changehc/delphi_changehc/__init__.py b/changehc/delphi_changehc/__init__.py index 29577dcba..a8df7b6ce 100644 --- a/changehc/delphi_changehc/__init__.py +++ b/changehc/delphi_changehc/__init__.py @@ -12,7 +12,6 @@ from . import load_data from . import run from . import sensor -from . import smooth from . import update_sensor from . import weekday diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index 7a625e4fe..f33bcf888 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -8,6 +8,7 @@ # standard packages import logging +from delphi_utils import Smoother # third party import numpy as np @@ -15,22 +16,23 @@ # first party from .config import Config -from .smooth import left_gauss_linear +SMOOTHER = Smoother("savgol", poly_fit_degree=1, gaussian_bandwidth=100) + class CHCSensor: """Sensor class to fit a signal using Covid counts from Change HC outpatient data. """ @staticmethod def gauss_smooth(count,total): - """smooth using the left_gauss_linear + """smooth using the SMOOTHER.smooth Args: count, total: array """ - count_smooth = left_gauss_linear(count) - total_smooth = left_gauss_linear(total) + count_smooth = SMOOTHER.smooth(count) + total_smooth = SMOOTHER.smooth(total) total_clip = np.clip(total_smooth, 0, None) count_clip = np.clip(count_smooth, 0, total_clip) return count_clip, total_clip @@ -114,7 +116,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values) # calculate smoothed counts and jeffreys rate - # the left_gauss_linear smoother is not guaranteed to return values greater than 0 + # the SMOOTHER.smooth smoother is not guaranteed to return values greater than 0 smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits) diff --git a/changehc/delphi_changehc/smooth.py b/changehc/delphi_changehc/smooth.py deleted file mode 100644 index 710f4053f..000000000 --- a/changehc/delphi_changehc/smooth.py +++ /dev/null @@ -1,36 +0,0 @@ -""" -This file contains a left gauss filter used to smooth a 1-d signal. -Code is courtesy of Addison Hu (minor adjustments by Maria). - -Author: Maria Jahja -Created: 2020-04-16 - -""" -import numpy as np - -from .config import Config - - -def left_gauss_linear(s, h=Config.SMOOTHER_BANDWIDTH): - """Smooth the y-values using a local linear left Gaussian filter. - - Args: - y: one dimensional signal to smooth. - h: smoothing bandwidth (in terms of variance) - - Returns: a smoothed 1D signal. - """ - - n = len(s) - t = np.zeros_like(s) - X = np.vstack([np.ones(n), np.arange(n)]).T - for idx in range(n): - wts = np.exp(-((np.arange(idx + 1) - idx) ** 2) / h) - XwX = np.dot(X[: (idx + 1), :].T * wts, X[: (idx + 1), :]) - Xwy = np.dot(X[: (idx + 1), :].T * wts, s[: (idx + 1)].reshape(-1, 1)) - try: - beta = np.linalg.solve(XwX, Xwy) - t[idx] = np.dot(X[: (idx + 1), :], beta)[-1] - except np.linalg.LinAlgError: - t[idx] = np.nan - return t diff --git a/changehc/tests/test_sensor.py b/changehc/tests/test_sensor.py index 5161750d2..3c6410953 100644 --- a/changehc/tests/test_sensor.py +++ b/changehc/tests/test_sensor.py @@ -20,7 +20,7 @@ class TestLoadData: combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, DROP_DATE, - "fips") + "fips") def test_backfill(self): num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1) @@ -49,8 +49,8 @@ def test_backfill(self): assert np.array_equal(exp_den4, den4) def test_fit_fips(self): - date_range = pd.date_range("2020-05-01", "2020-05-20") - all_fips = self.combined_data.index.get_level_values('fips').unique() + date_range = pd.date_range("2020-05-01", "2020-06-01") + all_fips = self.combined_data.index.get_level_values("fips").unique() sample_fips = nr.choice(all_fips, 10) for fips in sample_fips: @@ -60,7 +60,6 @@ def test_fit_fips(self): # first value is burn-in assert np.min(res0["rate"][1:]) > 0 assert np.max(res0["rate"][1:]) <= 100 - if np.all(np.isnan(res0["se"])): assert res0["incl"].sum() == 0 else: diff --git a/changehc/tests/test_smooth.py b/changehc/tests/test_smooth.py deleted file mode 100644 index b3a27b9ae..000000000 --- a/changehc/tests/test_smooth.py +++ /dev/null @@ -1,17 +0,0 @@ -# standard -import pytest - -# third party -import numpy as np - -# first party -from delphi_changehc.smooth import left_gauss_linear - - -class TestLeftGaussSmoother: - def test_gauss_linear(self): - signal = np.ones(10) - assert np.allclose(left_gauss_linear(signal)[1:], signal[1:]) - - signal = np.arange(1, 10) + np.random.normal(0, 1, 9) - assert np.allclose(left_gauss_linear(signal, h=0.1)[1:], signal[1:]) From 107efc9d6239ecda2273923b6853fd8a4acb22af Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 11:18:10 -0800 Subject: [PATCH 2/7] use config constant for bandwidth --- changehc/delphi_changehc/sensor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index f33bcf888..2ff93cec7 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -18,7 +18,9 @@ from .config import Config -SMOOTHER = Smoother("savgol", poly_fit_degree=1, gaussian_bandwidth=100) +SMOOTHER = Smoother("savgol", + poly_fit_degree=1, + gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH) class CHCSensor: """Sensor class to fit a signal using Covid counts from Change HC outpatient data. From 5888e02b693d638a5ad753fa1e0326466bb60dbb Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 12:53:27 -0800 Subject: [PATCH 3/7] Update import --- changehc/delphi_changehc/sensor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index 2ff93cec7..d99c30329 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -8,11 +8,11 @@ # standard packages import logging -from delphi_utils import Smoother # third party import numpy as np import pandas as pd +from delphi_utils import Smoother # first party from .config import Config @@ -118,7 +118,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values) # calculate smoothed counts and jeffreys rate - # the SMOOTHER.smooth smoother is not guaranteed to return values greater than 0 + # the left_gauss_linear smoother is not guaranteed to return values greater than 0 smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits) From 93c10ebee7f433c21c6b12a07c1e7ee9402f8b25 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 13:06:49 -0800 Subject: [PATCH 4/7] Change into class var --- changehc/delphi_changehc/sensor.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index d99c30329..e51b6bba1 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -18,23 +18,23 @@ from .config import Config -SMOOTHER = Smoother("savgol", - poly_fit_degree=1, - gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH) class CHCSensor: """Sensor class to fit a signal using Covid counts from Change HC outpatient data. """ + smoother = Smoother("savgol", + poly_fit_degree=1, + gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH) @staticmethod def gauss_smooth(count,total): - """smooth using the SMOOTHER.smooth + """smooth using the left_gauss_linear Args: count, total: array """ - count_smooth = SMOOTHER.smooth(count) - total_smooth = SMOOTHER.smooth(total) + count_smooth = CHCSensor.smoother.smooth(count) + total_smooth = CHCSensor.smoother.smooth(total) total_clip = np.clip(total_smooth, 0, None) count_clip = np.clip(count_smooth, 0, total_clip) return count_clip, total_clip @@ -47,7 +47,7 @@ def backfill( min_visits_to_fill=Config.MIN_CUM_VISITS): """ Adjust for backfill (retroactively added observations) by using a - variable length smoother, which starts from the RHS and moves + variable length CHCSensor.smoother, which starts from the RHS and moves leftwards (backwards through time). We cumulatively sum the total visits (denominator), until we have observed some minimum number of counts, then calculate the sum over that bin. We restrict the @@ -118,7 +118,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values) # calculate smoothed counts and jeffreys rate - # the left_gauss_linear smoother is not guaranteed to return values greater than 0 + # the left_gauss_linear CHCSensor.smoother is not guaranteed to return values greater than 0 smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits) @@ -131,7 +131,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): (smoothed_total_counts + 0.5) / (smoothed_total_visits + 1) ) - # checks - due to the smoother, the first value will be NA + # checks - due to the CHCSensor.smoother, the first value will be NA assert ( np.sum(np.isnan(smoothed_total_rates[1:])) == 0 ), "NAs in rate calculation" From 93b61fb0179c8be8f90ba8f7f46e1b7a77632575 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 13:09:01 -0800 Subject: [PATCH 5/7] Revert comments --- changehc/delphi_changehc/sensor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index e51b6bba1..35737ad32 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -47,7 +47,7 @@ def backfill( min_visits_to_fill=Config.MIN_CUM_VISITS): """ Adjust for backfill (retroactively added observations) by using a - variable length CHCSensor.smoother, which starts from the RHS and moves + variable length smoother, which starts from the RHS and moves leftwards (backwards through time). We cumulatively sum the total visits (denominator), until we have observed some minimum number of counts, then calculate the sum over that bin. We restrict the @@ -118,7 +118,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values) # calculate smoothed counts and jeffreys rate - # the left_gauss_linear CHCSensor.smoother is not guaranteed to return values greater than 0 + # the left_gauss_linear smoother is not guaranteed to return values greater than 0 smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits) @@ -131,7 +131,7 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): (smoothed_total_counts + 0.5) / (smoothed_total_visits + 1) ) - # checks - due to the CHCSensor.smoother, the first value will be NA + # checks - due to the smoother, the first value will be NA assert ( np.sum(np.isnan(smoothed_total_rates[1:])) == 0 ), "NAs in rate calculation" From bf83a4d7c6c096a8f0eeb002e2f782fabd0891de Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 13:43:17 -0800 Subject: [PATCH 6/7] update test to change window length --- changehc/tests/test_sensor.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/changehc/tests/test_sensor.py b/changehc/tests/test_sensor.py index 3c6410953..ab2ef798a 100644 --- a/changehc/tests/test_sensor.py +++ b/changehc/tests/test_sensor.py @@ -2,7 +2,7 @@ import pytest # third party -from delphi_utils import read_params +from delphi_utils import read_params, Smoother import numpy as np import numpy.random as nr import pandas as pd @@ -21,6 +21,10 @@ class TestLoadData: combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, DROP_DATE, "fips") + CHCSensor.smoother = Smoother("savgol", + poly_fit_degree=1, + gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH, + window_length=20) def test_backfill(self): num0 = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=float).reshape(-1, 1) @@ -49,8 +53,8 @@ def test_backfill(self): assert np.array_equal(exp_den4, den4) def test_fit_fips(self): - date_range = pd.date_range("2020-05-01", "2020-06-01") - all_fips = self.combined_data.index.get_level_values("fips").unique() + date_range = pd.date_range("2020-05-01", "2020-05-20") + all_fips = self.combined_data.index.get_level_values('fips').unique() sample_fips = nr.choice(all_fips, 10) for fips in sample_fips: @@ -60,6 +64,7 @@ def test_fit_fips(self): # first value is burn-in assert np.min(res0["rate"][1:]) > 0 assert np.max(res0["rate"][1:]) <= 100 + if np.all(np.isnan(res0["se"])): assert res0["incl"].sum() == 0 else: From 6df9b16a3ea7193d0163b5444714bc98304f3ea5 Mon Sep 17 00:00:00 2001 From: andrew Date: Wed, 4 Nov 2020 13:48:44 -0800 Subject: [PATCH 7/7] Add comment --- changehc/tests/test_sensor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/changehc/tests/test_sensor.py b/changehc/tests/test_sensor.py index ab2ef798a..a1745dff0 100644 --- a/changehc/tests/test_sensor.py +++ b/changehc/tests/test_sensor.py @@ -21,6 +21,7 @@ class TestLoadData: combined_data = load_combined_data(DENOM_FILEPATH, COVID_FILEPATH, DROP_DATE, "fips") + # change smoother window length for test data CHCSensor.smoother = Smoother("savgol", poly_fit_degree=1, gaussian_bandwidth=Config.SMOOTHER_BANDWIDTH,