diff --git a/jhu/.gitignore b/jhu/.gitignore
index 552154e09..03e7e268e 100644
--- a/jhu/.gitignore
+++ b/jhu/.gitignore
@@ -5,6 +5,7 @@ params.json
 
 # Do not commit output files
 receiving/*.csv
+tests/receiving/*.csv
 
 # Remove macOS files
 .DS_Store
diff --git a/jhu/delphi_jhu/run.py b/jhu/delphi_jhu/run.py
index ac06304ad..7d8260a2d 100644
--- a/jhu/delphi_jhu/run.py
+++ b/jhu/delphi_jhu/run.py
@@ -93,11 +93,13 @@ def run_module():
         df = dfs[metric]
         # Aggregate to appropriate geographic resolution
         df = geo_map(df, geo_res)
-        df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values)
+        df.set_index(["timestamp", "geo_id"], inplace=True)
+        df["val"] = df[sensor].groupby(level=1).transform(SMOOTHERS_MAP[smoother][0])
         df["se"] = np.nan
         df["sample_size"] = np.nan
         # Drop early entries where data insufficient for smoothing
-        df = df.loc[~df["val"].isnull(), :]
+        df = df[~df["val"].isnull()]
+        df = df.reset_index()
         sensor_name = SENSOR_NAME_MAP[sensor][0]
         # if (SENSOR_NAME_MAP[sensor][1] or SMOOTHERS_MAP[smoother][2]):
         #     metric = f"wip_{metric}"
diff --git a/jhu/tests/receiving/.gitkeep b/jhu/tests/receiving/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/jhu/tests/test_geo.py b/jhu/tests/test_geo.py
index 444ea58f0..cf7cee21f 100644
--- a/jhu/tests/test_geo.py
+++ b/jhu/tests/test_geo.py
@@ -20,7 +20,7 @@ def test_incorrect_geo(self):
         )
 
         with pytest.raises(ValueError):
-            geo_map(df, "département", 'new_counts')
+            geo_map(df, "département")
 
     def test_county(self):
         df = pd.DataFrame(
@@ -45,7 +45,7 @@ def test_county(self):
 
         df = df.append(df_mega)
 
-        new_df = geo_map(df, "county", 'new_counts')
+        new_df = geo_map(df, "county")
 
         exp_incidence = df["new_counts"] / df["population"] * 100000
         exp_cprop = df["cumulative_counts"] / df["population"] * 100000
@@ -78,7 +78,7 @@ def test_state(self):
 
         df = df.append(df_mega)
 
-        new_df = geo_map(df, "state", 'new_counts')
+        new_df = geo_map(df, "state")
 
         exp_incidence = np.array([27 + 5, 13 + 10]) / np.array([2500, 25]) * 100000
         exp_cprop = np.array([165 + 30, 60 + 100]) / np.array([2500, 25]) * 100000
@@ -114,7 +114,7 @@ def test_hrr(self):
 
         # df = df.append(df_mega)
 
-        new_df = geo_map(df, "hrr", 'new_counts')
+        new_df = geo_map(df, "hrr")
 
         exp_incidence = np.array([13, 27]) / np.array([25, 2500]) * 100000
         exp_cprop = np.array([60, 165]) / np.array([25, 2500]) * 100000
@@ -145,7 +145,7 @@ def test_msa(self):
 
         # df = df.append(df_mega)
 
-        new_df = geo_map(df, "msa", 'new_counts')
+        new_df = geo_map(df, "msa")
 
         assert new_df["geo_id"].isin([31420, 49340]).all()
         assert new_df["timestamp"].isin(["2020-02-15"]).all()
diff --git a/jhu/tests/test_smooth.py b/jhu/tests/test_smooth.py
index 37f908f84..303f7dab0 100644
--- a/jhu/tests/test_smooth.py
+++ b/jhu/tests/test_smooth.py
@@ -10,21 +10,25 @@ class TestSmooth:
 
     def test_output_files_smoothed(self, run_as_module):
-        dates = [str(x) for x in range(20200701, 20200730)]
+        dates = [str(x) for x in range(20200303, 20200310)]
 
         smoothed = pd.read_csv(
-            join("../receiving",
+            join("./receiving",
                  f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv")
         )
 
+        # Build a dataframe out of the individual day files
         raw = pd.concat([
             pd.read_csv(
-                join("../receiving",
+                join("./receiving",
                      f"{date}_state_confirmed_cumulative_num.csv")
             ) for date in dates
         ])
-
+        # Compute the mean across the time values; order doesn't matter.
+        # This corresponds to the smoothed value on the last day,
+        # 2020-03-09.
         raw = raw.groupby('geo_id')['val'].mean()
 
-        df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))
+        df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))
 
         assert np.allclose(df['val_smoothed'].values, df['val_raw'].values)
+
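
Note on the run.py hunk above: the smoother is now applied within each geo_id group rather than across the whole column, so a 7-day average can no longer mix the tail of one geography's series with the head of the next. The snippet below is a minimal, self-contained sketch of that groupby/transform pattern; the toy frame, the seven_day_moving_average helper, and the column values are illustrative stand-ins (the indicator itself pulls its smoother from SMOOTHERS_MAP), not code taken from this repository.

import numpy as np
import pandas as pd

def seven_day_moving_average(x):
    # Trailing 7-day mean; the first six values are NaN because there is not
    # enough history to smooth (stand-in for SMOOTHERS_MAP[smoother][0]).
    return pd.Series(x).rolling(7).mean().to_numpy()

# Toy data with two geographies so the grouping matters.
df = pd.DataFrame({
    "timestamp": list(pd.date_range("2020-03-01", periods=10)) * 2,
    "geo_id": ["ca"] * 10 + ["ny"] * 10,
    "new_counts": np.arange(20, dtype=float),
})

# Index by (timestamp, geo_id) and smooth within each geo_id (level=1),
# mirroring the change in run.py; without the groupby, "ca" values would
# leak into the start of the "ny" series.
df = df.set_index(["timestamp", "geo_id"])
df["val"] = df["new_counts"].groupby(level=1).transform(seven_day_moving_average)

# Drop early entries where data is insufficient for smoothing, then restore
# timestamp and geo_id as ordinary columns, as run.py does via reset_index().
df = df[~df["val"].isnull()].reset_index()
print(df)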