Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions jhu/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ params.json

# Do not commit output files
receiving/*.csv
tests/receiving/*.csv

# Remove macOS files
.DS_Store
Expand Down
6 changes: 4 additions & 2 deletions jhu/delphi_jhu/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,13 @@ def run_module():
df = dfs[metric]
# Aggregate to appropriate geographic resolution
df = geo_map(df, geo_res)
df["val"] = SMOOTHERS_MAP[smoother][0](df[sensor].values)
df.set_index(["timestamp", "geo_id"], inplace=True)
df["val"] = df[sensor].groupby(level=1).transform(SMOOTHERS_MAP[smoother][0])
df["se"] = np.nan
df["sample_size"] = np.nan
# Drop early entries where data is insufficient for smoothing
df = df.loc[~df["val"].isnull(), :]
df = df[~df["val"].isnull()]
df = df.reset_index()
sensor_name = SENSOR_NAME_MAP[sensor][0]
# if (SENSOR_NAME_MAP[sensor][1] or SMOOTHERS_MAP[smoother][2]):
# metric = f"wip_{metric}"
Expand Down
Empty file added jhu/tests/receiving/.gitkeep
Empty file.
10 changes: 5 additions & 5 deletions jhu/tests/test_geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_incorrect_geo(self):
)

with pytest.raises(ValueError):
geo_map(df, "département", 'new_counts')
geo_map(df, "département")

def test_county(self):
df = pd.DataFrame(
Expand All @@ -45,7 +45,7 @@ def test_county(self):

df = df.append(df_mega)

new_df = geo_map(df, "county", 'new_counts')
new_df = geo_map(df, "county")

exp_incidence = df["new_counts"] / df["population"] * 100000
exp_cprop = df["cumulative_counts"] / df["population"] * 100000
Expand Down Expand Up @@ -78,7 +78,7 @@ def test_state(self):

df = df.append(df_mega)

new_df = geo_map(df, "state", 'new_counts')
new_df = geo_map(df, "state")

exp_incidence = np.array([27 + 5, 13 + 10]) / np.array([2500, 25]) * 100000
exp_cprop = np.array([165 + 30, 60 + 100]) / np.array([2500, 25]) * 100000
Expand Down Expand Up @@ -114,7 +114,7 @@ def test_hrr(self):

# df = df.append(df_mega)

new_df = geo_map(df, "hrr", 'new_counts')
new_df = geo_map(df, "hrr")

exp_incidence = np.array([13, 27]) / np.array([25, 2500]) * 100000
exp_cprop = np.array([60, 165]) / np.array([25, 2500]) * 100000
Expand Down Expand Up @@ -145,7 +145,7 @@ def test_msa(self):

# df = df.append(df_mega)

new_df = geo_map(df, "msa", 'new_counts')
new_df = geo_map(df, "msa")

assert new_df["geo_id"].isin([31420, 49340]).all()
assert new_df["timestamp"].isin(["2020-02-15"]).all()
14 changes: 9 additions & 5 deletions jhu/tests/test_smooth.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,25 @@
class TestSmooth:
def test_output_files_smoothed(self, run_as_module):

dates = [str(x) for x in range(20200701, 20200730)]
dates = [str(x) for x in range(20200303, 20200310)]

smoothed = pd.read_csv(
join("../receiving",
join("./receiving",
f"{dates[-1]}_state_confirmed_7dav_cumulative_num.csv")
)

# Build a dataframe out of the individual day files
raw = pd.concat([
pd.read_csv(
join("../receiving",
join("./receiving",
f"{date}_state_confirmed_cumulative_num.csv")
) for date in dates
])

# Compute the mean across the time values; order doesn't matter.
# This corresponds to the smoothed value on the last day in `dates`,
# 2020-03-09 (range() excludes its stop value, 20200310).
raw = raw.groupby('geo_id')['val'].mean()
df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))

df = pd.merge(smoothed, raw, on='geo_id', suffixes=('_smoothed', '_raw'))
assert np.allclose(df['val_smoothed'].values, df['val_raw'].values)