Skip to content

Commit ff9d515

Browse files
committed
Move the lag and issue_date computation into the daily backfill files in changehc
1 parent 18fbc56 commit ff9d515

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

changehc/delphi_changehc/backfill.py

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,18 @@ def store_backfill_file(df, _end_date, backfill_dir, numtype, geo, weekday):
4646
'num', 'den']
4747
backfilldata = backfilldata.loc[backfilldata["time_value"] >= _start_date,
4848
selected_columns]
49+
50+
backfilldata["lag"] = [(_end_date - x).days for x in backfilldata["time_value"]]
51+
backfilldata["time_value"] = backfilldata.time_value.dt.strftime("%Y-%m-%d")
52+
backfilldata["issue_date"] = datetime.strftime(_end_date, "%Y-%m-%d")
53+
54+
backfilldata = backfilldata.astype({
55+
"time_value": "string",
56+
"issue_date": "string",
57+
"fips": "string",
58+
"state_id": "string"
59+
})
60+
4961
path = backfill_dir + \
5062
"/changehc_%s_as_of_%s.parquet"%(numtype, datetime.strftime(_end_date, "%Y%m%d"))
5163
# Store intermediate file into the backfill folder
@@ -109,9 +121,6 @@ def get_date(file_link):
109121
pdList = []
110122
for fn in new_files:
111123
df = pd.read_parquet(fn, engine='pyarrow')
112-
issue_date = get_date(fn)
113-
df["issue_date"] = issue_date
114-
df["lag"] = [(issue_date - x).days for x in df["time_value"]]
115124
pdList.append(df)
116125
merged_file = pd.concat(pdList).sort_values(["time_value", "fips"])
117126
path = backfill_dir + "/changehc_%s_from_%s_to_%s.parquet"%(

changehc/tests/test_backfill.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@
4040
class TestBackfill:
4141

4242
def test_store_backfill_file(self):
43-
43+
4444
fn = "changehc_covid_as_of_20200101.parquet"
4545
dropdate = datetime(2020, 1, 1)
4646
numtype = "covid"
@@ -69,7 +69,7 @@ def test_store_backfill_file(self):
6969
backfill_df = pd.read_parquet(backfill_dir + "/"+ fn, engine='pyarrow')
7070

7171
selected_columns = ['time_value', 'fips', 'state_id',
72-
'num', 'den']
72+
'num', 'den', 'lag', 'issue_date']
7373
assert set(selected_columns) == set(backfill_df.columns)
7474

7575
os.remove(backfill_dir + "/" + fn)
@@ -114,9 +114,6 @@ def test_merge_backfill_file(self):
114114
if "from" in file:
115115
continue
116116
df = pd.read_parquet(file, engine='pyarrow')
117-
issue_date = datetime.strptime(file[-16:-8], "%Y%m%d")
118-
df["issue_date"] = issue_date
119-
df["lag"] = [(issue_date - x).days for x in df["time_value"]]
120117
pdList.append(df)
121118
os.remove(file)
122119
new_files = glob.glob(backfill_dir + "/changehc_%s*.parquet"%numtype)

0 commit comments

Comments
 (0)