From 44974ac22267a965816936e6edaa3a2fb6d0b4f8 Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Mon, 7 Apr 2025 09:37:20 -0400 Subject: [PATCH 1/6] moved time diff into constants and extended to 36 hours --- nhsn/delphi_nhsn/constants.py | 4 ++++ nhsn/delphi_nhsn/pull.py | 14 +++++++++++--- nhsn/tests/test_pull.py | 10 ++++++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/nhsn/delphi_nhsn/constants.py b/nhsn/delphi_nhsn/constants.py index da6216322..0af1bd224 100644 --- a/nhsn/delphi_nhsn/constants.py +++ b/nhsn/delphi_nhsn/constants.py @@ -1,5 +1,7 @@ """Registry for signal names.""" +from datetime import timedelta + GEOS = ["state", "nation", "hhs"] MAIN_DATASET_ID = "ua7e-t2fy" @@ -62,3 +64,5 @@ f"{NUM_HOSP_REPORTING_FLU}_prelim": float, f"{NUM_HOSP_REPORTING_RSV}_prelim": float, } + +RECENTLY_UPDATED_DIFF = timedelta(hours=36) diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index 31164770e..c81caabcc 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -4,7 +4,7 @@ import logging import random import time -from datetime import datetime, timedelta +from datetime import datetime from pathlib import Path from typing import Optional from urllib.error import HTTPError @@ -13,7 +13,15 @@ from delphi_utils import create_backup_csv from sodapy import Socrata -from .constants import MAIN_DATASET_ID, PRELIM_DATASET_ID, PRELIM_SIGNALS_MAP, PRELIM_TYPE_DICT, SIGNALS_MAP, TYPE_DICT +from .constants import ( + MAIN_DATASET_ID, + PRELIM_DATASET_ID, + PRELIM_SIGNALS_MAP, + PRELIM_TYPE_DICT, + RECENTLY_UPDATED_DIFF, + SIGNALS_MAP, + TYPE_DICT, +) def check_last_updated(socrata_token, dataset_id, logger): @@ -40,7 +48,7 @@ def check_last_updated(socrata_token, dataset_id, logger): updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) now = datetime.utcnow() - recently_updated_source = (now - updated_timestamp) < timedelta(days=1) + recently_updated_source = (now - updated_timestamp) < RECENTLY_UPDATED_DIFF prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" if recently_updated_source: diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index f8e27868b..bf6df97ad 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -1,5 +1,6 @@ import glob import time +from datetime import datetime from unittest.mock import patch, MagicMock import os import pytest @@ -12,7 +13,7 @@ pull_data_from_file, check_last_updated ) -from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID +from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID, RECENTLY_UPDATED_DIFF from delphi_utils import get_structured_logger from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR @@ -158,7 +159,7 @@ def test_pull_nhsn_data_backup(self, mock_socrata, dataset, caplog, params): @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) - @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800], ids=["updated", "stale"]) + @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800, time.time() - 108000], ids=["updated", "stale", "updated_late"]) @patch("delphi_nhsn.pull.Socrata") def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): mock_client = MagicMock() @@ -169,8 +170,9 @@ def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): check_last_updated(mock_client, dataset["id"], logger) # Check that get method was called with correct arguments - now = time.time() - if now - updatedAt < 60: + now_datetime = datetime.utcfromtimestamp(time.time()) + updatedAt_datetime = datetime.utcfromtimestamp(updatedAt) + if now_datetime - updatedAt_datetime < RECENTLY_UPDATED_DIFF: assert f"{dataset['msg_prefix']}NHSN data was recently updated; Pulling data" in caplog.text else: stale_msg = f"{dataset['msg_prefix']}NHSN data is stale; Skipping" From 7b419b02c4a77ae5ead3623150be77dd8e2a7586 Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Mon, 7 Apr 2025 09:40:25 -0400 Subject: [PATCH 2/6] added comment --- nhsn/delphi_nhsn/pull.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index c81caabcc..f0c1a012f 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -48,6 +48,7 @@ def check_last_updated(socrata_token, dataset_id, logger): updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) now = datetime.utcnow() + # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence recently_updated_source = (now - updated_timestamp) < RECENTLY_UPDATED_DIFF prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" From 2f2f63a215d9a9f52eec08fea11a98f4303c78ca Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Wed, 9 Apr 2025 13:48:16 -0400 Subject: [PATCH 3/6] working on converting into api calls instead of time diff --- nhsn/delphi_nhsn/constants.py | 2 +- nhsn/delphi_nhsn/pull.py | 17 +++++++++++++---- nhsn/tests/conftest.py | 5 +++++ nhsn/tests/test_pull.py | 1 + 4 files changed, 20 insertions(+), 5 deletions(-) diff --git a/nhsn/delphi_nhsn/constants.py b/nhsn/delphi_nhsn/constants.py index 0af1bd224..ca3cd81a3 100644 --- a/nhsn/delphi_nhsn/constants.py +++ b/nhsn/delphi_nhsn/constants.py @@ -65,4 +65,4 @@ f"{NUM_HOSP_REPORTING_RSV}_prelim": float, } -RECENTLY_UPDATED_DIFF = timedelta(hours=36) +RECENTLY_UPDATED_DIFF = timedelta(days=2) diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index f0c1a012f..5033e9e35 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -4,7 +4,7 @@ import logging import random import time -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Optional from urllib.error import HTTPError @@ -12,6 +12,8 @@ import pandas as pd from delphi_utils import create_backup_csv from sodapy import Socrata +from delphi_epidata import Epidata +from epiweeks import Week from .constants import ( MAIN_DATASET_ID, @@ -47,10 +49,17 @@ def check_last_updated(socrata_token, dataset_id, logger): response = client.get_metadata(dataset_id) updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) - now = datetime.utcnow() - # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence - recently_updated_source = (now - updated_timestamp) < RECENTLY_UPDATED_DIFF + cdc_updated_epiweek = Week.fromdate(updated_timestamp) + + meta_df = pd.DataFrame(Epidata.covidcast_meta()["epidata"]) + signal_suffix = 'prelim' if dataset_id == PRELIM_DATASET_ID else "ew" + nhsn_meta_df = meta_df[(meta_df["data_source"] == "nhsn") & (meta_df["signal"].str.endswith(signal_suffix))] + last_updated = datetime.utcfromtimestamp(nhsn_meta_df["last_update"].min()) + covidcast_updated_epiweek = Week.fromdate(last_updated) + # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence + recently_updated_source = (last_updated - updated_timestamp) > RECENTLY_UPDATED_DIFF + print("non") prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" if recently_updated_source: logger.info( diff --git a/nhsn/tests/conftest.py b/nhsn/tests/conftest.py index b321f1236..e27f7d2ce 100644 --- a/nhsn/tests/conftest.py +++ b/nhsn/tests/conftest.py @@ -23,6 +23,11 @@ with open(f"{TEST_DIR}/test_data/prelim_page.json", "r") as f: PRELIM_TEST_DATA = json.load(f) + +covidcast_metadata = pd.read_csv(f"{TEST_DIR}/test_data/covid_metadata.csv", + parse_dates=["max_time", "min_time", "max_issue", "last_update"]) + + @pytest.fixture(scope="session") def params(): params = { diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index bf6df97ad..d19c4db00 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -161,6 +161,7 @@ def test_pull_nhsn_data_backup(self, mock_socrata, dataset, caplog, params): @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800, time.time() - 108000], ids=["updated", "stale", "updated_late"]) @patch("delphi_nhsn.pull.Socrata") + @patch("delphi_nhsn.pull.Epidata.covidcast_meta", return_value=covidcast_metadata): def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): mock_client = MagicMock() mock_socrata.return_value = mock_client From 8b6a0e635121f1f35a696e82f7df6ea2fa5da97f Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Wed, 9 Apr 2025 17:51:53 -0400 Subject: [PATCH 4/6] finished converting raw time diff to using api --- nhsn/delphi_nhsn/constants.py | 2 +- nhsn/delphi_nhsn/pull.py | 29 +- nhsn/tests/conftest.py | 6 +- nhsn/tests/test_data/covidcast_meta.json | 619 +++++++++++++++++++++++ nhsn/tests/test_pull.py | 31 +- 5 files changed, 661 insertions(+), 26 deletions(-) create mode 100644 nhsn/tests/test_data/covidcast_meta.json diff --git a/nhsn/delphi_nhsn/constants.py b/nhsn/delphi_nhsn/constants.py index ca3cd81a3..caa91746e 100644 --- a/nhsn/delphi_nhsn/constants.py +++ b/nhsn/delphi_nhsn/constants.py @@ -65,4 +65,4 @@ f"{NUM_HOSP_REPORTING_RSV}_prelim": float, } -RECENTLY_UPDATED_DIFF = timedelta(days=2) +RECENTLY_UPDATED_DIFF = timedelta(days=1) diff --git a/nhsn/delphi_nhsn/pull.py b/nhsn/delphi_nhsn/pull.py index 5033e9e35..69e6d56fd 100644 --- a/nhsn/delphi_nhsn/pull.py +++ b/nhsn/delphi_nhsn/pull.py @@ -4,16 +4,15 @@ import logging import random import time -from datetime import datetime, timezone +from datetime import datetime, timedelta, timezone from pathlib import Path from typing import Optional from urllib.error import HTTPError import pandas as pd +from delphi_epidata import Epidata from delphi_utils import create_backup_csv from sodapy import Socrata -from delphi_epidata import Epidata -from epiweeks import Week from .constants import ( MAIN_DATASET_ID, @@ -48,25 +47,31 @@ def check_last_updated(socrata_token, dataset_id, logger): client = Socrata("data.cdc.gov", socrata_token) response = client.get_metadata(dataset_id) - updated_timestamp = datetime.utcfromtimestamp(int(response["rowsUpdatedAt"])) - cdc_updated_epiweek = Week.fromdate(updated_timestamp) + updated_timestamp = datetime.fromtimestamp(int(response["rowsUpdatedAt"]), tz=timezone.utc) + # pulling last updated from the api meta_df = pd.DataFrame(Epidata.covidcast_meta()["epidata"]) - signal_suffix = 'prelim' if dataset_id == PRELIM_DATASET_ID else "ew" + signal_suffix = "prelim" if dataset_id == PRELIM_DATASET_ID else "ew" nhsn_meta_df = meta_df[(meta_df["data_source"] == "nhsn") & (meta_df["signal"].str.endswith(signal_suffix))] - last_updated = datetime.utcfromtimestamp(nhsn_meta_df["last_update"].min()) - covidcast_updated_epiweek = Week.fromdate(last_updated) + est = timezone(timedelta(hours=-5)) + last_updated = datetime.fromtimestamp(nhsn_meta_df["last_update"].min(), tz=est) # currently set to run twice a week, RECENTLY_UPDATED_DIFF may need adjusting based on the cadence - recently_updated_source = (last_updated - updated_timestamp) > RECENTLY_UPDATED_DIFF - print("non") + recently_updated_source = (updated_timestamp - last_updated) > RECENTLY_UPDATED_DIFF + prelim_prefix = "Preliminary " if dataset_id == PRELIM_DATASET_ID else "" if recently_updated_source: logger.info( - f"{prelim_prefix}NHSN data was recently updated; Pulling data", updated_timestamp=updated_timestamp + f"{prelim_prefix}NHSN data was recently updated; Pulling data", + updated_timestamp=updated_timestamp, + metadata_timestamp=last_updated, ) else: - logger.info(f"{prelim_prefix}NHSN data is stale; Skipping", updated_timestamp=updated_timestamp) + logger.info( + f"{prelim_prefix}NHSN data is stale; Skipping", + updated_timestamp=updated_timestamp, + metadata_timestamp=last_updated, + ) # pylint: disable=W0703 except Exception as e: logger.info("error while processing socrata metadata; treating data as stale", error=str(e)) diff --git a/nhsn/tests/conftest.py b/nhsn/tests/conftest.py index e27f7d2ce..0abbe92e3 100644 --- a/nhsn/tests/conftest.py +++ b/nhsn/tests/conftest.py @@ -23,9 +23,9 @@ with open(f"{TEST_DIR}/test_data/prelim_page.json", "r") as f: PRELIM_TEST_DATA = json.load(f) - -covidcast_metadata = pd.read_csv(f"{TEST_DIR}/test_data/covid_metadata.csv", - parse_dates=["max_time", "min_time", "max_issue", "last_update"]) +# filtered metadata (just includes nhsn meta) +with open(f"{TEST_DIR}/test_data/covidcast_meta.json", "r") as f: + COVID_META_DATA = json.load(f) @pytest.fixture(scope="session") diff --git a/nhsn/tests/test_data/covidcast_meta.json b/nhsn/tests/test_data/covidcast_meta.json new file mode 100644 index 000000000..998d3e820 --- /dev/null +++ b/nhsn/tests/test_data/covidcast_meta.json @@ -0,0 +1,619 @@ +{ + "epidata": + [ + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 41214.0, + "mean_value": 3231.6440329, + "min_lag": 2, + "min_time": 202032, + "min_value": 17.0, + "num_locations": 10, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 4663.7857901, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 156269.0, + "mean_value": 32316.4403292, + "min_lag": 2, + "min_time": 202032, + "min_value": 1972.0, + "num_locations": 1, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 29747.0960788, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 17256.0, + "mean_value": 589.2470173, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_covid_ew", + "stdev_value": 1207.2145303, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 11524.0, + "mean_value": 445.562963, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 1089.5293067, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 54353.0, + "mean_value": 4455.6296296, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 8916.7719395, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 4691.0, + "mean_value": 84.3303996, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_flu_ew", + "stdev_value": 273.1056648, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202513, + "max_value": 11524.0, + "mean_value": 446.5401639, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 1087.8572053, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202514, + "max_value": 54374.0, + "mean_value": 4465.4016393, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 8902.0785394, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202512, + "max_lag": 244, + "max_time": 202514, + "max_value": 4691.0, + "mean_value": 84.5011633, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_flu_ew_prelim", + "stdev_value": 272.713479, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 3386.0, + "mean_value": 67.7148148, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 276.3491733, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 15402.0, + "mean_value": 2109.5769231, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 3701.788104, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1286.0, + "mean_value": 53.1310946, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_rsv_ew", + "stdev_value": 131.5669637, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 3386.0, + "mean_value": 68.6311475, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 276.3551187, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 15402.0, + "mean_value": 2119.7468354, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 3680.5022272, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1286.0, + "mean_value": 53.1281726, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_rsv_ew_prelim", + "stdev_value": 130.6219895, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 502.2559671, + "min_lag": 2, + "min_time": 202032, + "min_value": 15.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 292.6534298, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5462.0, + "mean_value": 5022.5596708, + "min_lag": 2, + "min_time": 202032, + "min_value": 966.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 1032.3401836, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 495.0, + "mean_value": 89.6951569, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_covid_ew", + "stdev_value": 85.6518225, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 502.2758197, + "min_lag": 1, + "min_time": 202032, + "min_value": 15.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 292.5640713, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743600336, + "max_issue": 202515, + "max_lag": 244, + "max_time": 202514, + "max_value": 5462.0, + "mean_value": 5022.7581967, + "min_lag": 1, + "min_time": 202032, + "min_value": 976.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 1030.2748494, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 495.0, + "mean_value": 89.6921107, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_covid_ew_prelim", + "stdev_value": 85.6365269, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 461.3489712, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 295.9522902, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5461.0, + "mean_value": 4613.4897119, + "min_lag": 2, + "min_time": 202032, + "min_value": 5.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 1419.4719106, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 495.0, + "mean_value": 82.3897994, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_flu_ew", + "stdev_value": 82.9412078, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 41214.0, + "mean_value": 3221.1389344, + "min_lag": 1, + "min_time": 202032, + "min_value": 17.0, + "num_locations": 10, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 4657.1833354, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 156269.0, + "mean_value": 32211.3893443, + "min_lag": 1, + "min_time": 202032, + "min_value": 1979.0, + "num_locations": 1, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 29731.3030329, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 17256.0, + "mean_value": 587.3246899, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "confirmed_admissions_covid_ew_prelim", + "stdev_value": 1205.1369519, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 461.5364754, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 295.8647556, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 5461.0, + "mean_value": 4615.3647541, + "min_lag": 1, + "min_time": 202032, + "min_value": 5.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 1416.8963319, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 495.0, + "mean_value": 82.4172278, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_flu_ew_prelim", + "stdev_value": 82.9379752, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 1037.0, + "mean_value": 56.6061728, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 173.6300832, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 5383.0, + "mean_value": 566.0617284, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 1494.1846698, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1743168336, + "max_issue": 202514, + "max_lag": 244, + "max_time": 202513, + "max_value": 472.0, + "mean_value": 10.108988, + "min_lag": 2, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_rsv_ew", + "stdev_value": 38.2887186, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "hhs", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 1037.0, + "mean_value": 58.452459, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 10, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 176.4958952, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "nation", + "last_update": 1743600336, + "max_issue": 202515, + "max_lag": 244, + "max_time": 202514, + "max_value": 5383.0, + "mean_value": 584.5245902, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 1, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 1518.6735056, + "time_type": "week" + }, + { + "data_source": "nhsn", + "geo_type": "state", + "last_update": 1742995536, + "max_issue": 202513, + "max_lag": 244, + "max_time": 202514, + "max_value": 472.0, + "mean_value": 10.4379391, + "min_lag": 1, + "min_time": 202032, + "min_value": 0.0, + "num_locations": 56, + "signal": "hosprep_confirmed_admissions_rsv_ew_prelim", + "stdev_value": 38.9065954, + "time_type": "week" + } + ], + "message": "success", + "result": 1 +} \ No newline at end of file diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index d19c4db00..7ce8426ea 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -1,6 +1,6 @@ import glob import time -from datetime import datetime +from datetime import datetime, timedelta from unittest.mock import patch, MagicMock import os import pytest @@ -16,7 +16,7 @@ from delphi_nhsn.constants import TYPE_DICT, PRELIM_TYPE_DICT, PRELIM_DATASET_ID, MAIN_DATASET_ID, RECENTLY_UPDATED_DIFF from delphi_utils import get_structured_logger -from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR +from conftest import TEST_DATA, PRELIM_TEST_DATA, TEST_DIR, COVID_META_DATA DATASETS = [{"id":MAIN_DATASET_ID, "test_data": TEST_DATA, @@ -159,23 +159,34 @@ def test_pull_nhsn_data_backup(self, mock_socrata, dataset, caplog, params): @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) - @pytest.mark.parametrize("updatedAt", [time.time(), time.time() - 172800, time.time() - 108000], ids=["updated", "stale", "updated_late"]) + @pytest.mark.parametrize("updatedAt", [datetime(year=2025, month=4, day=4, hour=12, minute=30), + # called off-cycle (checks for main update on wednesday, but updates on friday) + datetime(year=2025, month=3, day=28, hour=12, minute=30), + # called off-cycle (checks for main update on wednesday, but the update got skipped) + datetime(year=2025, month=4, day=4, hour=13, minute=30), + ], ids=["updated", "stale", "updated_late"]) @patch("delphi_nhsn.pull.Socrata") - @patch("delphi_nhsn.pull.Epidata.covidcast_meta", return_value=covidcast_metadata): - def test_check_last_updated(self, mock_socrata, dataset, updatedAt, caplog): + @patch("delphi_nhsn.pull.Epidata.covidcast_meta") + def test_check_last_updated(self, mock_covidcast_meta, mock_socrata, dataset, updatedAt, caplog): mock_client = MagicMock() mock_socrata.return_value = mock_client - mock_client.get_metadata.return_value = {"rowsUpdatedAt": updatedAt } - logger = get_structured_logger() + mock_covidcast_meta.return_value = COVID_META_DATA + + # preliminary data is updated on wednesdays + if dataset["prelim_flag"]: + updatedAt = updatedAt - timedelta(days=2) + mock_client.get_metadata.return_value = {"rowsUpdatedAt": updatedAt.timestamp()} + logger = get_structured_logger() check_last_updated(mock_client, dataset["id"], logger) # Check that get method was called with correct arguments - now_datetime = datetime.utcfromtimestamp(time.time()) - updatedAt_datetime = datetime.utcfromtimestamp(updatedAt) - if now_datetime - updatedAt_datetime < RECENTLY_UPDATED_DIFF: + last_updated = datetime(2025, 3, 28, 13, 25, 36) + if (updatedAt - last_updated) > RECENTLY_UPDATED_DIFF: assert f"{dataset['msg_prefix']}NHSN data was recently updated; Pulling data" in caplog.text else: stale_msg = f"{dataset['msg_prefix']}NHSN data is stale; Skipping" assert stale_msg in caplog.text + + From 40d6cd2473772e535ad76888b74019a06021e408 Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Wed, 9 Apr 2025 17:57:20 -0400 Subject: [PATCH 5/6] forgot to mock in test_run --- nhsn/tests/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nhsn/tests/conftest.py b/nhsn/tests/conftest.py index 0abbe92e3..5d0f49ce8 100644 --- a/nhsn/tests/conftest.py +++ b/nhsn/tests/conftest.py @@ -67,7 +67,8 @@ def params_w_patch(params): @pytest.fixture(scope="function") def run_as_module(params): with patch('sodapy.Socrata.get') as mock_get, \ - patch('sodapy.Socrata.get_metadata') as mock_get_metadata: + patch('sodapy.Socrata.get_metadata') as mock_get_metadata, \ + patch('delphi_nhsn.pull.Epidata.covidcast_meta') as mock_covidcast_meta: def side_effect(*args, **kwargs): if kwargs['offset'] == 0: if "ua7e-t2fy" in args[0]: @@ -78,5 +79,6 @@ def side_effect(*args, **kwargs): return [] mock_get.side_effect = side_effect mock_get_metadata.return_value = {"rowsUpdatedAt": time.time()} + mock_covidcast_meta.return_value = COVID_META_DATA run_module(params) From 9c9b550bab2636ba939d72f0c7ae08ade2e354f9 Mon Sep 17 00:00:00 2001 From: Amaris Sim Date: Wed, 9 Apr 2025 18:00:56 -0400 Subject: [PATCH 6/6] forgot to mock in other tests in test_pull --- nhsn/tests/test_pull.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nhsn/tests/test_pull.py b/nhsn/tests/test_pull.py index 7ce8426ea..e77b8e622 100644 --- a/nhsn/tests/test_pull.py +++ b/nhsn/tests/test_pull.py @@ -80,14 +80,16 @@ def test_pull_from_file(self, caplog, dataset, params_w_patch): @patch("delphi_nhsn.pull.Socrata") @patch("delphi_nhsn.pull.create_backup_csv") + @patch("delphi_nhsn.pull.Epidata.covidcast_meta") @pytest.mark.parametrize('dataset', DATASETS, ids=["data", "prelim_data"]) - def test_pull_nhsn_data_output(self, mock_create_backup, mock_socrata, dataset, caplog, params): + def test_pull_nhsn_data_output(self, mock_covidcast_meta, mock_create_backup, mock_socrata, dataset, caplog, params): now = time.time() # Mock Socrata client and its get method mock_client = MagicMock() mock_socrata.return_value = mock_client mock_client.get.side_effect = [dataset["test_data"],[]] mock_client.get_metadata.return_value = {"rowsUpdatedAt": now} + mock_covidcast_meta.return_value = COVID_META_DATA backup_dir = params["common"]["backup_dir"] test_token = params["indicator"]["socrata_token"]