1972 replace covidcast #2056

Open
wants to merge 58 commits into base: main

Changes from 38 commits
Commits (58)
4afe0f0
in progress for replacing covidcast
aysim319 Jul 9, 2024
67a312c
moving wrapper in separate module
aysim319 Jul 25, 2024
e4f2679
working on test
aysim319 Jul 29, 2024
c941982
post process for metadata
aysim319 Jul 29, 2024
a5628ac
lint and cleanup
aysim319 Jul 30, 2024
6e22db8
fixing for test
aysim319 Jul 30, 2024
a2a149f
implementing suggested changes
aysim319 Aug 1, 2024
9269621
removing wrapper and directly converting in other places and moving t…
aysim319 Aug 5, 2024
8e67b6c
modifying test
aysim319 Aug 5, 2024
bf21d33
more suggestion
aysim319 Aug 6, 2024
d76cd40
sircomplainslot needs more filtering
aysim319 Aug 7, 2024
fd50d9d
adding credential for google symptoms
aysim319 Aug 8, 2024
76f1519
mocking api call in google symptoms
aysim319 Aug 8, 2024
157c6c6
organizing validations
aysim319 Aug 8, 2024
23384e7
extended date range and throws error when comes empty
aysim319 Aug 9, 2024
e5c3b46
delphi_utils/validator/datafetcher.py
aysim319 Aug 9, 2024
33936a4
test+refactor: tweak covidcast port tests and
dshemetov Aug 9, 2024
01a7f66
fix: don't parse datetimes on unused columns
dshemetov Aug 9, 2024
6eeef4e
lint: remove unused types
dshemetov Aug 9, 2024
1f59e06
fix: dont import covidcast in sir_complainsalot
dshemetov Aug 9, 2024
7f60275
fix: remove covidcast from indicator setup.py dependencies
dshemetov Aug 9, 2024
55150bc
fix: remove duplicate ported_signal
dshemetov Aug 10, 2024
329d340
fix: revert _parse_datetimes
dshemetov Aug 12, 2024
9d91be7
adding conditional to fail if api fails
aysim319 Aug 22, 2024
5ac98ab
change
aysim319 Sep 13, 2024
b92695a
Merge branch 'main' into 1972-replace-covidcast
aysim319 Sep 13, 2024
15ce75f
merge change that didn't make it for some reason
aysim319 Sep 13, 2024
6ee3e9e
lint
aysim319 Sep 13, 2024
f25605d
lint again
aysim319 Sep 13, 2024
2654946
fixing logic
aysim319 Sep 13, 2024
c63f095
remove covidcast from pyproject.toml
aysim319 Sep 13, 2024
79bf550
fix tests
aysim319 Sep 13, 2024
4c44d3a
need to update requirements
aysim319 Sep 13, 2024
8a308c4
fix test
aysim319 Sep 13, 2024
b4039c5
lint and fix package
aysim319 Sep 13, 2024
4916465
fix test
aysim319 Sep 13, 2024
670bf04
lint
aysim319 Sep 13, 2024
2f94d15
lint
aysim319 Sep 13, 2024
57fc591
suggested changes
aysim319 Sep 18, 2024
913c72f
fixed test
aysim319 Sep 18, 2024
a9cebed
handle check more gracefully
aysim319 Sep 18, 2024
e30aaca
export date util
aysim319 Sep 18, 2024
733c85e
wrap around try except for sircal
aysim319 Sep 19, 2024
f61462a
merge conflict
aysim319 Sep 19, 2024
06fafd0
lint
aysim319 Sep 19, 2024
d3bc895
remove former testing script
aysim319 Sep 19, 2024
68e2850
lint and fixing missing params
aysim319 Sep 19, 2024
9ff0979
lint
aysim319 Sep 19, 2024
f7fcefc
fixed test
aysim319 Sep 19, 2024
957af29
lock pandas version
aysim319 Sep 20, 2024
9585196
lint
aysim319 Sep 20, 2024
cf4f06d
lint
aysim319 Sep 20, 2024
b5929af
more fix test
aysim319 Sep 20, 2024
ee64984
lint again
aysim319 Sep 20, 2024
fa9143a
lint
aysim319 Sep 20, 2024
2de9d3b
Merge branch 'main' into 1972-replace-covidcast
aysim319 Nov 5, 2024
a1aad7a
changed based on suggestion
aysim319 Nov 11, 2024
38f25bb
made consistent with actual response
aysim319 Nov 13, 2024
11 changes: 5 additions & 6 deletions _delphi_utils_python/delphi_utils/__init__.py
@@ -5,14 +5,13 @@

from .archive import ArchiveDiffer, GitArchiveDiffer, S3ArchiveDiffer
from .export import create_export_csv
from .utils import read_params

from .slack_notifier import SlackNotifier
from .logger import get_structured_logger
from .geomap import GeoMapper
from .smooth import Smoother
from .signal import add_prefix
from .logger import get_structured_logger
from .nancodes import Nans
from .signal import add_prefix
from .slack_notifier import SlackNotifier
from .smooth import Smoother
from .utils import read_params
from .weekday import Weekday

__version__ = "0.3.25"
87 changes: 76 additions & 11 deletions _delphi_utils_python/delphi_utils/validator/datafetcher.py
@@ -3,13 +3,15 @@

import re
import threading
import warnings
from os import listdir
from os.path import isfile, join
import warnings
import requests
import pandas as pd

import numpy as np
import covidcast
import pandas as pd
import requests
from delphi_epidata import Epidata

from .errors import APIDataFetchError, ValidationFailure

FILENAME_REGEX = re.compile(
@@ -115,7 +117,22 @@ def get_geo_signal_combos(data_source, api_key):
meta_response.raise_for_status()
source_signal_mappings = {i['source']:i['db_source'] for i in
meta_response.json()}
meta = covidcast.metadata()

response = Epidata.covidcast_meta()

# pylint: disable=R1720
if response["result"] != 1:
# Something failed in the API and we did not get real metadata
raise RuntimeError(
"Error when fetching metadata from the API", response["message"]
)

# pylint: disable=I0021
else:
meta = pd.DataFrame.from_dict(response["epidata"])
# note: this will fail for signals with weekly data, but currently not supported for validation
meta = meta[meta["time_type"] == "day"]

source_meta = meta[meta['data_source'] == data_source]
# Need to convert np.records to tuples so they are hashable and can be used in sets and dicts.
geo_signal_combos = list(map(tuple,
@@ -158,18 +175,66 @@ def fetch_api_reference(data_source, start_date, end_date, geo_type, signal_type

Formatting is changed to match that of source data CSVs.
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore")
api_df = covidcast.signal(
data_source, signal_type, start_date, end_date, geo_type)
if start_date > end_date:
raise ValueError(
"end_day must be on or after start_day, but "
f"start_day = '{start_date}', end_day = '{end_date}'"
)
response = Epidata.covidcast(
data_source,
signal_type,
time_type="day",
geo_type=geo_type,
time_values=Epidata.range(
start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")
),
geo_value="*",
)
if response["result"] != 1:
# Something failed in the API and we did not get real signal data
raise RuntimeError(
"Error when fetching signal data from the API", response["message"]
)

# pylint: disable=E1124
if response["message"] not in {"success", "no results"}:
# pylint: disable=E1123
warnings.warn(
"Problem obtaining data",
# pylint: disable=E0602
RuntimeWarning,
message=response["message"],
data_source=data_source,
signal=signal,
time_value=params["time_values"],
geo_type=geo_type,
)
response = Epidata.covidcast(
data_source,
signal_type,
time_type="day",
geo_type=geo_type,
time_values=Epidata.range(
start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d")
),
geo_value="*",
)

api_df = None
if len(response["epidata"]) > 0:
api_df = pd.DataFrame.from_dict(response["epidata"])
# note: this will fail for signals with weekly data, but currently not supported for validation
api_df["issue"] = pd.to_datetime(api_df["issue"], format="%Y%m%d")
api_df["time_value"] = pd.to_datetime(api_df["time_value"], format="%Y%m%d")
api_df.drop("direction", axis=1, inplace=True)
api_df["data_source"] = data_source
api_df["signal"] = signal_type

error_context = f"when fetching reference data from {start_date} to {end_date} " +\
f"for data source: {data_source}, signal type: {signal_type}, geo type: {geo_type}"

if api_df is None:
raise APIDataFetchError("Error: no API data was returned " + error_context)
if not isinstance(api_df, pd.DataFrame):
raise APIDataFetchError("Error: API return value was not a dataframe " + error_context)

column_names = ["geo_id", "val",
"se", "sample_size", "time_value"]
14 changes: 7 additions & 7 deletions _delphi_utils_python/delphi_utils/validator/dynamic.py
@@ -1,14 +1,16 @@
"""Dynamic file checks."""

import re
from dataclasses import dataclass
from datetime import date, timedelta
from typing import Dict, Set
import re
import pandas as pd

import numpy as np
import covidcast
from .errors import ValidationFailure
import pandas as pd

from .datafetcher import get_geo_signal_combos, threaded_api_calls
from .utils import relative_difference_by_min, TimeWindow, lag_converter
from .errors import ValidationFailure
from .utils import TimeWindow, lag_converter, relative_difference_by_min


class DynamicValidator:
@@ -78,8 +80,6 @@ def validate(self, all_frames, report):
# Get 14 days prior to the earliest list date
outlier_lookbehind = timedelta(days=14)

# Authenticate API
covidcast.use_api_key(self.params.api_key)

# Get all expected combinations of geo_type and signal.
geo_signal_combos = get_geo_signal_combos(self.params.data_source,
8 changes: 5 additions & 3 deletions _delphi_utils_python/delphi_utils/validator/run.py
@@ -5,8 +5,10 @@
when the module is run with `python -m delphi_utils.validator`.
"""
import argparse as ap
import covidcast
from .. import read_params, get_structured_logger

from delphi_epidata import Epidata

from .. import get_structured_logger, read_params
from .validate import Validator


@@ -18,7 +20,7 @@ def run_module():
args = parser.parse_args()
params = read_params()
assert "validation" in params
covidcast.use_api_key(params["validation"]["common"]["api_credentials"])
Epidata.auth = ("epidata", params["validation"]["common"]["api_credentials"])
dry_run_param = params["validation"]["common"].get("dry_run", False)
params["validation"]["common"]["dry_run"] = args.dry_run or dry_run_param
validator = Validator(params)
2 changes: 1 addition & 1 deletion _delphi_utils_python/pyproject.toml
@@ -17,8 +17,8 @@ classifiers = [
]
dependencies = [
"boto3",
"covidcast",
"cvxpy",
"delphi-epidata",
"epiweeks",
"gitpython",
"importlib_resources>=1.3",
28 changes: 28 additions & 0 deletions _delphi_utils_python/tests/test_data/sample_epidata_metadata.json
@@ -0,0 +1,28 @@
{"data_source": ["chng", "chng", "chng",
"covid-act-now",
"covid-act-now",
"covid-act-now",
"chng"],
"signal": ["smoothed_outpatient_cli",
"smoothed_outpatient_covid",
"smoothed_outpatient_covid",
"pcr_specimen_positivity_rate",
"pcr_specimen_positivity_rate",
"pcr_specimen_total_tests",
"inactive"],
"geo_type": ["state", "state", "county",
"hrr", "msa", "msa",
"state"],
"min_time": ["20200101", "20200101", "20200101",
"20200101", "20200101", "20200101",
"20200101"],
"max_time": ["20240101", "20240101", "20240101",
"20240101", "20240101", "20240101",
"20240101"],
"last_update": [1711963480, 1711963480, 1711963480,
1711963480, 1711963480, 1711963480,
1711963480],
"time_type": ["day", "day", "day",
"day", "day", "day",
"day"]
}
9 changes: 9 additions & 0 deletions _delphi_utils_python/tests/test_data/sample_epidata_signal_a.json
@@ -0,0 +1,9 @@
{"geo_value": ["1044"],
"stderr": [null],
"value": [3],
"issue": [20200101],
"lag": [7],
"sample_size": [null],
"time_value": [20200101],
"direction": [null]
}
9 changes: 9 additions & 0 deletions _delphi_utils_python/tests/test_data/sample_epidata_signal_county.json
@@ -0,0 +1,9 @@
{"geo_value": ["0888"],
"stderr": [2],
"value": [14],
"issue": [20200101],
"lag": [1],
"sample_size": [100],
"time_value": [20200101],
"direction": [null]
}
85 changes: 31 additions & 54 deletions _delphi_utils_python/tests/validator/test_datafetcher.py
@@ -1,7 +1,9 @@
"""Tests for datafetcher.py."""

from datetime import date
from datetime import date, datetime
import mock
import json
from pathlib import Path
import numpy as np
import pandas as pd
import pytest
@@ -14,6 +16,7 @@
from delphi_utils.validator.errors import ValidationFailure


TEST_DIR = Path(__file__).parent.parent

class TestDataFetcher:
"""Tests for various data fetching utilities."""
@@ -45,6 +48,27 @@ def raise_for_status(self):
{'source': 'covid-act-now', 'db_source': 'covid-act-now'}], 200)
elif "params" in kwargs and kwargs["params"] == {'signal': 'chng:inactive'}:
return MockResponse([{"signals": [{"active": False}]}], 200)
elif args[0] == 'https://api.delphi.cmu.edu/epidata/covidcast_meta/' and \
'delphi_epidata' in kwargs["headers"]["user-agent"]:
with open(f"{TEST_DIR}/test_data/sample_epidata_metadata.json") as f:
epidata = json.load(f)
response = {"epidata": epidata, "result": 1, "message": "success"}
return MockResponse(response, 200)
elif args[0] == 'https://api.delphi.cmu.edu/epidata/covidcast/' and \
'delphi_epidata' in kwargs["headers"]["user-agent"]:
signal_type = args[1].get("signals")
geo_type = args[1].get("geo_type")
if signal_type == "a":
with open(f"{TEST_DIR}/test_data/sample_epidata_signal_a.json") as f:
epidata = json.load(f)
response = {"epidata": epidata, "result": 1, "message": "success"}
return MockResponse(response, 200)
Comment on lines +63 to +66
Contributor
I love that you broke the sample data out into their own .json files!

However, for accuracy's sake, the format doesn't match actual usage now: you're handing back "HTTP responses" whose "epidata" payload is transposed/rotated from what it is in practice. The API server would be returning a list of dicts, but this is giving a dict of lists.

AFAICT, the differences come from the old version of these tests, which mocked calls to the covidcast library's methods instead of the underlying HTTP request as is done now -- the old library seems to get these transpositions from its uses of pd.DataFrame.from_dict().

TL;DR: If you're going to mock HTTP requests (which I think is the right thing to do), make them return something that looks like the real HTTP responses. Otherwise, mock a different layer in the call stack.
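
For illustration, a minimal sketch of the two payload shapes being discussed, with field names and values borrowed from the sample fixture files in this PR (the real covidcast payload carries additional fields):

```python
import pandas as pd

# Shape the API server actually returns: a list of row dicts.
rows = [
    {"geo_value": "1044", "value": 3, "time_value": 20200101},
    {"geo_value": "0888", "value": 14, "time_value": 20200101},
]

# Transposed shape used by the current fixtures: a dict of column lists.
columns = {
    "geo_value": ["1044", "0888"],
    "value": [3, 14],
    "time_value": [20200101, 20200101],
}

# Both flatten to the same frame once they reach pandas, which is why the
# tests still pass, but only the first mirrors a real HTTP-level response.
assert pd.DataFrame(rows).equals(pd.DataFrame(columns))
```

Either shape survives the conversion to a DataFrame; the distinction only matters for how faithful the requests.get mock is to the real API.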

Contributor Author
I compared the output that epidata.covidcast returned, and this is how it comes back: sample_epidata_covidcast_result.json
sample code:

```python
Epidata.debug = True
Epidata.auth = ("epidata", API_KEY)

meta = pd.DataFrame(Epidata.covidcast_meta()["epidata"])

source = "doctor-visits"
nchs_meta = meta[meta["data_source"] == source]

signal = "smoothed_cli"

response6 = Epidata.covidcast(source, signal, time_type="day",
                              geo_type="state", time_values=Epidata.range("20241105", "20241105"),
                              geo_value="*", as_of=None,
                              )
df = pd.DataFrame.from_dict(response6["epidata"])
```

if geo_type == "county":
with open(f"{TEST_DIR}/test_data/sample_epidata_signal_county.json") as f:
epidata = json.load(f)
response = {"epidata": epidata, "result": 1, "message": "success"}
return MockResponse(response, 200)
return MockResponse({"epidata": {}, "result": 1, "message": "success"}, 200)
else:
return MockResponse([{"signals": [{"active": True}]}], 200)

@@ -57,27 +81,9 @@ def test_bad_api_key(self, **kwargs):
get_geo_signal_combos("chng", api_key="")

@mock.patch('requests.get', side_effect=mocked_requests_get)
@mock.patch("covidcast.metadata")
def test_get_geo_signal_combos(self, mock_metadata, mock_get):
def test_get_geo_signal_combos(self, mock_get):

"""Test that the geo signal combos are correctly pulled from the covidcast metadata."""
# Need to use actual data_source and signal names since we reference the API
# We let the chng signal "inactive" be an inactive signal
mock_metadata.return_value = pd.DataFrame({"data_source": ["chng", "chng", "chng",
"covid-act-now",
"covid-act-now",
"covid-act-now",
"chng"],
"signal": ["smoothed_outpatient_cli",
"smoothed_outpatient_covid",
"smoothed_outpatient_covid",
"pcr_specimen_positivity_rate",
"pcr_specimen_positivity_rate",
"pcr_specimen_total_tests",
"inactive"],
"geo_type": ["state", "state", "county",
"hrr", "msa", "msa",
"state"]
})
assert set(get_geo_signal_combos("chng", api_key="")) == set(
[("state", "smoothed_outpatient_cli"),
("state", "smoothed_outpatient_covid"),
@@ -87,49 +93,20 @@ def test_get_geo_signal_combos(self, mock_metadata, mock_get):
("msa", "pcr_specimen_positivity_rate"),
("msa", "pcr_specimen_total_tests")])

@mock.patch("covidcast.signal")
def test_threaded_api_calls(self, mock_signal):
@mock.patch('requests.get', side_effect=mocked_requests_get)
def test_threaded_api_calls(self, mock_get):
"""Test that calls to the covidcast API are made."""

signal_data_1 = pd.DataFrame({"geo_value": ["1044"],
"stderr": [None],
"value": [3],
"issue": [10],
"lag": [7],
"sample_size": [None],
"time_value": [10]
})
signal_data_2 = pd.DataFrame({"geo_value": ["0888"],
"stderr": [2],
"value": [14],
"issue": [10],
"lag": [1],
"sample_size": [100],
"time_value": [8]
})

def mock_signal_return_fn(unused_data_source, signal_type, unused_start_date,
unused_end_date, geo_type):
"""Function to return data when covidcast.signal() is called."""
if signal_type == "a":
return signal_data_1
if geo_type == "county":
return signal_data_2
return None

mock_signal.side_effect = mock_signal_return_fn

processed_signal_data_1 = pd.DataFrame({"geo_id": ["1044"],
"val": [3],
"se": [np.nan],
"sample_size": [np.nan],
"time_value": [10]
"time_value": [datetime.strptime("20200101", "%Y%m%d")],
})
processed_signal_data_2 = pd.DataFrame({"geo_id": ["0888"],
"val": [14],
"se": [2],
"sample_size": [100],
"time_value": [8]
"time_value": [datetime.strptime("20200101", "%Y%m%d")],
})
expected = {
("county", "a"): processed_signal_data_1,
1 change: 0 additions & 1 deletion changehc/setup.py
@@ -3,7 +3,6 @@

required = [
"boto3",
"covidcast",
"darker[isort]~=2.1.1",
"delphi-utils",
"mock",
3 changes: 2 additions & 1 deletion changehc/tests/test_update_sensor.py
@@ -91,7 +91,8 @@ def test_geo_reindex(self):
"timestamp": [pd.Timestamp(f'03-{i}-2020') for i in range(1, 14)]})
if geo == "county": # test for rogue \N
row_contain_N = {"num": 700, "fips": r"\N", "den": 2000, "timestamp": pd.Timestamp("03-15-2020")}
test_data = test_data.append(row_contain_N, ignore_index=True)
test_data = pd.concat([test_data, pd.DataFrame([row_contain_N])], ignore_index=True)

data_frame = su_inst.geo_reindex(test_data)
assert data_frame.shape[0] == multiple*len(su_inst.fit_dates)
assert (data_frame.sum(numeric_only=True) == (4200,19000)).all()