diff --git a/docs/api/covidcast-signals/quidel-inactive.md b/docs/api/covidcast-signals/quidel-inactive.md index 19b98ee6a..1c7583845 100644 --- a/docs/api/covidcast-signals/quidel-inactive.md +++ b/docs/api/covidcast-signals/quidel-inactive.md @@ -15,6 +15,7 @@ grand_parent: COVIDcast Main Endpoint 1. TOC {:toc} +## Accessibility: Delphi-internal only ## COVID-19 Tests These signals are still active. Documentation is available on the [Quidel page](quidel.md). diff --git a/docs/api/covidcast-signals/quidel.md b/docs/api/covidcast-signals/quidel.md index 54296dbb3..6cb153269 100644 --- a/docs/api/covidcast-signals/quidel.md +++ b/docs/api/covidcast-signals/quidel.md @@ -15,6 +15,8 @@ grand_parent: COVIDcast Main Endpoint 1. TOC {:toc} +## Accessibility: Delphi-internal only + ## COVID-19 Tests * **Earliest issue available:** July 29, 2020 diff --git a/docs/api/covidcast_signals.md b/docs/api/covidcast_signals.md index 895977010..4e69b6d73 100644 --- a/docs/api/covidcast_signals.md +++ b/docs/api/covidcast_signals.md @@ -36,7 +36,6 @@ dashboard](https://delphi.cmu.edu/covidcast/): | Early Indicators | COVID-Like Symptoms | [`fb-survey`](covidcast-signals/fb-survey.md) | `smoothed_wcli` | | Early Indicators | COVID-Like Symptoms in Community | [`fb-survey`](covidcast-signals/fb-survey.md) | `smoothed_whh_cmnty_cli` | | Early Indicators | COVID-Related Doctor Visits | [`doctor-visits`](covidcast-signals/doctor-visits.md) | `smoothed_adj_cli` | -| Cases and Testing | COVID Antigen Test Positivity (Quidel) | [`quidel`](covidcast-signals/quidel.md) | `covid_ag_smoothed_pct_positive` | | Cases and Testing | COVID Cases | [`jhu-csse`](covidcast-signals/jhu-csse.md) | `confirmed_7dav_incidence_prop` | | Late Indicators | COVID Hospital Admissions | [`hhs`](covidcast-signals/hhs.md) | `confirmed_admissions_covid_1d_prop_7dav` | | Late Indicators | Deaths | [`jhu-csse`](covidcast-signals/jhu-csse.md) | `deaths_7dav_incidence_prop` | diff --git 
a/integrations/server/test_covidcast_endpoints.py b/integrations/server/test_covidcast_endpoints.py index c86bb10d6..3ba0af039 100644 --- a/integrations/server/test_covidcast_endpoints.py +++ b/integrations/server/test_covidcast_endpoints.py @@ -26,7 +26,19 @@ def localSetUp(self): # reset the `covidcast_meta_cache` table (it should always have one row) self._db._cursor.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"') - def _fetch(self, endpoint="/", is_compatibility=False, **params): + cur = self._db._cursor + # NOTE: we must specify the db schema "epidata" here because the cursor/connection are bound to schema "covid" + cur.execute("TRUNCATE TABLE epidata.api_user") + cur.execute("TRUNCATE TABLE epidata.user_role") + cur.execute("TRUNCATE TABLE epidata.user_role_link") + cur.execute("INSERT INTO epidata.api_user (api_key, email) VALUES ('quidel_key', 'quidel_email')") + cur.execute("INSERT INTO epidata.user_role (name) VALUES ('quidel')") + cur.execute( + "INSERT INTO epidata.user_role_link (user_id, role_id) SELECT api_user.id, user_role.id FROM epidata.api_user JOIN epidata.user_role WHERE api_key='quidel_key' and user_role.name='quidel'" + ) + cur.execute("INSERT INTO epidata.api_user (api_key, email) VALUES ('key', 'email')") + + def _fetch(self, endpoint="/", is_compatibility=False, auth=AUTH, **params): # make the request if is_compatibility: url = BASE_URL_OLD @@ -37,7 +49,7 @@ def _fetch(self, endpoint="/", is_compatibility=False, **params): params.setdefault("data_source", params.get("source")) else: url = f"{BASE_URL}{endpoint}" - response = requests.get(url, params=params, auth=AUTH) + response = requests.get(url, params=params, auth=auth) response.raise_for_status() return response.json() @@ -67,6 +79,28 @@ def test_basic(self): out = self._fetch("/", signal=first.signal_pair(), geo=first.geo_pair(), time="day:*") self.assertEqual(len(out["epidata"]), len(rows)) + def test_basic_restricted_source(self): + """Request a signal 
from the / endpoint.""" + rows = [CovidcastTestRow.make_default_row(time_value=2020_04_01 + i, value=i, source="quidel") for i in range(10)] + first = rows[0] + self._insert_rows(rows) + + with self.subTest("validation"): + out = self._fetch("/") + self.assertEqual(out["result"], -1) + + with self.subTest("no_roles"): + out = self._fetch("/", signal=first.signal_pair(), geo=first.geo_pair(), time="day:*") + self.assertEqual(len(out["epidata"]), 0) + + with self.subTest("no_api_key"): + out = self._fetch("/", auth=None, signal=first.signal_pair(), geo=first.geo_pair(), time="day:*") + self.assertEqual(len(out["epidata"]), 0) + + with self.subTest("quidel_role"): + out = self._fetch("/", auth=("epidata", "quidel_key"), signal=first.signal_pair(), geo=first.geo_pair(), time="day:*") + self.assertEqual(len(out["epidata"]), len(rows)) + def test_compatibility(self): """Request at the /api.php endpoint.""" rows = [CovidcastTestRow.make_default_row(source="src", signal="sig", time_value=2020_04_01 + i, value=i) for i in range(10)] @@ -271,6 +305,35 @@ def test_meta(self): out = self._fetch("/meta", signal=f"{first.source}:X") self.assertEqual(len(out), 0) + def test_meta_restricted(self): + """Request 'restricted' signals from the /meta endpoint.""" + # NOTE: this method is nearly identical to ./test_covidcast_meta.py:test_restricted_sources() + # ...except the self._fetch() methods are different, as is the format of those methods' outputs + # (the other covidcast_meta endpoint uses APrinter, this one returns its own unadulterated json). 
+ # additionally, the sample data used here must match entries (that is, named sources and signals) + # from covidcast_utils.model.data_sources (the `data_sources` variable from file + # src/server/endpoints/covidcast_utils/model.py, which is created by the _load_data_sources() method + # and fed by src/server/endpoints/covidcast_utils/db_sources.csv, but also surreptitiously augmented + # by _load_data_signals() which attaches a list of signals to each source, + # in turn fed by src/server/endpoints/covidcast_utils/db_signals.csv) + + # insert data from two different sources, one restricted/protected (quidel), one not + self._insert_rows([ + CovidcastTestRow.make_default_row(source="quidel", signal="raw_pct_negative"), + CovidcastTestRow.make_default_row(source="hhs", signal="confirmed_admissions_covid_1d") + ]) + + # update metadata cache + update_cache(args=None) + + # verify unauthenticated (no api key) or unauthorized (user w/o privilege) only see metadata for one source + self.assertEqual(len(self._fetch("/meta", auth=None)), 1) + self.assertEqual(len(self._fetch("/meta", auth=AUTH)), 1) + + # verify authorized user sees metadata for both sources + qauth = ('epidata', 'quidel_key') + self.assertEqual(len(self._fetch("/meta", auth=qauth)), 2) + def test_coverage(self): """Request a signal from the /coverage endpoint.""" diff --git a/integrations/server/test_covidcast_meta.py b/integrations/server/test_covidcast_meta.py index ad297f1e8..d03317c98 100644 --- a/integrations/server/test_covidcast_meta.py +++ b/integrations/server/test_covidcast_meta.py @@ -9,6 +9,7 @@ #first party from delphi_utils import Nans +from delphi.epidata.acquisition.covidcast.test_utils import CovidcastBase, CovidcastTestRow from delphi.epidata.maintenance.covidcast_meta_cache_updater import main as update_cache import delphi.operations.secrets as secrets @@ -17,7 +18,7 @@ AUTH = ('epidata', 'key') -class CovidcastMetaTests(unittest.TestCase): +class CovidcastMetaTests(CovidcastBase): 
"""Tests the `covidcast_meta` endpoint.""" src_sig_lookups = { @@ -48,7 +49,7 @@ class CovidcastMetaTests(unittest.TestCase): %d, %d) ''' - def setUp(self): + def localSetUp(self): """Perform per-test setup.""" # connect to the `epidata` database and clear the `covidcast` table @@ -68,6 +69,17 @@ def setUp(self): # reset the `covidcast_meta_cache` table (it should always have one row) cur.execute('update covidcast_meta_cache set timestamp = 0, epidata = "[]"') + # NOTE: we must specify the db schema "epidata" here because the cursor/connection are bound to schema "covid" + cur.execute("TRUNCATE TABLE epidata.api_user") + cur.execute("TRUNCATE TABLE epidata.user_role") + cur.execute("TRUNCATE TABLE epidata.user_role_link") + cur.execute("INSERT INTO epidata.api_user (api_key, email) VALUES ('quidel_key', 'quidel_email')") + cur.execute("INSERT INTO epidata.user_role (name) VALUES ('quidel')") + cur.execute( + "INSERT INTO epidata.user_role_link (user_id, role_id) SELECT api_user.id, user_role.id FROM epidata.api_user JOIN epidata.user_role WHERE api_key='quidel_key' and user_role.name='quidel'" + ) + cur.execute("INSERT INTO epidata.api_user (api_key, email) VALUES ('key', 'email')") + # populate dimension tables for (src,sig) in self.src_sig_lookups: cur.execute(''' @@ -93,7 +105,7 @@ def setUp(self): secrets.db.epi = ('user', 'pass') - def tearDown(self): + def localTearDown(self): """Perform per-test teardown.""" self.cur.close() self.cnx.close() @@ -138,10 +150,10 @@ def _get_id(self): return self.id_counter @staticmethod - def _fetch(**kwargs): + def _fetch(auth=AUTH, **kwargs): params = kwargs.copy() params['endpoint'] = 'covidcast_meta' - response = requests.get(BASE_URL, params=params, auth=AUTH) + response = requests.get(BASE_URL, params=params, auth=auth) response.raise_for_status() return response.json() @@ -161,6 +173,26 @@ def test_round_trip(self): 'message': 'success', }) + def test_restricted_sources(self): + # NOTE: this method is nearly identical 
to ./test_covidcast_endpoints.py:test_meta_restricted() + + # insert data from two different sources, one restricted/protected (quidel), one not + self._insert_rows([ + CovidcastTestRow.make_default_row(source="quidel"), + CovidcastTestRow.make_default_row(source="not-quidel") + ]) + + # generate metadata cache + update_cache(args=None) + + # verify unauthenticated (no api key) or unauthorized (user w/o privilege) only see metadata for one source + self.assertEqual(len(self._fetch(auth=None)['epidata']), 1) + self.assertEqual(len(self._fetch(auth=AUTH)['epidata']), 1) + + # verify authorized user sees metadata for both sources + qauth = ('epidata', 'quidel_key') + self.assertEqual(len(self._fetch(auth=qauth)['epidata']), 2) + def test_filter(self): """Test filtering options some sample data.""" diff --git a/src/server/_security.py b/src/server/_security.py index 38294eb10..c47f948a5 100644 --- a/src/server/_security.py +++ b/src/server/_security.py @@ -82,6 +82,22 @@ def decorated_function(*args, **kwargs): return decorator_wrapper +# key is data "source" name, value is role name required to access that source +sources_protected_by_roles = { + 'quidel': 'quidel', + # the following two entries are needed because + # the covidcast endpoint uses this method + # to allow using various different "source" name aliases: + # delphi.epidata.server.endpoints.covidcast_utils.model.create_source_signal_alias_mapper() + # which, for reference, is populated by the file: + # src/server/endpoints/covidcast_utils/db_sources.csv + 'quidel-covid-ag': 'quidel', + 'quidel-flu': 'quidel', +} +# TODO(): source this info from a better place than a hardcoded dict: +# maybe somewhere in the db? maybe in src/server/endpoints/covidcast_utils/db_sources.csv ? 
+ + def update_key_last_time_used(user): if user: # update last usage for this user's api key to "now()" diff --git a/src/server/endpoints/covidcast.py b/src/server/endpoints/covidcast.py index c1350b490..bd336dacf 100644 --- a/src/server/endpoints/covidcast.py +++ b/src/server/endpoints/covidcast.py @@ -30,11 +30,13 @@ ) from .._query import QueryBuilder, execute_query, run_query, parse_row, filter_fields from .._printer import create_printer, CSVPrinter +from .._security import current_user, sources_protected_by_roles from .._validate import require_all from .._pandas import as_pandas, print_pandas from .covidcast_utils import compute_trend, compute_trends, compute_trend_value, CovidcastMetaEntry from ..utils import shift_day_value, day_to_time_value, time_value_to_iso, time_value_to_day, shift_week_value, time_value_to_week, guess_time_value_is_day, week_to_time_value, TimeValues from .covidcast_utils.model import TimeType, count_signal_time_types, data_sources, create_source_signal_alias_mapper +from delphi.epidata.common.logger import get_structured_logger # first argument is the endpoint name bp = Blueprint("covidcast", __name__) @@ -43,9 +45,30 @@ latest_table = "epimetric_latest_v" history_table = "epimetric_full_v" +def restrict_by_roles(source_signal_sets): + # takes a list of SourceSignalSet objects + # and returns only those from the list + # that the current user is permitted to access. 
+ user = current_user + allowed_source_signal_sets = [] + for src_sig_set in source_signal_sets: + src = src_sig_set.source + if src in sources_protected_by_roles: + role = sources_protected_by_roles[src] + if user and user.has_role(role): + allowed_source_signal_sets.append(src_sig_set) + else: + # protected src and user does not have permission => leave it out of the srcsig sets + get_structured_logger("covcast_endpt").warning("non-authZd request for restricted 'source'", api_key=(user and user.api_key), src=src) + else: + allowed_source_signal_sets.append(src_sig_set) + return allowed_source_signal_sets + + @bp.route("/", methods=("GET", "POST")) def handle(): source_signal_sets = parse_source_signal_sets() + source_signal_sets = restrict_by_roles(source_signal_sets) source_signal_sets, alias_mapper = create_source_signal_alias_mapper(source_signal_sets) time_set = parse_time_set() geo_sets = parse_geo_sets() @@ -102,6 +125,7 @@ def _verify_argument_time_type_matches(is_day_argument: bool, count_daily_signal def handle_trend(): require_all(request, "window", "date") source_signal_sets = parse_source_signal_sets() + source_signal_sets = restrict_by_roles(source_signal_sets) daily_signals, weekly_signals = count_signal_time_types(source_signal_sets) source_signal_sets, alias_mapper = create_source_signal_alias_mapper(source_signal_sets) geo_sets = parse_geo_sets() @@ -157,6 +181,7 @@ def gen(rows): def handle_trendseries(): require_all(request, "window") source_signal_sets = parse_source_signal_sets() + source_signal_sets = restrict_by_roles(source_signal_sets) daily_signals, weekly_signals = count_signal_time_types(source_signal_sets) source_signal_sets, alias_mapper = create_source_signal_alias_mapper(source_signal_sets) geo_sets = parse_geo_sets() @@ -405,8 +430,19 @@ def handle_meta(): entry = by_signal.setdefault((row["data_source"], row["signal"]), []) entry.append(row) + user = current_user sources: List[Dict[str, Any]] = [] for source in data_sources: + 
+ src = source.db_source + if src in sources_protected_by_roles: + role = sources_protected_by_roles[src] + if not (user and user.has_role(role)): + # if this is a protected source + # and the user doesn't have the allowed role + # (or if we have no user) + # then skip this source + continue + meta_signals: List[Dict[str, Any]] = [] for signal in source.signals: @@ -448,6 +484,7 @@ def handle_coverage(): """ source_signal_sets = parse_source_signal_sets() + source_signal_sets = restrict_by_roles(source_signal_sets) daily_signals, weekly_signals = count_signal_time_types(source_signal_sets) source_signal_sets, alias_mapper = create_source_signal_alias_mapper(source_signal_sets) diff --git a/src/server/endpoints/covidcast_meta.py b/src/server/endpoints/covidcast_meta.py index 52d0a06eb..35dc9f12e 100644 --- a/src/server/endpoints/covidcast_meta.py +++ b/src/server/endpoints/covidcast_meta.py @@ -8,6 +8,7 @@ from .._params import extract_strings from .._printer import create_printer from .._query import filter_fields +from .._security import current_user, sources_protected_by_roles from delphi.epidata.common.logger import get_structured_logger bp = Blueprint("covidcast_meta", __name__) @@ -71,17 +72,28 @@ def handle(): age = metadata["age"] reported_age = max(0, min(age, standard_age) - age_margin) + user = current_user + def cache_entry_gen(): for entry in metadata_list: if time_types and entry.get("time_type") not in time_types: continue if geo_types and entry.get("geo_type") not in geo_types: continue + entry_source = entry.get("data_source") + if entry_source in sources_protected_by_roles: + role = sources_protected_by_roles[entry_source] + if not (user and user.has_role(role)): + # if this is a protected source + # and the user doesn't have the allowed role + # (or if we have no user) + # then skip this source + continue if not signals: yield entry for signal in signals: # match source and (signal or no signal or signal = *) - if entry.get("data_source") == 
signal.source and ( + if entry_source == signal.source and ( signal.signal == "*" or signal.signal == entry.get("signal") ): yield entry