diff --git a/.bumpversion.cfg b/.bumpversion.cfg index db2bfeb10..4116c1c83 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.14 +current_version = 0.2.15 commit = False tag = False diff --git a/.github/workflows/missing_signals.yaml b/.github/workflows/missing_signals.yaml new file mode 100644 index 000000000..1b204edb4 --- /dev/null +++ b/.github/workflows/missing_signals.yaml @@ -0,0 +1,28 @@ +name: Missing Signal Detector + +on: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Check out code + uses: actions/checkout@v2 + with: + ref: dev + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + - name: Install Dependencies + run: pip install requests pandas + - name: Run Missing Signals Detector + run: python scripts/report_missing_covidcast_meta.py + - name: Upload Missing Artifact + if: failure() + uses: actions/upload-artifact@v2 + with: + name: missing_db_signals.csv + path: missing_db_signals.csv + diff --git a/.gitignore b/.gitignore index 6796be71c..d8c24d101 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ __pycache__/ /build /node_modules .mypy_cache +/missing_db_signals.csv diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index b39b1f8f1..6320c2d62 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -204,15 +204,15 @@ GEM rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.3.6) - mini_portile2 (2.5.1) + mini_portile2 (2.6.1) minima (2.5.1) jekyll (>= 3.5, < 5.0) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) minitest (5.14.4) multipart-post (2.1.1) - nokogiri (1.11.5) - mini_portile2 (~> 2.5.0) + nokogiri (1.12.5) + mini_portile2 (~> 2.6.1) racc (~> 1.4) octokit (4.20.0) faraday (>= 0.9) diff --git a/docs/api/covidcast-signals/fb-survey.md b/docs/api/covidcast-signals/fb-survey.md index 7bdb0a596..242acd57e 100644 --- a/docs/api/covidcast-signals/fb-survey.md +++ 
b/docs/api/covidcast-signals/fb-survey.md @@ -85,8 +85,7 @@ survey, which go into more detail on symptoms, contacts, risk factors, and demographics. These are used for many of our behavior and testing indicators below. The full text of the survey (including all deployed versions) can be found on our [questions and coding page](../../symptom-survey/coding.md). -Researchers can [request -access](https://dataforgood.fb.com/docs/covid-19-symptom-survey-request-for-data-access/) +Researchers can [request access](https://dataforgood.facebook.com/dfg/docs/covid-19-trends-and-impact-survey-request-for-data-access) to (fully de-identified) individual survey responses for research purposes. As of early March 2021, the average number of Facebook survey responses we diff --git a/docs/symptom-survey/data-access.md b/docs/symptom-survey/data-access.md index 215f7ec3c..7cb49dfaf 100644 --- a/docs/symptom-survey/data-access.md +++ b/docs/symptom-survey/data-access.md @@ -22,8 +22,7 @@ characteristics are available for download. De-identified individual survey responses can be made available to researchers associated with universities or non-profit organizations who sign a Data Use Agreement (DUA). To request access to the data please submit the information -requested in [Facebook's page on obtaining data -access](https://dataforgood.fb.com/docs/covid-19-symptom-survey-request-for-data-access/), +requested in [Facebook's page on obtaining data access](https://dataforgood.facebook.com/dfg/docs/covid-19-trends-and-impact-survey-request-for-data-access), which sets out the basic conditions and provides a form to request access. 
An [international version of CTIS](https://covidmap.umd.edu/) is conducted by the University of Maryland (UMD) and access can be requested through the same diff --git a/docs/symptom-survey/problems.md b/docs/symptom-survey/problems.md index 4b612c631..5f87749c7 100644 --- a/docs/symptom-survey/problems.md +++ b/docs/symptom-survey/problems.md @@ -61,6 +61,15 @@ June 26, 2021, 89.5% of the initial decrease in total response volume has been recovered. The response volume continued to recover as Android users updated to the fixed version of the Facebook app. +## B13 Missing from Data Files (May-June 2021) + +Survey item B13 was added in Wave 11 beginning May 20, 2021. Due to a survey +implementation bug, responses to B13 received between May 20 and June 16th were +not included in the API or in microdata files. This problem was fixed on June +16th; however, due to an oversight, certain microdata files were not corrected +to contain B13 responses until September 27, 2021. All files should now contain +responses to item B13 from the beginning of Wave 11. 
+ ## Incorrect Coding in Documentation We found a Qualtrics bug that affects the exported text of the survey (but not diff --git a/integrations/acquisition/covidcast/test_csv_uploading.py b/integrations/acquisition/covidcast/test_csv_uploading.py index 397dc0a73..bb65e10d8 100644 --- a/integrations/acquisition/covidcast/test_csv_uploading.py +++ b/integrations/acquisition/covidcast/test_csv_uploading.py @@ -8,6 +8,8 @@ # third party import mysql.connector +import pandas as pd +import numpy as np # first party from delphi_utils import Nans @@ -52,6 +54,26 @@ def tearDown(self): self.cur.close() self.cnx.close() + @staticmethod + def apply_lag(expected_epidata): + expected_issue_day=date.today() + expected_issue=expected_issue_day.strftime("%Y%m%d") + for dct in expected_epidata: + dct['issue'] = int(expected_issue) + time_value_day = date(year=dct['time_value'] // 10000, + month=dct['time_value'] % 10000 // 100, + day= dct['time_value'] % 100) + expected_lag = (expected_issue_day - time_value_day).days + dct['lag'] = expected_lag + return expected_epidata + + def verify_timestamps_and_defaults(self): + self.cur.execute('select value_updated_timestamp, direction_updated_timestamp, direction from covidcast') + for value_updated_timestamp, direction_updated_timestamp, direction in self.cur: + self.assertGreater(value_updated_timestamp, 0) + self.assertEqual(direction_updated_timestamp, 0) + self.assertIsNone(direction) + def test_uploading(self): """Scan, parse, upload, archive, serve, and fetch a covidcast signal.""" @@ -64,299 +86,250 @@ def test_uploading(self): log_file_directory = "/var/log/" os.makedirs(source_receiving_dir, exist_ok=True) os.makedirs(log_file_directory, exist_ok=True) - - # valid - with open(source_receiving_dir + '/20200419_state_test.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'ca,1,0.1,10,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - 
f.write(f'tx,2,0.2,20,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - f.write(f'fl,3,0.3,30,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # valid, old style no missing cols should have intelligent defaults - with open(source_receiving_dir + '/20200419_state_test_no_missing.csv', 'w') as f: - f.write('geo_id,val,se,sample_size\n') - f.write('ca,1,0.1,10\n') - f.write('tx,NA,0.2,20\n') - f.write('wa,3,0.3,30\n') - - # invalid, missing with an inf value - with open(source_receiving_dir + '/20200419_state_test_missing1.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'fl,inf,0.3,30,{Nans.OTHER},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # invalid, missing with an incorrect missing code - with open(source_receiving_dir + '/20200419_state_test_missing2.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'tx,NA,0.2,20,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # invalid, no missing with an incorrect missing code - with open(source_receiving_dir + '/20200419_state_test_missing3.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'wa,3,0.3,30,{Nans.OTHER},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # valid wip - with open(source_receiving_dir + '/20200419_state_wip_prototype.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'me,10,0.01,100,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - f.write(f'nd,20,0.02,200,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - f.write(f'wa,30,0.03,300,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # invalid - with open(source_receiving_dir + '/20200419_state_wip_really_long_name_that_will_be_accepted.csv', 'w') as f: - 
f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'pa,100,5.4,624,{Nans.NOT_MISSING},{Nans.NOT_MISSING},{Nans.NOT_MISSING}\n') - - # invalid - with open(source_receiving_dir + '/20200419_state_wip_really_long_name_that_will_get_truncated_lorem_ipsum_dolor_sit_amet.csv', 'w') as f: - f.write('geo_id,val,se,sample_size,missing_val,missing_se,missing_sample_size\n') - f.write(f'pa,100,5.4,624,{Nans.NOT_MISSING}, {Nans.NOT_MISSING}, {Nans.NOT_MISSING}\n') - - # invalid - with open(source_receiving_dir + '/20200420_state_test.csv', 'w') as f: - f.write('this,header,is,wrong\n') - - # invalid - with open(source_receiving_dir + '/hello.csv', 'w') as f: - f.write('file name is wrong\n') - - # upload CSVs # TODO: use an actual argparse object for the args instead of a MagicMock args = MagicMock( - log_file=log_file_directory + - "output.log", - data_dir=data_dir, - is_wip_override=False, - not_wip_override=False, - specific_issue_date=False) - main(args) + log_file=log_file_directory + + "output.log", + data_dir=data_dir, + is_wip_override=False, + not_wip_override=False, + specific_issue_date=False) + uploader_column_rename = {"geo_id": "geo_value", "val": "value", "se": "stderr", "missing_val": "missing_value", "missing_se": "missing_stderr"} + + + with self.subTest("Valid CSV with correct missing columns"): + values = pd.DataFrame({ + "geo_id": ["ca", "fl", "tx"], + "val": [1.0, 2.0, 3.0], + "se": [0.1, 0.2, 0.3], + "sample_size": [10.0, 20.0, 30.0], + "missing_val": [Nans.NOT_MISSING] * 3, + "missing_se": [Nans.NOT_MISSING] * 3, + "missing_sample_size": [Nans.NOT_MISSING] * 3 + }) + signal_name = "test" + values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False) + + # upload CSVs + main(args) + response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*') + + expected_values = pd.concat([values, pd.DataFrame({ "time_value": [20200419] * 3, "signal": [signal_name] * 3, 
"direction": [None] * 3})], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records") + expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'} + + self.assertEqual(response, expected_response) + self.verify_timestamps_and_defaults() + + # Verify that files were archived + path = data_dir + f'/archive/successful/src-name/20200419_state_test.csv.gz' + self.assertIsNotNone(os.stat(path)) - # request CSV data from the API - response = Epidata.covidcast( - 'src-name', 'test', 'day', 'state', 20200419, '*') + self.tearDown() + self.setUp() + + + with self.subTest("Valid CSV with no missing columns should set intelligent defaults"): + values = pd.DataFrame({ + "geo_id": ["ca", "fl", "tx"], + "val": [None, 2.0, 3.0], + "se": [0.1, None, 0.3], + "sample_size": [10.0, 20.0, None] + }, dtype=object) + signal_name = "test_no_missing_cols" + values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False) + + # upload CSVs + main(args) + response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*') + + expected_values = pd.concat([values, pd.DataFrame({ + "time_value": [20200419] * 3, + "signal": [signal_name] * 3, + "direction": [None] * 3, + "missing_value": [Nans.OTHER] + [Nans.NOT_MISSING] * 2, + "missing_stderr": [Nans.NOT_MISSING, Nans.OTHER, Nans.NOT_MISSING], + "missing_sample_size": [Nans.NOT_MISSING] * 2 + [Nans.OTHER] + })], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records") + expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'} + + self.assertEqual(response, expected_response) + self.verify_timestamps_and_defaults() + + self.tearDown() + self.setUp() + + + with self.subTest("Invalid, missing with an inf value"): + values = pd.DataFrame({ + "geo_id": ["tx"], + "val": [np.inf], + "se": [0.3], + "sample_size": [None], + "missing_value": [Nans.OTHER], + "missing_stderr": [Nans.NOT_MISSING], + 
"missing_sample_size": [Nans.NOT_MISSING] + }) + signal_name = "test_with_inf" + values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False) + + # upload CSVs + main(args) + response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*') + + expected_response = {'result': -2, 'message': 'no results'} + + self.assertEqual(response, expected_response) + self.verify_timestamps_and_defaults() + self.tearDown() + self.setUp() + + + with self.subTest("Valid, missing with incorrect missing codes, fixed by acquisition"): + values = pd.DataFrame({ + "geo_id": ["tx"], + "val": [None], + "se": [0.3], + "sample_size": [30.0], + "missing_val": [Nans.NOT_MISSING], + "missing_se": [Nans.NOT_MISSING], + "missing_sample_size": [Nans.OTHER] + }).replace({np.nan:None}) + signal_name = "test_incorrect_missing_codes" + values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False) + + # upload CSVs + main(args) + response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*') + + expected_values_df = pd.concat([values, pd.DataFrame({ + "time_value": [20200419], + "signal": [signal_name], + "direction": [None]})], axis=1).rename(columns=uploader_column_rename) + expected_values_df["missing_value"].iloc[0] = Nans.OTHER + expected_values_df["missing_sample_size"].iloc[0] = Nans.NOT_MISSING + expected_values = expected_values_df.to_dict(orient="records") + expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'} + + self.assertEqual(response, expected_response) + self.verify_timestamps_and_defaults() + + self.tearDown() + self.setUp() + + + with self.subTest("Valid wip"): + values = pd.DataFrame({ + "geo_id": ["me", "nd", "wa"], + "val": [10.0, 20.0, 30.0], + "se": [0.01, 0.02, 0.03], + "sample_size": [100.0, 200.0, 300.0], + "missing_val": [Nans.NOT_MISSING] * 3, + "missing_se": [Nans.NOT_MISSING] * 3, + "missing_sample_size": [Nans.NOT_MISSING] * 3 + 
}) + signal_name = "wip_prototype" + values.to_csv(source_receiving_dir + f'/20200419_state_{signal_name}.csv', index=False) + + # upload CSVs + main(args) + response = Epidata.covidcast('src-name', signal_name, 'day', 'state', 20200419, '*') + + expected_values = pd.concat([values, pd.DataFrame({ + "time_value": [20200419] * 3, + "signal": [signal_name] * 3, + "direction": [None] * 3 + })], axis=1).rename(columns=uploader_column_rename).to_dict(orient="records") + expected_response = {'result': 1, 'epidata': self.apply_lag(expected_values), 'message': 'success'} + + self.assertEqual(response, expected_response) + self.verify_timestamps_and_defaults() + + # Verify that files were archived + path = data_dir + f'/archive/successful/src-name/20200419_state_wip_prototype.csv.gz' + self.assertIsNotNone(os.stat(path)) + self.tearDown() + self.setUp() - expected_issue_day=date.today() - expected_issue=expected_issue_day.strftime("%Y%m%d") - def apply_lag(expected_epidata): - for dct in expected_epidata: - dct['issue'] = int(expected_issue) - time_value_day = date(year=dct['time_value'] // 10000, - month=dct['time_value'] % 10000 // 100, - day= dct['time_value'] % 100) - expected_lag = (expected_issue_day - time_value_day).days - dct['lag'] = expected_lag - return expected_epidata - - # verify data matches the CSV - # NB these are ordered by geo_value - self.assertEqual(response, { - 'result': 1, - 'epidata': apply_lag([ - { - 'time_value': 20200419, - 'geo_value': 'ca', - 'value': 1, - 'stderr': 0.1, - 'sample_size': 10, - 'direction': None, - 'signal': 'test', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 20200419, - 'geo_value': 'fl', - 'value': 3, - 'stderr': 0.3, - 'sample_size': 30, - 'direction': None, - 'signal': 'test', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 
20200419, - 'geo_value': 'tx', - 'value': 2, - 'stderr': 0.2, - 'sample_size': 20, - 'direction': None, - 'signal': 'test', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - ]), - 'message': 'success', - }) - - # request CSV data from the API on the test with missing values - response = Epidata.covidcast( - 'src-name', 'test_no_missing', 'day', 'state', 20200419, '*') - - # verify data matches the CSV - # NB these are ordered by geo_value - self.assertEqual(response, { - 'result': 1, - 'epidata': apply_lag([ - { - 'time_value': 20200419, - 'geo_value': 'ca', - 'value': 1, - 'stderr': 0.1, - 'sample_size': 10, - 'direction': None, - 'signal': 'test_no_missing', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 20200419, - 'geo_value': 'tx', - 'value': None, - 'stderr': 0.2, - 'sample_size': 20, - 'direction': None, - 'signal': 'test_no_missing', - 'missing_value': Nans.OTHER, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 20200419, - 'geo_value': 'wa', - 'value': 3, - 'stderr': 0.3, - 'sample_size': 30, - 'direction': None, - 'signal': 'test_no_missing', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - ]), - 'message': 'success', - }) - - # invalid missing files - response = Epidata.covidcast( - 'src-name', 'test_missing1', 'day', 'state', 20200419, '*') - self.assertEqual(response, { - 'result': -2, - 'message': 'no results', - }) - response = Epidata.covidcast( - 'src-name', 'test_missing2', 'day', 'state', 20200419, '*') - self.assertEqual(response, { - 'result': -2, - 'message': 'no results', - }) - response = Epidata.covidcast( - 'src-name', 'test_missing3', 'day', 'state', 20200419, '*') - self.assertEqual(response, { - 'result': -2, - 'message': 'no results', - 
}) - - # request CSV data from the API on WIP signal - response = Epidata.covidcast( - 'src-name', 'wip_prototype', 'day', 'state', 20200419, '*') - - # verify data matches the CSV - # NB these are ordered by geo_value - self.assertEqual(response, { - 'result': 1, - 'epidata': apply_lag([ - { - 'time_value': 20200419, - 'geo_value': 'me', - 'value': 10, - 'stderr': 0.01, - 'sample_size': 100, - 'direction': None, - 'signal': 'wip_prototype', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 20200419, - 'geo_value': 'nd', - 'value': 20, - 'stderr': 0.02, - 'sample_size': 200, - 'direction': None, - 'signal': 'wip_prototype', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - { - 'time_value': 20200419, - 'geo_value': 'wa', - 'value': 30, - 'stderr': 0.03, - 'sample_size': 300, - 'direction': None, - 'signal': 'wip_prototype', - 'missing_value': Nans.NOT_MISSING, - 'missing_stderr': Nans.NOT_MISSING, - 'missing_sample_size': Nans.NOT_MISSING, - }, - ]), - 'message': 'success', - }) - - # request CSV data from the API on the signal with name length 32 bool: + if '7dav_cumulative' in signal: + return True + if source in ('youtube-survey', 'indicator-combination'): + return True + return False + +def compute_missing_signals() -> List[Tuple[Tuple[str, str], Dict]]: + defined_meta = get(f"{base_url}/covidcast/meta").json() + defined_signals: Dict[Tuple[str, str], Dict] = {} + for source in defined_meta: + for signal in source['signals']: + defined_signals[(signal['source'], signal['signal'])] = signal + defined_signals[(source['db_source'], signal['signal'])] = signal + + computed_meta = get(f"{base_url}/covidcast_meta/?format=json").json() + computed_signals: Dict[Tuple[str, str], List[Dict]] = {} + for entry in computed_meta: + computed_signals.setdefault((entry['data_source'], entry['signal']), 
[]).append(entry) + + missing_signals: List[Tuple[Tuple[str, str], Dict]] = [] + + for key, infos in computed_signals.items(): + defined_info = defined_signals.get(key) + if not defined_info: + if not is_known_missing(key[0], key[1]): + missing_signals.append((key, infos[0])) + return missing_signals + + +def gen_row(source: str, signal: str, info: Dict) -> Dict: + is_weighted = signal.startswith('smoothed_w') and not (signal.startswith('smoothed_wa') or signal.startswith('smoothed_we') or signal.startswith('smoothed_wi') or signal.startswith('smoothed_wo') or signal.startswith('smoothed_wu')) + base_name = signal.replace('smoothed_w', 'smoothed_') if is_weighted else signal + bool_str = lambda x: 'TRUE' if x else 'FALSE' + + return { + 'Source Subdivision': source, + 'Signal BaseName': base_name, + 'base_is_other': bool_str(False), + 'Signal': signal, + 'Compute From Base': False, + 'Name': "{base_name} (Weighted)" if is_weighted else signal, + 'Active': bool_str(True), + 'Short Description': 'TODO' if base_name == signal else '', + 'Description': 'TODO' if base_name == signal else '', + 'Time Type': info['time_type'], + 'Time Label': 'Week' if info['time_type'] == 'week' else 'Day', + 'Value Label': 'Percentage' if source == 'fb-survey' else 'Value', + 'Format': 'percent' if source == 'fb-survey' else 'raw', + 'Category': 'public' if source == 'fb-survey' else 'other', + 'High Values Are': 'neutral', + 'Is Smoothed': bool_str(signal.startswith('smoothed') or '7dav' in signal), + 'Is Weighted': bool_str(is_weighted), + 'Is Cumulative': bool_str('cumulative' in signal), + 'Has StdErr': 'TRUE' if source == 'fb-survey' else '', + 'Has Sample Size': 'TRUE' if source == 'fb-survey' else '', + 'Link': 'TODO' + } + +def generate_missing_info_hint(missing_signals: List[Tuple[Tuple[str, str], Dict]]) -> None: + missing = pd.DataFrame.from_records([gen_row(s[0], s[1], info) for s, info in missing_signals]) + + # use the current as base to have the right column order + 
current = pd.read_csv(base_dir / 'src/server/endpoints/covidcast_utils/db_signals.csv') + # clear + current = current[0:0] + guessed: pd.DataFrame = pd.concat([current, missing]) + guessed.to_csv(base_dir / 'missing_db_signals.csv', index=False) + +missing = compute_missing_signals() +if missing: + print(f'found {len(missing)} missing signals') + generate_missing_info_hint(missing) + sys.exit(1) +else: + print(f'all signals found') + sys.exit(0) + diff --git a/src/acquisition/covidcast/csv_importer.py b/src/acquisition/covidcast/csv_importer.py index f55327d85..05669cb82 100644 --- a/src/acquisition/covidcast/csv_importer.py +++ b/src/acquisition/covidcast/csv_importer.py @@ -38,6 +38,16 @@ class CsvImporter: MIN_YEAR = 2019 MAX_YEAR = 2030 + DTYPES = { + "geo_id": str, + "val": float, + "se": float, + "sample_size": float, + "missing_val": int, + "missing_se": int, + "missing_sample_size": int + } + # NOTE: this should be a Python 3.7+ `dataclass`, but the server is on 3.4 # See https://docs.python.org/3/library/dataclasses.html class RowValues: @@ -183,10 +193,9 @@ def floaty_int(value): """ float_value = float(value) - int_value = round(float_value) - if float_value != int_value: + if not float_value.is_integer(): raise ValueError('not an int: "%s"' % str(value)) - return int_value + return int(float_value) @staticmethod def maybe_apply(func, quantity): @@ -212,7 +221,7 @@ def validate_quantity(row, attr_quantity): return "Error" @staticmethod - def validate_missing_code(row, attr_quantity, attr_name): + def validate_missing_code(row, attr_quantity, attr_name, filepath=None, logger=None): """Take a row and validate the missing code associated with a quantity (e.g., val, se, stderr). @@ -221,27 +230,30 @@ def validate_missing_code(row, attr_quantity, attr_name): to infer missing codes except for a very simple cases; the default is to produce an error so that the issue can be fixed in indicators. 
""" - if hasattr(row, "missing_" + attr_name): - missing_entry = getattr(row, "missing_" + attr_name) - try: - missing_entry = int(missing_entry) - except ValueError: - return None - # A missing code should never contradict the quantity being present, - # since that will be filtered in the export_to_csv util in - # covidcast-indicators; nonetheless this code is here for safety. - if attr_quantity is not None and missing_entry != Nans.NOT_MISSING.value: - return None - elif attr_quantity is None and missing_entry == Nans.NOT_MISSING.value: - return None - return missing_entry - else: - if attr_quantity is None: - return Nans.OTHER.value + logger = get_structured_logger('load_csv') if logger is None else logger + missing_entry = getattr(row, "missing_" + attr_name, None) + + try: + missing_entry = CsvImporter.floaty_int(missing_entry) # convert from string to float to int + except (ValueError, TypeError): + missing_entry = None + + if missing_entry is None and attr_quantity is not None: return Nans.NOT_MISSING.value + if missing_entry is None and attr_quantity is None: + return Nans.OTHER.value + + if missing_entry != Nans.NOT_MISSING.value and attr_quantity is not None: + logger.warning(event = f"missing_{attr_name} column contradicting {attr_name} presence.", detail = (str(row)), file = filepath) + return Nans.NOT_MISSING.value + if missing_entry == Nans.NOT_MISSING.value and attr_quantity is None: + logger.warning(event = f"missing_{attr_name} column contradicting {attr_name} presence.", detail = (str(row)), file = filepath) + return Nans.OTHER.value + + return missing_entry @staticmethod - def extract_and_check_row(row, geo_type): + def extract_and_check_row(row, geo_type, filepath=None): """Extract and return `RowValues` from a CSV row, with sanity checks. Also returns the name of the field which failed sanity check, or None. 
@@ -300,31 +312,23 @@ def extract_and_check_row(row, geo_type): return (None, 'geo_type') # Validate row values - value = CsvImporter.validate_quantity(row, "val") - # val was a string or another dtype + value = CsvImporter.validate_quantity(row, "value") + # value was a string or another dtype if value == "Error": - return (None, 'val') - stderr = CsvImporter.validate_quantity(row, "se") - # Case 1: stderr was a string or another dtype - # Case 2: stderr is negative + return (None, 'value') + stderr = CsvImporter.validate_quantity(row, "stderr") + # stderr is a string, another dtype, or negative if stderr == "Error" or (stderr is not None and stderr < 0): - return (None, 'se') + return (None, 'stderr') sample_size = CsvImporter.validate_quantity(row, "sample_size") - # Case 1: sample_size was a string or another dtype - # Case 2: sample_size was negative + # sample_size is a string, another dtype, or negative if sample_size == "Error" or (sample_size is not None and sample_size < 0): return (None, 'sample_size') # Validate and write missingness codes - missing_value = CsvImporter.validate_missing_code(row, value, "val") - if missing_value is None: - return (None, 'missing_val') - missing_stderr = CsvImporter.validate_missing_code(row, stderr, "se") - if missing_stderr is None: - return (None, 'missing_se') - missing_sample_size = CsvImporter.validate_missing_code(row, sample_size, "sample_size") - if missing_sample_size is None: - return (None, 'missing_sample_size') + missing_value = CsvImporter.validate_missing_code(row, value, "value", filepath) + missing_stderr = CsvImporter.validate_missing_code(row, stderr, "stderr", filepath) + missing_sample_size = CsvImporter.validate_missing_code(row, sample_size, "sample_size", filepath) # return extracted and validated row values row_values = CsvImporter.RowValues( @@ -344,16 +348,22 @@ def load_csv(filepath, geo_type, pandas=pandas): including the header. 
""" logger = get_structured_logger('load_csv') - # don't use type inference, just get strings - table = pandas.read_csv(filepath, dtype='str') + + try: + table = pandas.read_csv(filepath, dtype=CsvImporter.DTYPES) + except ValueError as e: + logger.warning(event='Failed to open CSV with specified dtypes, switching to str', detail=str(e), file=filepath) + table = pandas.read_csv(filepath, dtype='str') if not CsvImporter.is_header_valid(table.columns): logger.warning(event='invalid header', detail=table.columns, file=filepath) yield None return + table.rename(columns={"val": "value", "se": "stderr", "missing_val": "missing_value", "missing_se": "missing_stderr"}, inplace=True) + for row in table.itertuples(index=False): - row_values, error = CsvImporter.extract_and_check_row(row, geo_type) + row_values, error = CsvImporter.extract_and_check_row(row, geo_type, filepath) if error: logger.warning(event = 'invalid value for row', detail=(str(row), error), file=filepath) yield None diff --git a/src/client/delphi_epidata.R b/src/client/delphi_epidata.R index 9aa54818e..ac05c3168 100644 --- a/src/client/delphi_epidata.R +++ b/src/client/delphi_epidata.R @@ -15,7 +15,7 @@ Epidata <- (function() { # API base url BASE_URL <- 'https://delphi.cmu.edu/epidata/api.php' - client_version <- '0.2.14' + client_version <- '0.2.15' # Helper function to cast values and/or ranges to strings .listitem <- function(value) { diff --git a/src/client/delphi_epidata.js b/src/client/delphi_epidata.js index ab4b94d16..95391e74e 100644 --- a/src/client/delphi_epidata.js +++ b/src/client/delphi_epidata.js @@ -22,7 +22,7 @@ } })(this, function (exports, fetchImpl, jQuery) { const BASE_URL = "https://delphi.cmu.edu/epidata/"; - const client_version = "0.2.14"; + const client_version = "0.2.15"; // Helper function to cast values and/or ranges to strings function _listitem(value) { diff --git a/src/client/packaging/npm/package-lock.json b/src/client/packaging/npm/package-lock.json index 
07b5e0e65..d18d3c33f 100644 --- a/src/client/packaging/npm/package-lock.json +++ b/src/client/packaging/npm/package-lock.json @@ -1,6 +1,6 @@ { "name": "delphi_epidata", - "version": "0.2.7", + "version": "0.2.14", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -4415,9 +4415,9 @@ "dev": true }, "tmpl": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.4.tgz", - "integrity": "sha1-I2QN17QtAEM5ERQIIOXPRA5SHdE=", + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz", + "integrity": "sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==", "dev": true }, "to-fast-properties": { diff --git a/src/client/packaging/npm/package.json b/src/client/packaging/npm/package.json index a034585a5..3da457f68 100644 --- a/src/client/packaging/npm/package.json +++ b/src/client/packaging/npm/package.json @@ -2,7 +2,7 @@ "name": "delphi_epidata", "description": "Delphi Epidata API Client", "authors": "Delphi Group", - "version": "0.2.14", + "version": "0.2.15", "license": "MIT", "homepage": "https://github.com/cmu-delphi/delphi-epidata", "bugs": { diff --git a/src/client/packaging/pypi/delphi_epidata/__init__.py b/src/client/packaging/pypi/delphi_epidata/__init__.py index 6514ccfb2..eb0313dc0 100644 --- a/src/client/packaging/pypi/delphi_epidata/__init__.py +++ b/src/client/packaging/pypi/delphi_epidata/__init__.py @@ -1,4 +1,4 @@ from .delphi_epidata import Epidata name = 'delphi_epidata' -__version__ = '0.2.14' +__version__ = '0.2.15' diff --git a/src/client/packaging/pypi/setup.py b/src/client/packaging/pypi/setup.py index 86bf9fa62..2b2cf293a 100644 --- a/src/client/packaging/pypi/setup.py +++ b/src/client/packaging/pypi/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="delphi_epidata", - version="0.2.14", + version="0.2.15", author="David Farrow", author_email="dfarrow0@gmail.com", description="A programmatic interface to Delphi's Epidata API.", diff --git 
a/src/server/_config.py b/src/server/_config.py index 321398ea0..44f114c2f 100644 --- a/src/server/_config.py +++ b/src/server/_config.py @@ -5,7 +5,7 @@ load_dotenv() -VERSION = "0.2.14" +VERSION = "0.2.15" MAX_RESULTS = int(10e6) MAX_COMPATIBILITY_RESULTS = int(3650) diff --git a/src/server/endpoints/covidcast_utils/db_signals.csv b/src/server/endpoints/covidcast_utils/db_signals.csv index 938fc9cd2..461befead 100644 --- a/src/server/endpoints/covidcast_utils/db_signals.csv +++ b/src/server/endpoints/covidcast_utils/db_signals.csv @@ -1,17 +1,19 @@ Source Subdivision,Signal BaseName,base_is_other,Signal,Compute From Base,Name,Active,Short Description,Description,Time Type,Time Label,Value Label,Format,Category,High Values Are,Is Smoothed,Is Weighted,Is Cumulative,Has StdErr,Has Sample Size,Link -chng-cli,smoothed_outpatient_cli,FALSE,smoothed_outpatient_cli,FALSE,COVID-Related Doctor Visits,TRUE,Estimated percentage of outpatient doctor visits primarily about COVID-related symptoms,"Estimated percentage of outpatient doctor visits primarily about COVID-related symptoms, based on Change Healthcare claims data that has been de-identified in accordance with HIPAA privacy regulations, smoothed in time using a Gaussian linear smoother",day,Date,Value,raw,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, -chng-cli,smoothed_outpatient_cli,TRUE,smoothed_adj_outpatient_cli,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_description}, and adjusted to reduce day-of-week effects",day,Date,Value,raw,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, -chng-covid,smoothed_outpatient_covid,FALSE,smoothed_outpatient_covid,FALSE,COVID-Confirmed Doctor Visits,TRUE,COVID-Confirmed Doctor Visits,"Estimated percentage of outpatient doctor visits with confirmed COVID-19, based on Change Healthcare claims data that has been de-identified in accordance with HIPAA privacy regulations, smoothed in time using a Gaussian linear smoother ",day,Date,Value,raw,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, 
-chng-covid,smoothed_outpatient_covid,TRUE,smoothed_adj_outpatient_covid,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_description}, and adjusted to reduce day-of-week effects",day,Date,Value,raw,late,bad,TRUE,FALSE,FALSE,FALSE,TRUE, +chng-cli,smoothed_outpatient_cli,FALSE,smoothed_outpatient_cli,FALSE,COVID-Related Doctor Visits,TRUE,Estimated percentage of outpatient doctor visits primarily about COVID-related symptoms,"Estimated percentage of outpatient doctor visits primarily about COVID-related symptoms, based on Change Healthcare claims data that has been de-identified in accordance with HIPAA privacy regulations, smoothed in time using a Gaussian linear smoother",day,Date,Value,raw,early,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +chng-cli,smoothed_outpatient_cli,TRUE,smoothed_adj_outpatient_cli,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_description}, and adjusted to reduce day-of-week effects",day,Date,Value,raw,early,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +chng-covid,smoothed_outpatient_covid,FALSE,smoothed_outpatient_covid,FALSE,COVID-Confirmed Doctor Visits,TRUE,COVID-Confirmed Doctor Visits,"Estimated percentage of outpatient doctor visits with confirmed COVID-19, based on Change Healthcare claims data that has been de-identified in accordance with HIPAA privacy regulations, smoothed in time using a Gaussian linear smoother ",day,Date,Value,raw,early,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +chng-covid,smoothed_outpatient_covid,TRUE,smoothed_adj_outpatient_covid,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_description}, and adjusted to reduce day-of-week effects",day,Date,Value,raw,early,bad,TRUE,FALSE,FALSE,FALSE,TRUE, covid-act-now,pcr_specimen_positivity_rate,FALSE,pcr_specimen_positivity_rate,FALSE,PCR Test Positivity Rate,TRUE,Proportion of PCR specimens tested that have a positive result,,day,Date,Value,fraction,other,bad,FALSE,FALSE,FALSE,FALSE,FALSE, covid-act-now,pcr_specimen_total_tests,FALSE,pcr_specimen_total_tests,FALSE,Total Number of PCR Tests,TRUE,Total 
number of PCR specimens tested,,day,Date,Value,count,other,good,FALSE,FALSE,FALSE,FALSE,FALSE, doctor-visits,smoothed_cli,FALSE,smoothed_cli,FALSE,COVID-Related Doctor Visits,TRUE,Percentage of daily doctor visits that are due to COVID-like symptoms,"Estimated percentage of outpatient doctor visits that are primarily about COVID-related symptoms, based on data from health system partners, smoothed in time using a Gaussian linear smoother",day,Date,Percentage,percent,early,bad,TRUE,FALSE,FALSE,FALSE,FALSE, doctor-visits,smoothed_cli,TRUE,smoothed_adj_cli,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_description}, and adjusted to reduce day-of-week effects",day,Date,Percentage,percent,early,bad,TRUE,TRUE,FALSE,FALSE,FALSE, -fb-survey,raw_cli,FALSE,raw_cli,FALSE,COVID-Like Symptoms,TRUE,Estimated percentage of people with COVID-like illness ,"{source_description} For this signal, we estimate the percentage of people self-reporting COVID-like symptoms, defined here as fever along with either cough, shortness of breath, or difficulty breathing. While many other conditions can cause these symptoms, comparing the rates of COVID-like symptoms across the country can suggest where COVID is most active.",day,Date,Percentage,percent,early,bad,FALSE,FALSE,FALSE,TRUE,TRUE, -fb-survey,raw_cli,TRUE,raw_wcli,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,public,bad,FALSE,TRUE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#ili-and-cli-indicators +fb-survey,raw_cli,FALSE,raw_cli,FALSE,COVID-Like Symptoms,TRUE,Estimated percentage of people with COVID-like illness ,"{source_description} For this signal, we estimate the percentage of people self-reporting COVID-like symptoms, defined here as fever along with either cough, shortness of breath, or difficulty breathing. 
While many other conditions can cause these symptoms, comparing the rates of COVID-like symptoms across the country can suggest where COVID is most active.",day,Date,Percentage,percent,early,bad,FALSE,FALSE,FALSE,TRUE,TRUE,"[Survey details](https://delphi.cmu.edu/covidcast/surveys/) +[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#ili-and-cli-indicators)" +fb-survey,raw_cli,TRUE,raw_wcli,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,public,bad,FALSE,TRUE,FALSE,TRUE,TRUE, fb-survey,raw_cli,TRUE,smoothed_cli,FALSE,{base_name} (7-day average),TRUE,,,day,Date,Percentage,percent,early,bad,TRUE,FALSE,FALSE,TRUE,TRUE, fb-survey,raw_cli,TRUE,smoothed_wcli,FALSE,{base_name} (Weighted 7-day average),TRUE,,,day,Date,Percentage,percent,public,bad,TRUE,TRUE,FALSE,TRUE,TRUE, -fb-survey,raw_hh_cmnty_cli,FALSE,raw_hh_cmnty_cli,FALSE,COVID-Like Symptoms in Community,TRUE,Estimated percentage of people reporting illness in their local community,"{source_description} We also ask them if they know anyone in their local community who has COVID-like symptoms, defined here as fever along with either cough, shortness of breath, or difficulty breathing. For this indicator, we estimate the percentage of people who know someone, in their household or outside it, who has these symptoms. 
While many conditions can cause these symptoms, not just COVID, comparing the rates across the country can suggest where COVID is most active.",day,Date,Percentage,percent,early,bad,FALSE,FALSE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#ili-and-cli-indicators +fb-survey,raw_hh_cmnty_cli,FALSE,raw_hh_cmnty_cli,FALSE,COVID-Like Symptoms in Community,TRUE,Estimated percentage of people reporting illness in their local community,"{source_description} We also ask them if they know anyone in their local community who has COVID-like symptoms, defined here as fever along with either cough, shortness of breath, or difficulty breathing. For this indicator, we estimate the percentage of people who know someone, in their household or outside it, who has these symptoms. While many conditions can cause these symptoms, not just COVID, comparing the rates across the country can suggest where COVID is most active.",day,Date,Percentage,percent,early,bad,FALSE,FALSE,FALSE,TRUE,TRUE,"[Survey details](https://delphi.cmu.edu/covidcast/surveys/) +[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#ili-and-cli-indicators)" fb-survey,raw_hh_cmnty_cli,TRUE,raw_whh_cmnty_cli,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,early,bad,FALSE,TRUE,FALSE,TRUE,TRUE, fb-survey,raw_hh_cmnty_cli,TRUE,smoothed_hh_cmnty_cli,FALSE,{base_name} (7-day average),TRUE,,,day,Date,Percentage,percent,early,bad,TRUE,FALSE,FALSE,TRUE,TRUE, fb-survey,raw_hh_cmnty_cli,TRUE,smoothed_whh_cmnty_cli,FALSE,{base_name} (Weighted 7-day average),TRUE,,,day,Date,Percentage,percent,early,bad,TRUE,TRUE,FALSE,TRUE,TRUE, @@ -41,6 +43,11 @@ fb-survey,smoothed_covid_vaccinated,FALSE,smoothed_covid_vaccinated,FALSE,COVID- Note: The Centers for Disease Control compiles data on vaccine administration across the United States. 
This signal may differ from CDC data because of survey biases and should not be treated as authoritative. However, the survey signal is not subject to the lags and reporting problems in official vaccination data.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#vaccination-indicators fb-survey,smoothed_covid_vaccinated,TRUE,smoothed_wcovid_vaccinated,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,public,good,TRUE,TRUE,FALSE,TRUE,TRUE, +fb-survey,smoothed_covid_vaccinated_appointment_or_accept,smoothed_covid_vaccinated_appointment_or_accept,smoothed_covid_vaccinated_appointment_or_accept,FALSE,"COVID-19 Vaccine Acceptance: Vaccinated, Appointment, or Accept",TRUE,"Estimated percentage of respondents who either have already received a COVID vaccine, have an appointment to receive a COVID vaccine, or would definitely or probably choose to receive one if it were offered to them today.","{short_description} + +",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,"[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#vaccination-indicators) +[Wave 11 revision updates](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html#wave-11)" +fb-survey,smoothed_covid_vaccinated_appointment_or_accept,,smoothed_wcovid_vaccinated_appointment_or_accept,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,public,good,TRUE,TRUE,FALSE,TRUE,TRUE, fb-survey,smoothed_covid_vaccinated_or_accept,FALSE,smoothed_covid_vaccinated_or_accept,FALSE,COVID-19 Vaccinated or Vaccine Acceptance,FALSE,"Estimated percentage of respondents who either have already received a COVID vaccine or would definitely or probably choose to get vaccinated, if a vaccine were offered to them today.","{short_description} Discontinued as of Wave 11, May 19, 
2021.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#vaccination-indicators @@ -247,11 +254,15 @@ Discontinued as of Wave 11, May 19, 2021.",day,Date,Percentage,percent,public,ne fb-survey,smoothed_wanted_test_14d,TRUE,smoothed_wwanted_test_14d,FALSE,{base_name} (Weighted),FALSE,,,day,Date,Percentage,percent,public,neutral,TRUE,TRUE,FALSE,TRUE,TRUE, fb-survey,smoothed_wearing_mask,FALSE,smoothed_wearing_mask,FALSE,People Wearing Masks (Last 5 Days),FALSE,Estimated percentage of people who wore a mask for most or all of the time while in public in the past 5 days; those not in public in the past 5 days are not counted. ,"{short_description} -Discontinued as of Wave 8, February 8, 2021.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#mask-use +Discontinued as of Wave 8, February 8, 2021.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,"[Technical descritpion](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#mask-use) +[Interpreting mask use in context](https://delphi.cmu.edu/blog/2020/12/13/are-masks-widely-used-in-public/) +[Wave 10 revision updates](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html#wave-10)" fb-survey,smoothed_wearing_mask,TRUE,smoothed_wwearing_mask,FALSE,{base_name} (Weighted),FALSE,,,day,Date,Percentage,percent,public,good,TRUE,TRUE,FALSE,TRUE,TRUE, fb-survey,smoothed_wearing_mask_7d,FALSE,smoothed_wearing_mask_7d,FALSE,People Wearing Masks (Last 7 Days),TRUE,Estimated percentage of people who wore a mask for most or all of the time while in public in the past 7 days; those not in public in the past 7 days are not counted. ,"{source_description} We also ask them if they wear a mask when they are in public. 
For this signal, we estimate the percentage of people who say they wear a mask most or all of the time when they are in public. -This item was shown to respondents starting in Wave 8, February 8, 2021, replacing a 5-day version of the same question.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#mask-use +This item was shown to respondents starting in Wave 8, February 8, 2021, replacing a 5-day version of the same question.",day,Date,Percentage,percent,public,good,TRUE,FALSE,FALSE,TRUE,TRUE,"[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html#mask-use) +[Interpreting mask use in context](https://delphi.cmu.edu/blog/2020/12/13/are-masks-widely-used-in-public/) +[Wave 10 revision updates](https://cmu-delphi.github.io/delphi-epidata/symptom-survey/coding.html#wave-10)" fb-survey,smoothed_wearing_mask_7d,TRUE,smoothed_wwearing_mask_7d,FALSE,{base_name} (Weighted),TRUE,,,day,Date,Percentage,percent,public,good,TRUE,TRUE,FALSE,TRUE,TRUE, fb-survey,smoothed_work_outside_home_1d,FALSE,smoothed_work_outside_home_1d,FALSE,Work Outside Home (Last 24 Hours),FALSE,Estimated percentage of respondents who worked or went to school outside their home in the past 24 hours,"{short_description}. 
@@ -299,12 +310,12 @@ hhs,sum_confirmed_suspected_admissions_covid_1d,,sum_confirmed_suspected_admissi hhs,sum_confirmed_suspected_admissions_covid_1d,,sum_confirmed_suspected_admissions_covid_1d_prop_7dav,FALSE,"{base_name} (7-day average, per 100k people)",TRUE,,,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, hospital-admissions,smoothed_covid19,FALSE,smoothed_covid19,FALSE,COVID-19 Admissions (EMR and Claims),FALSE,Estimated percentage of new hospital admissions with COVID-associated diagnoses,"{short_description}, based on counts of electronic medical records and claims from health system partners, smoothed in time using a Gaussian linear smoother. -Discontinued October 1, 2020.",day,Date,Value,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +Discontinued October 1, 2020.",day,Date,Percentage,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, hospital-admissions,smoothed_covid19,TRUE,smoothed_adj_covid19,FALSE,{base_name} (Day-adjusted),FALSE,,"{base_short_description}, based on counts of electronic medical records and claims from health system partners, smoothed in time using a Gaussian linear smoother, and adjusted to reduce day-of-week effects. 
-Discontinued October 1, 2020.",day,Date,Value,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, -hospital-admissions,smoothed_covid19_from_claims,FALSE,smoothed_covid19_from_claims,FALSE,COVID-19 Admissions (Claims),TRUE,Estimated percentage of new hospital admissions with COVID-associated diagnoses,"{short_description}, based on counts of claims from health system partners, smoothed in time using a Gaussian linear smoother.",day,Date,Value,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, -hospital-admissions,smoothed_covid19_from_claims,TRUE,smoothed_adj_covid19_from_claims,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_short_description}, based on counts of claims from health system partners, smoothed in time using a Gaussian linear smoother, and adjusted to reduce day-of-week effects.",day,Date,Value,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +Discontinued October 1, 2020.",day,Date,Percentage,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +hospital-admissions,smoothed_covid19_from_claims,FALSE,smoothed_covid19_from_claims,FALSE,COVID-19 Admissions (Claims),TRUE,Estimated percentage of new hospital admissions with COVID-associated diagnoses,"{short_description}, based on counts of claims from health system partners, smoothed in time using a Gaussian linear smoother.",day,Date,Percentage,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, +hospital-admissions,smoothed_covid19_from_claims,TRUE,smoothed_adj_covid19_from_claims,FALSE,{base_name} (Day-adjusted),TRUE,,"{base_short_description}, based on counts of claims from health system partners, smoothed in time using a Gaussian linear smoother, and adjusted to reduce day-of-week effects.",day,Date,Percentage,percent,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, indicator-combination-cases-deaths,confirmed_cumulative_num,FALSE,confirmed_cumulative_num,FALSE,Confirmed COVID Cases (Cumulative),TRUE,Cumulative confirmed COVID cases,Confirmed COVID-19 cases as reported by 
[USAFacts](https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/) and [JHU-CSSE](https://github.com/CSSEGISandData/COVID-19),day,Date,Value,count,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE, indicator-combination-cases-deaths,confirmed_cumulative_num,TRUE,confirmed_7dav_incidence_num,TRUE,"Confirmed COVID Cases (Daily new, 7-day average)",TRUE,"Daily new confirmed COVID cases, 7-day average ",,day,Date,Value,count,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, indicator-combination-cases-deaths,confirmed_cumulative_num,TRUE,confirmed_7dav_incidence_prop,FALSE,"Confirmed COVID Cases (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID cases, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, @@ -335,7 +346,46 @@ jhu-csse,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed jhu-csse,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE, jhu-csse,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, jhu-csse,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctor’s office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. 
This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,late,bad,FALSE,FALSE,FALSE,TRUE,TRUE, +nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,"{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. 
Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,"{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19)","{short_description}. 
+ +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,Percentage of Expected Deaths,TRUE,Weekly new deaths for all causes in 2020 as a percentage of the average number across the same week in 2017-2019.,"{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Percentage,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. 
Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people","{short_description}. + +National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, +quidel-covid-ag,covid_ag_raw_pct_positive,FALSE,covid_ag_raw_pct_positive,FALSE,COVID-19 Antigen Tests: Percent Positive,TRUE,Percentage of antigen tests that were positive for COVID-19,"When a patient (whether at a doctor’s office, clinic, or hospital) has COVID-like symptoms, doctors may order an antigen test. An antigen test can detect parts of the virus that are present during an active infection. 
This is in contrast with antibody tests, which detect parts of the immune system that react to the virus, but which persist long after the infection has passed. For this signal, we compute the percentage of antigen tests performed that were positive for COVID-19.",day,Date,Percentage,percent,late,bad,FALSE,FALSE,FALSE,TRUE,TRUE,[Technical description](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/quidel.html#covid-19-tests) quidel-covid-ag,covid_ag_raw_pct_positive,TRUE,covid_ag_smoothed_pct_positive,FALSE,{base_name} (7-day average),TRUE,,,day,Date,Percentage,percent,late,bad,TRUE,FALSE,FALSE,TRUE,TRUE, quidel-flu,raw_pct_negative,FALSE,raw_pct_negative,FALSE,Flu Tests: Percent Negative,FALSE,"The percentage of flu tests that are negative, suggesting the patient's illness has another cause, possibly COVID-19 ","{short_description}. @@ -380,43 +430,4 @@ usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_num,TRUE,"Confirmed C usa-facts,deaths_cumulative_num,TRUE,deaths_7dav_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, 7-day average, per 100k people)",TRUE,"Daily new confirmed COVID deaths, 7-day average, per 100k people",,day,Date,Value,per100k,late,bad,TRUE,FALSE,FALSE,FALSE,FALSE, usa-facts,deaths_cumulative_num,TRUE,deaths_cumulative_prop,FALSE,"Confirmed COVID Deaths (Cumulative, per 100k people)",TRUE,"Cumulative confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,TRUE,FALSE,FALSE, usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_num,TRUE,Confirmed COVID Deaths (Daily new),TRUE,Daily new confirmed COVID deaths,,day,Date,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, 
-nchs-mortality,deaths_covid_incidence_num,FALSE,deaths_covid_incidence_num,FALSE,Confirmed or Presumed COVID Deaths (Weekly new),TRUE,Number of weekly new deaths with confirmed or presumed COVID-19 ,"{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_covid_incidence_num,TRUE,deaths_covid_incidence_prop,FALSE,"Confirmed or Presumed COVID Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths with confirmed or presumed COVID-19, per 100k people","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_allcause_incidence_num,FALSE,deaths_allcause_incidence_num,FALSE,All Causes Deaths (Weekly new),TRUE,Number of weekly new deaths from all causes,"{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_allcause_incidence_num,TRUE,deaths_allcause_incidence_prop,FALSE,"All Causes Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths from all causes, per 100k people","{short_description}. 
- -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_flu_incidence_num,FALSE,deaths_flu_incidence_num,FALSE,Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19)","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_flu_incidence_num,TRUE,deaths_flu_incidence_prop,FALSE,"Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Influenza and at least one of (Pneumonia, COVID-19), per 100k people","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_pneumonia_notflu_incidence_num,FALSE,deaths_pneumonia_notflu_incidence_num,FALSE,Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths ","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_pneumonia_notflu_incidence_num,TRUE,deaths_pneumonia_notflu_incidence_prop,FALSE,"Pneumonia excl. 
Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, excluding Influenza deaths, per 100k people","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,deaths_covid_and_pneumonia_notflu_incidence_num,FALSE,COVID and Pneumonia excl. Influenza Deaths (Weekly new),TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza ","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_covid_and_pneumonia_notflu_incidence_num,TRUE,deaths_covid_and_pneumonia_notflu_incidence_prop,FALSE,"COVID and Pneumonia excl. Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving COVID-19 and Pneumonia, excluding Influenza, per 100k people","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,deaths_pneumonia_or_flu_or_covid_incidence_num,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19 ","{short_description}. 
- -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,count,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_pneumonia_or_flu_or_covid_incidence_num,TRUE,deaths_pneumonia_or_flu_or_covid_incidence_prop,FALSE,"COVID, Pneumonia or Influenza Deaths (Weekly new, per 100k people)",TRUE,"Number of weekly new deaths involving Pneumonia, Influenza, or COVID-19, per 100k people","{short_description}. - -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, -nchs-mortality,deaths_percent_of_expected,FALSE,deaths_percent_of_expected,FALSE,Percentage of Expected Deaths,TRUE,Weekly new deaths for all causes in 2020 as a percentage of the average number across the same week in 2017-2019.,"{short_description}. 
- -National provisional death counts is based on death certificate data received and coded by the National Center for Health Statistics ([NCHS](https://www.cdc.gov/nchs/nvss/vsrr/COVID19/index.htm))",week,Week,Value,percent,late,neutral,FALSE,FALSE,FALSE,FALSE,FALSE, \ No newline at end of file +usa-facts,deaths_cumulative_num,TRUE,deaths_incidence_prop,FALSE,"Confirmed COVID Deaths (Daily new, per 100k people)",TRUE,"Daily new confirmed COVID deaths, per 100k people",,day,Date,Value,per100k,late,bad,FALSE,FALSE,FALSE,FALSE,FALSE, \ No newline at end of file diff --git a/tests/acquisition/covidcast/test_csv_importer.py b/tests/acquisition/covidcast/test_csv_importer.py index b82dec1ce..3e7224d19 100644 --- a/tests/acquisition/covidcast/test_csv_importer.py +++ b/tests/acquisition/covidcast/test_csv_importer.py @@ -59,7 +59,7 @@ def test_find_issue_specific_csv_files(self,os_isdir_mock): issuedir_match= CsvImporter.PATTERN_ISSUE_DIR.match(path_prefix.lower()) issue_date_value = int(issuedir_match.group(2)) self.assertTrue(CsvImporter.is_sane_day(issue_date_value)) - + found = set(CsvImporter.find_issue_specific_csv_files(path_prefix, glob=mock_glob)) self.assertTrue(len(found)>0) @@ -159,22 +159,22 @@ def test_extract_and_check_row(self): def make_row( geo_type='state', geo_id='vi', - val='1.23', - se='4.56', + value='1.23', + stderr='4.56', sample_size='100.5', - missing_val=Nans.NOT_MISSING, - missing_se=Nans.NOT_MISSING, - missing_sample_size=Nans.NOT_MISSING): + missing_value=str(float(Nans.NOT_MISSING)), + missing_stderr=str(float(Nans.NOT_MISSING)), + missing_sample_size=str(float(Nans.NOT_MISSING))): row = MagicMock( geo_id=geo_id, - val=val, - se=se, + value=value, + stderr=stderr, sample_size=sample_size, - missing_val=missing_val, - missing_se=missing_se, + missing_value=missing_value, + missing_stderr=missing_stderr, missing_sample_size=missing_sample_size, - spec=["geo_id", "val", "se", "sample_size", - "missing_val", "missing_se", 
"missing_sample_size"]) + spec=["geo_id", "value", "stderr", "sample_size", + "missing_value", "missing_stderr", "missing_sample_size"]) return geo_type, row # cases to test each failure mode @@ -190,22 +190,16 @@ def make_row( (make_row(geo_type='nation', geo_id='0000'), 'geo_id'), (make_row(geo_type='hhs', geo_id='0'), 'geo_id'), (make_row(geo_type='province', geo_id='ab'), 'geo_type'), - (make_row(se='-1'), 'se'), + (make_row(stderr='-1'), 'stderr'), (make_row(geo_type=None), 'geo_type'), (make_row(geo_id=None), 'geo_id'), - (make_row(val='inf'), 'val'), - (make_row(se='inf'), 'se'), + (make_row(value='inf'), 'value'), + (make_row(stderr='inf'), 'stderr'), (make_row(sample_size='inf'), 'sample_size'), (make_row(geo_type='hrr', geo_id='hrr001'), 'geo_id'), - (make_row(val='val'), 'val'), - (make_row(se='se'), 'se'), + (make_row(value='value'), 'value'), + (make_row(stderr='stderr'), 'stderr'), (make_row(sample_size='sample_size'), 'sample_size'), - (make_row(missing_val='missing_val'), 'missing_val'), - (make_row(missing_se='missing_val'), 'missing_se'), - (make_row(missing_sample_size='missing_val'), 'missing_sample_size'), - (make_row(val='1.2', missing_val=Nans.OTHER), 'missing_val'), - (make_row(se='1.2', missing_se=Nans.OTHER), 'missing_se'), - (make_row(sample_size='1.2', missing_sample_size=Nans.OTHER), 'missing_sample_size') ] for ((geo_type, row), field) in failure_cases: @@ -213,30 +207,21 @@ def make_row( self.assertIsNone(values) self.assertEqual(error, field) - # a nominal case without missing values - geo_type, row = make_row() - values, error = CsvImporter.extract_and_check_row(row, geo_type) - - self.assertIsInstance(values, CsvImporter.RowValues) - self.assertEqual(str(values.geo_value), row.geo_id) - self.assertEqual(str(values.value), row.val) - self.assertEqual(str(values.stderr), row.se) - self.assertEqual(str(values.sample_size), row.sample_size) - self.assertIsNone(error) - - # a nominal case with missing values - geo_type, row = make_row( 
- se='', sample_size='NA', - missing_se=Nans.OTHER, missing_sample_size=Nans.OTHER - ) - values, error = CsvImporter.extract_and_check_row(row, geo_type) - - self.assertIsInstance(values, CsvImporter.RowValues) - self.assertEqual(str(values.geo_value), row.geo_id) - self.assertEqual(str(values.value), row.val) - self.assertIsNone(values.stderr) - self.assertIsNone(values.sample_size) - self.assertIsNone(error) + success_cases = [ + (make_row(), CsvImporter.RowValues('vi', 1.23, 4.56, 100.5, Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.NOT_MISSING)), + (make_row(value=None, stderr=np.nan, sample_size='', missing_value=str(float(Nans.DELETED)), missing_stderr=str(float(Nans.DELETED)), missing_sample_size=str(float(Nans.DELETED))), CsvImporter.RowValues('vi', None, None, None, Nans.DELETED, Nans.DELETED, Nans.DELETED)), + (make_row(stderr='', sample_size='NA', missing_stderr=str(float(Nans.OTHER)), missing_sample_size=str(float(Nans.OTHER))), CsvImporter.RowValues('vi', 1.23, None, None, Nans.NOT_MISSING, Nans.OTHER, Nans.OTHER)), + (make_row(sample_size=None, missing_value='missing_value', missing_stderr=str(float(Nans.OTHER)), missing_sample_size=str(float(Nans.NOT_MISSING))), CsvImporter.RowValues('vi', 1.23, 4.56, None, Nans.NOT_MISSING, Nans.NOT_MISSING, Nans.OTHER)), + ] + + for ((geo_type, row), field) in success_cases: + values, error = CsvImporter.extract_and_check_row(row, geo_type) + self.assertIsNone(error) + self.assertIsInstance(values, CsvImporter.RowValues) + self.assertEqual(values.geo_value, field.geo_value) + self.assertEqual(values.value, field.value) + self.assertEqual(values.stderr, field.stderr) + self.assertEqual(values.sample_size, field.sample_size) def test_load_csv_with_invalid_header(self): """Bail loading a CSV when the header is invalid.""" @@ -291,16 +276,14 @@ def test_load_csv_with_valid_header(self): self.assertIsNone(rows[3]) - # now with missing values! 
the last missing_sample_size - # contains an error code while data is available, which - # should give an error + # now with missing values! data = { 'geo_id': ['ca', 'tx', 'fl', 'ak'], 'val': [np.nan, '1.2', '1.3', '1.4'], 'se': ['2.1', "na", '2.3', '2.4'], 'sample_size': ['301', '302', None, '304'], - 'missing_val': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3, - 'missing_se': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING], + 'missing_value': [Nans.NOT_APPLICABLE] + [Nans.NOT_MISSING] * 3, + 'missing_stderr': [Nans.NOT_MISSING, Nans.REGION_EXCEPTION, Nans.NOT_MISSING, Nans.NOT_MISSING], 'missing_sample_size': [Nans.NOT_MISSING] * 2 + [Nans.REGION_EXCEPTION] * 2 } mock_pandas = MagicMock() @@ -338,4 +321,10 @@ def test_load_csv_with_valid_header(self): self.assertEqual(rows[2].missing_stderr, Nans.NOT_MISSING) self.assertEqual(rows[2].missing_sample_size, Nans.REGION_EXCEPTION) - self.assertIsNone(rows[3]) + self.assertEqual(rows[3].geo_value, 'ak') + self.assertEqual(rows[3].value, 1.4) + self.assertEqual(rows[3].stderr, 2.4) + self.assertEqual(rows[3].sample_size, 304) + self.assertEqual(rows[3].missing_value, Nans.NOT_MISSING) + self.assertEqual(rows[3].missing_stderr, Nans.NOT_MISSING) + self.assertEqual(rows[3].missing_sample_size, Nans.NOT_MISSING)