diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py index 7a625e4fe..4bc04ebc6 100644 --- a/changehc/delphi_changehc/sensor.py +++ b/changehc/delphi_changehc/sensor.py @@ -111,12 +111,15 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): """ # backfill - total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values) + total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, + y_data[den_col].values) # calculate smoothed counts and jeffreys rate # the left_gauss_linear smoother is not guaranteed to return values greater than 0 - smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits) + smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth( + total_counts.flatten(), total_visits + ) # in smoothing, the numerator may have become more than the denominator # simple fix is to clip the max values elementwise to the denominator (note that @@ -136,12 +139,18 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"): ), f"0 or negative value, {geo_id}" # cut off at sensor indexes - rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits}, index=y_data.index) + rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits}, + index=y_data.index) rate_data = rate_data[first_sensor_date:] include = rate_data['den'] >= Config.MIN_DEN valid_rates = rate_data[include] se_valid = valid_rates.eval('sqrt(rate * (1 - rate) / den)') rate_data['se'] = se_valid - logging.debug(f"{geo_id}: {rate_data['rate'][-1]:.3f},[{rate_data['se'][-1]:.3f}]") - return {"geo_id": geo_id, "rate": 100 * rate_data['rate'], "se": 100 * rate_data['se'], "incl": include} + logging.debug("{0}: {1:.3f},[{2:.3f}]".format( + geo_id, rate_data['rate'][-1], rate_data['se'][-1] + )) + return {"geo_id": geo_id, + "rate": 100 * rate_data['rate'], + "se": 100 * rate_data['se'], + "incl": include} diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py index 7e49d56be..58d3fe2d8 100644 --- a/changehc/delphi_changehc/update_sensor.py +++ b/changehc/delphi_changehc/update_sensor.py @@ -6,17 +6,18 @@ # standard packages import logging from multiprocessing import Pool, cpu_count -from delphi_utils import GeoMapper, S3ArchiveDiffer, read_params, add_prefix # third party import numpy as np import pandas as pd +from delphi_utils import GeoMapper, read_params, add_prefix + # first party from .config import Config, Constants +from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA from .load_data import load_combined_data from .sensor import CHCSensor from .weekday import Weekday -from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA def write_to_csv(output_dict, write_se, out_name, output_path="."): @@ -28,7 +29,7 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."): output_path: outfile path to write the csv (default is current directory) """ if write_se: - logging.info(f"========= WARNING: WRITING SEs TO {out_name} =========") + logging.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name)) geo_level = output_dict["geo_level"] dates = output_dict["dates"] geo_ids = output_dict["geo_ids"] @@ -52,7 +53,9 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."): assert not np.isnan(sensor), "value for included sensor is nan" assert not np.isnan(se), "se for included sensor is nan" if sensor > 90: - logging.warning(f"value suspiciously high, {geo_id}: {sensor}") + logging.warning("value suspiciously high, {0}: {1}".format( + geo_id, sensor + )) assert se < 5, f"se suspiciously high, {geo_id}: {se}" if write_se: assert sensor > 0 and se > 0, "p=0, std_err=0 invalid" @@ -64,10 +67,12 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."): "%s,%f,%s,%s,%s\n" % (geo_id, sensor, NA, NA, NA) ) out_n += 1 - logging.debug(f"wrote {out_n} rows for {len(geo_ids)} {geo_level}") + logging.debug("wrote {0} rows for {1} {2}".format( + out_n, len(geo_ids), geo_level + )) -class CHCSensorUpdator: +class CHCSensorUpdator: # pylint: disable=too-many-instance-attributes """Contains methods to update sensor and write results to csv """ @@ -136,7 +141,9 @@ def geo_reindex(self, data): geo = self.geo gmpr = GeoMapper() if geo not in {"county", "state", "msa", "hrr"}: - logging.error(f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr'") + logging.error("{0} is invalid, pick one of 'county', 'state', 'msa', 'hrr'".format( + geo + )) return False if geo == "county": data_frame = gmpr.fips_to_megacounty(data, @@ -203,7 +210,7 @@ def update_sensor(self, sensor_include[geo_id] = np.array(res.loc[final_sensor_idxs,"incl"]) else: n_cpu = min(10, cpu_count()) - logging.debug(f"starting pool with {n_cpu} workers") + logging.debug("starting pool with {0} workers".format(n_cpu)) with Pool(n_cpu) as pool: pool_results = [] for geo_id, sub_data in data_frame.groupby(level=0,as_index=False): @@ -235,30 +242,4 @@ def update_sensor(self, # write out results for signal in self.updated_signal_names: write_to_csv(output_dict, self.se, signal, outpath) - logging.debug(f"wrote files to {outpath}") - ''' - params = read_params() - - arch_diff = S3ArchiveDiffer( - params["cache_dir"], - params["export_dir"], - params["bucket_name"], "chc", - params["aws_credentials"]) - arch_diff.update_cache() - - _, common_diffs, new_files = arch_diff.diff_exports() - - # Archive changed and new files only - to_archive = [f for f, diff in common_diffs.items() if diff is not None] - to_archive += new_files - _, fails = arch_diff.archive_exports(to_archive) - print(fails) - - # Filter existing exports to exclude those that failed to archive - succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails} - arch_diff.filter_exports(succ_common_diffs) - - # Report failures: someone should probably look at them - for exported_file in fails: - print(f"Failed to archive '{exported_file}'") - ''' + logging.debug("wrote files to {0}".format(outpath)) diff --git a/changehc/delphi_changehc/weekday.py b/changehc/delphi_changehc/weekday.py index a8148b8ab..03b69cba4 100644 --- a/changehc/delphi_changehc/weekday.py +++ b/changehc/delphi_changehc/weekday.py @@ -18,7 +18,7 @@ class Weekday: @staticmethod def get_params(data): - """Correct a signal estimated as numerator/denominator for weekday effects. + r"""Correct a signal estimated as numerator/denominator for weekday effects. The ordinary estimate would be numerator_t/denominator_t for each time point t. Instead, model @@ -63,7 +63,7 @@ def get_params(data): # Construct design matrix to have weekday indicator columns and then day # indicators. - X = np.zeros((nums.shape[0], 6 + nums.shape[0])) + X = np.zeros((nums.shape[0], 6 + nums.shape[0])) # pylint: disable=invalid-name not_sunday = np.where(nums.index.dayofweek != 6)[0] X[not_sunday, np.array(nums.index.dayofweek)[not_sunday]] = 1 X[np.where(nums.index.dayofweek == 6)[0], :6] = -1