cmu-delphi · krivard · Nov 10, 2020 · Nov 9, 2020 · chinandrew · Nov 9, 2020
diff --git a/changehc/delphi_changehc/sensor.py b/changehc/delphi_changehc/sensor.py
@@ -111,12 +111,15 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"):
 
         """
         # backfill
-        total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values, y_data[den_col].values)
+        total_counts, total_visits = CHCSensor.backfill(y_data[num_col].values,
+                                                        y_data[den_col].values)
 
         # calculate smoothed counts and jeffreys rate
         # the left_gauss_linear smoother is not guaranteed to return values greater than 0
 
-        smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(total_counts.flatten(),total_visits)
+        smoothed_total_counts, smoothed_total_visits = CHCSensor.gauss_smooth(
+            total_counts.flatten(), total_visits
+        )
 
         # in smoothing, the numerator may have become more than the denominator
         # simple fix is to clip the max values elementwise to the denominator (note that
@@ -136,12 +139,18 @@ def fit(y_data, first_sensor_date, geo_id, num_col="num", den_col="den"):
         ), f"0 or negative value, {geo_id}"
 
         # cut off at sensor indexes
-        rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits}, index=y_data.index)
+        rate_data = pd.DataFrame({'rate':smoothed_total_rates, 'den': smoothed_total_visits},
+                                 index=y_data.index)
         rate_data = rate_data[first_sensor_date:]
         include = rate_data['den'] >= Config.MIN_DEN
         valid_rates = rate_data[include]
         se_valid = valid_rates.eval('sqrt(rate * (1 - rate) / den)')
         rate_data['se'] = se_valid
 
-        logging.debug(f"{geo_id}: {rate_data['rate'][-1]:.3f},[{rate_data['se'][-1]:.3f}]")
-        return {"geo_id": geo_id, "rate": 100 * rate_data['rate'], "se": 100 * rate_data['se'], "incl": include}
+        logging.debug("{0}: {1:.3f},[{2:.3f}]".format(
+            geo_id, rate_data['rate'][-1], rate_data['se'][-1]
+        ))
+        return {"geo_id": geo_id,
+                "rate": 100 * rate_data['rate'],
+                "se": 100 * rate_data['se'],
+                "incl": include}
diff --git a/changehc/delphi_changehc/update_sensor.py b/changehc/delphi_changehc/update_sensor.py
@@ -6,17 +6,18 @@
 # standard packages
 import logging
 from multiprocessing import Pool, cpu_count
-from delphi_utils import GeoMapper, S3ArchiveDiffer, read_params, add_prefix
 
 # third party
 import numpy as np
 import pandas as pd
+from delphi_utils import GeoMapper, read_params, add_prefix
+
 # first party
 from .config import Config, Constants
+from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA
 from .load_data import load_combined_data
 from .sensor import CHCSensor
 from .weekday import Weekday
-from .constants import SIGNALS, SMOOTHED, SMOOTHED_ADJ, NA
 
 
 def write_to_csv(output_dict, write_se, out_name, output_path="."):
@@ -28,7 +29,7 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
         output_path: outfile path to write the csv (default is current directory)
     """
     if write_se:
-        logging.info(f"========= WARNING: WRITING SEs TO {out_name} =========")
+        logging.info("========= WARNING: WRITING SEs TO {0} =========".format(out_name))
     geo_level = output_dict["geo_level"]
     dates = output_dict["dates"]
     geo_ids = output_dict["geo_ids"]
@@ -52,7 +53,9 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
                     assert not np.isnan(sensor), "value for included sensor is nan"
                     assert not np.isnan(se), "se for included sensor is nan"
                     if sensor > 90:
-                        logging.warning(f"value suspiciously high, {geo_id}: {sensor}")
+                        logging.warning("value suspiciously high, {0}: {1}".format(
+                            geo_id, sensor
+                        ))
                     assert se < 5, f"se suspiciously high, {geo_id}: {se}"
                     if write_se:
                         assert sensor > 0 and se > 0, "p=0, std_err=0 invalid"
@@ -64,10 +67,12 @@ def write_to_csv(output_dict, write_se, out_name, output_path="."):
                             "%s,%f,%s,%s,%s\n" % (geo_id, sensor, NA, NA, NA)
                         )
                     out_n += 1
-    logging.debug(f"wrote {out_n} rows for {len(geo_ids)} {geo_level}")
+    logging.debug("wrote {0} rows for {1} {2}".format(
+        out_n, len(geo_ids), geo_level
+    ))
 
 
-class CHCSensorUpdator:
+class CHCSensorUpdator:  # pylint: disable=too-many-instance-attributes
     """Contains methods to update sensor and write results to csv
     """
 
@@ -136,7 +141,9 @@ def geo_reindex(self, data):
         geo = self.geo
         gmpr = GeoMapper()
         if geo not in {"county", "state", "msa", "hrr"}:
-            logging.error(f"{geo} is invalid, pick one of 'county', 'state', 'msa', 'hrr'")
+            logging.error("{0} is invalid, pick one of 'county', 'state', 'msa', 'hrr'".format(
+                geo
+            ))
             return False
         if geo == "county":
             data_frame = gmpr.fips_to_megacounty(data,
@@ -203,7 +210,7 @@ def update_sensor(self,
                 sensor_include[geo_id] = np.array(res.loc[final_sensor_idxs,"incl"])
         else:
             n_cpu = min(10, cpu_count())
-            logging.debug(f"starting pool with {n_cpu} workers")
+            logging.debug("starting pool with {0} workers".format(n_cpu))
             with Pool(n_cpu) as pool:
                 pool_results = []
                 for geo_id, sub_data in data_frame.groupby(level=0,as_index=False):
@@ -235,30 +242,4 @@ def update_sensor(self,
         # write out results
         for signal in self.updated_signal_names:
             write_to_csv(output_dict, self.se, signal, outpath)
-        logging.debug(f"wrote files to {outpath}")
-        '''
-        params = read_params()
-
-        arch_diff = S3ArchiveDiffer(
-        params["cache_dir"],
-        params["export_dir"],
-        params["bucket_name"], "chc",
-        params["aws_credentials"])
-        arch_diff.update_cache()
-
-        _, common_diffs, new_files = arch_diff.diff_exports()
-
-        # Archive changed and new files only
-        to_archive = [f for f, diff in common_diffs.items() if diff is not None]
-        to_archive += new_files
-        _, fails = arch_diff.archive_exports(to_archive)
-        print(fails)
-
-        # Filter existing exports to exclude those that failed to archive
-        succ_common_diffs = {f: diff for f, diff in common_diffs.items() if f not in fails}
-        arch_diff.filter_exports(succ_common_diffs)
-
-        # Report failures: someone should probably look at them
-        for exported_file in fails:
-            print(f"Failed to archive '{exported_file}'")
-        '''
+        logging.debug("wrote files to {0}".format(outpath))
diff --git a/changehc/delphi_changehc/weekday.py b/changehc/delphi_changehc/weekday.py
@@ -18,7 +18,7 @@ class Weekday:
 
     @staticmethod
     def get_params(data):
-        """Correct a signal estimated as numerator/denominator for weekday effects.
+        r"""Correct a signal estimated as numerator/denominator for weekday effects.
 
         The ordinary estimate would be numerator_t/denominator_t for each time point
         t. Instead, model
@@ -63,7 +63,7 @@ def get_params(data):
 
         # Construct design matrix to have weekday indicator columns and then day
         # indicators.
-        X = np.zeros((nums.shape[0], 6 + nums.shape[0]))
+        X = np.zeros((nums.shape[0], 6 + nums.shape[0]))  # pylint: disable=invalid-name
         not_sunday = np.where(nums.index.dayofweek != 6)[0]
         X[not_sunday, np.array(nums.index.dayofweek)[not_sunday]] = 1
         X[np.where(nums.index.dayofweek == 6)[0], :6] = -1