From 88b1c792520a3bac4dbf28b10af7e127f26ad661 Mon Sep 17 00:00:00 2001
From: matt bowen <matthew.r.bowen@omb.eop.gov>
Date: Tue, 27 Sep 2022 16:13:24 -0400
Subject: [PATCH] Fill demos after the score (#1851)

---
 .../data_pipeline/etl/score/etl_score.py       | 18 +++++++++---------
 .../etl/sources/census_decennial/etl.py        |  1 +
 .../data_pipeline/score/field_names.py         |  2 +-
 .../data_pipeline/score/score_narwhal.py       |  1 -
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
index 3275092dc..3bc08910d 100644
--- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py
+++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -657,19 +657,19 @@ class ReversePercentile:
             ]
         ].mean(axis=1, skipna=True)
 
-        # For AS, MP, GU, and VI, backfill data from the 2010 census where we have it
-        # df_copy = self._backfill_island_data(df_copy)
-
         return df_copy
 
-    def _backfill_island_data(self, df: pd.DataFrame) -> pd.DataFrame:
-        logger.info("Backfilling island data")
-        island_index = (
+    @staticmethod
+    def _get_island_areas(df: pd.DataFrame) -> pd.Series:
+        return (
             df[field_names.GEOID_TRACT_FIELD]
             .str[:2]
             .isin(constants.TILES_ISLAND_AREA_FIPS_CODES)
         )
 
+    def _backfill_island_demographics(self, df: pd.DataFrame) -> pd.DataFrame:
+        logger.info("Backfilling island demographic data")
+        island_index = self._get_island_areas(df)
         for backfill_field_name in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
             actual_field_name = backfill_field_name.replace(
                 field_names.ISLAND_AREA_BACKFILL_SUFFIX, ""
@@ -679,9 +679,6 @@ def _backfill_island_data(self, df: pd.DataFrame) -> pd.DataFrame:
             ]
         df = df.drop(columns=self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS)
 
-        df.loc[island_index, field_names.TOTAL_POP_FIELD] = df.loc[
-            island_index, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
-        ]
         return df
 
     def transform(self) -> None:
@@ -693,6 +690,9 @@ def transform(self) -> None:
         # calculate scores
         self.df = ScoreRunner(df=self.df).calculate_scores()
 
+        # Backfill island demographics after scoring; the score does not use them
+        self.df = self._backfill_island_demographics(self.df)
+
     def load(self) -> None:
         logger.info("Saving Score CSV")
         constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)
diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py
index 2a6ae510c..0ad37cab0 100644
--- a/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py
+++ b/data/data-pipeline/data_pipeline/etl/sources/census_decennial/etl.py
@@ -481,6 +481,7 @@ def transform(self) -> None:
             output_field_name = (
                 field_names.PERCENT_PREFIX
                 + race_field_name
+                + field_names.ISLAND_AREA_BACKFILL_SUFFIX
             )
             self.final_race_fields.append(output_field_name)
             self.df_all[output_field_name] = (
diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
index 35a3514e2..80115a418 100644
--- a/data/data-pipeline/data_pipeline/score/field_names.py
+++ b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -3,7 +3,7 @@
 ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
 ADJACENT_MEAN_SUFFIX = " (based on adjacency index and low income alone)"
 ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
-ISLAND_AREA_BACKFILL_SUFFIX = "in 2009"
+ISLAND_AREA_BACKFILL_SUFFIX = " in 2009"
 
 # Geographic field names
 GEOID_TRACT_FIELD = "GEOID10_TRACT"
diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py
index fd7129ffc..7e92654be 100644
--- a/data/data-pipeline/data_pipeline/score/score_narwhal.py
+++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -999,7 +999,6 @@ def _mark_donut_hole_tracts(self) -> pd.DataFrame:
 
     def add_columns(self) -> pd.DataFrame:
         logger.info("Adding Score Narhwal")
-
         self.df[field_names.THRESHOLD_COUNT] = 0
 
         self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (