From f096184244cc9ab6d35dd8fb64f17428f8f2ebf5 Mon Sep 17 00:00:00 2001 From: matt bowen Date: Fri, 23 Sep 2022 17:38:13 -0400 Subject: [PATCH] Backfill population in island areas (#1882) --- .../data_pipeline/etl/score/etl_score.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 53f7d2603..7eb9b0ce7 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -364,7 +364,7 @@ def _prepare_initial_df(self) -> pd.DataFrame: self.nature_deprived_df, self.eamlis_df, self.fuds_df, - self.tribal_overlap_df + self.tribal_overlap_df, ] # Sanity check each data frame before merging. @@ -640,8 +640,24 @@ class ReversePercentile: ] ].mean(axis=1, skipna=True) + # For AS, MP, GU, and VI, backfill data from the 2010 census where we have it + df_copy = self._backfill_island_data(df_copy) + return df_copy + @staticmethod + def _backfill_island_data(df: pd.DataFrame) -> pd.DataFrame: + logger.info("Backfilling island data") + island_index = ( + df[field_names.GEOID_TRACT_FIELD] + .str[:2] + .isin(constants.TILES_ISLAND_AREA_FIPS_CODES) + ) + df.loc[island_index, field_names.TOTAL_POP_FIELD] = df.loc[ + island_index, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010 + ] + return df + def transform(self) -> None: logger.info("Transforming Score Data")