Skip to content

Commit

Permalink
Fill demos after the score (#1851)
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbowen-usds committed Sep 27, 2022
1 parent 68cd3fa commit 88b1c79
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
18 changes: 9 additions & 9 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,19 +657,19 @@ class ReversePercentile:
]
].mean(axis=1, skipna=True)

# For AS, MP, GU, and VI, backfill data from the 2010 census where we have it
# df_copy = self._backfill_island_data(df_copy)

return df_copy

def _backfill_island_data(self, df: pd.DataFrame) -> pd.DataFrame:
logger.info("Backfilling island data")
island_index = (
@staticmethod
def _get_island_areas(df: pd.DataFrame) -> pd.Series:
    """Return a boolean mask selecting the island-area rows of *df*.

    A row is selected when the first two characters of its
    ``field_names.GEOID_TRACT_FIELD`` value appear in
    ``constants.TILES_ISLAND_AREA_FIPS_CODES``.
    """
    # Only the two-character prefix of the tract ID is compared.
    tract_prefix = df[field_names.GEOID_TRACT_FIELD].str[:2]
    return tract_prefix.isin(constants.TILES_ISLAND_AREA_FIPS_CODES)

def _backfill_island_demographics(self, df: pd.DataFrame) -> pd.DataFrame:
logger.info("Backfilling island demographic data")
island_index = self._get_island_areas(df)
for backfill_field_name in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
actual_field_name = backfill_field_name.replace(
field_names.ISLAND_AREA_BACKFILL_SUFFIX, ""
Expand All @@ -679,9 +679,6 @@ def _backfill_island_data(self, df: pd.DataFrame) -> pd.DataFrame:
]
df = df.drop(columns=self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS)

df.loc[island_index, field_names.TOTAL_POP_FIELD] = df.loc[
island_index, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
]
return df

def transform(self) -> None:
Expand All @@ -693,6 +690,9 @@ def transform(self) -> None:
# calculate scores
self.df = ScoreRunner(df=self.df).calculate_scores()

# Backfill island demographic data only after scoring, since these fields do not affect the score
self.df = self._backfill_island_demographics(self.df)

def load(self) -> None:
logger.info("Saving Score CSV")
constants.DATA_SCORE_CSV_FULL_DIR.mkdir(parents=True, exist_ok=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ def transform(self) -> None:
output_field_name = (
field_names.PERCENT_PREFIX
+ race_field_name
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX
)
self.final_race_fields.append(output_field_name)
self.df_all[output_field_name] = (
Expand Down
2 changes: 1 addition & 1 deletion data/data-pipeline/data_pipeline/score/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
ISLAND_AREAS_PERCENTILE_ADJUSTMENT_FIELD = " for island areas"
ADJACENT_MEAN_SUFFIX = " (based on adjacency index and low income alone)"
ADJACENCY_INDEX_SUFFIX = " (average of neighbors)"
ISLAND_AREA_BACKFILL_SUFFIX = "in 2009"
ISLAND_AREA_BACKFILL_SUFFIX = " in 2009"

# Geographic field names
GEOID_TRACT_FIELD = "GEOID10_TRACT"
Expand Down
1 change: 0 additions & 1 deletion data/data-pipeline/data_pipeline/score/score_narwhal.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,7 +999,6 @@ def _mark_donut_hole_tracts(self) -> pd.DataFrame:

def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score Narhwal")

self.df[field_names.THRESHOLD_COUNT] = 0

self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
Expand Down

0 comments on commit 88b1c79

Please sign in to comment.