Skip to content

Commit

Permalink
Add backfill data to score (#1851)
Browse files: browse the repository at this point in the history
  • Loading branch information
mattbowen-usds committed Sep 28, 2022
1 parent 2ea14ba commit a9459a6
Showing 1 changed file with 35 additions and 3 deletions.
38 changes: 35 additions & 3 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
import functools
from typing import List

from dataclasses import dataclass

import numpy as np
Expand Down Expand Up @@ -56,6 +58,8 @@ def __init__(self):
self.fuds_df: pd.DataFrame
self.tribal_overlap_df: pd.DataFrame

self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS: List[str] = []

def extract(self) -> None:
logger.info("Loading data sets from disk.")

Expand Down Expand Up @@ -402,6 +406,25 @@ def _prepare_initial_df(self) -> pd.DataFrame:
df[field_names.MEDIAN_INCOME_FIELD] / df[field_names.AMI_FIELD]
)

self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS = [
field_names.PERCENT_BLACK_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_ASIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_HAWAIIAN_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_HISPANIC_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
field_names.PERCENT_OTHER_RACE_FIELD_NAME
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX,
]

# Donut columns get added later
numeric_columns = [
field_names.HOUSING_BURDEN_FIELD,
Expand Down Expand Up @@ -471,7 +494,7 @@ def _prepare_initial_df(self) -> pd.DataFrame:
field_names.PERCENT_AGE_OVER_64,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT,
]
] + self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS

non_numeric_columns = [
self.GEOID_TRACT_FIELD_NAME,
Expand Down Expand Up @@ -639,14 +662,23 @@ class ReversePercentile:

return df_copy

@staticmethod
def _backfill_island_data(df: pd.DataFrame) -> pd.DataFrame:
def _backfill_island_data(self, df: pd.DataFrame) -> pd.DataFrame:
logger.info("Backfilling island data")
island_index = (
df[field_names.GEOID_TRACT_FIELD]
.str[:2]
.isin(constants.TILES_ISLAND_AREA_FIPS_CODES)
)

for backfill_field_name in self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS:
actual_field_name = backfill_field_name.replace(
field_names.ISLAND_AREA_BACKFILL_SUFFIX, ""
)
df.loc[island_index, actual_field_name] = df.loc[
island_index, backfill_field_name
]
df = df.drop(columns=self.ISLAND_DEMOGRAPHIC_BACKFILL_FIELDS)

df.loc[island_index, field_names.TOTAL_POP_FIELD] = df.loc[
island_index, field_names.COMBINED_CENSUS_TOTAL_POPULATION_2010
]
Expand Down

0 comments on commit a9459a6

Please sign in to comment.