Skip to content

Commit

Permalink
Make tribal overlap set score N (#2004)
Browse files Browse the repository at this point in the history
* Add "Is a Tribal DAC" field (#1998)

* Add tribal DACs to score N final (#1998)

* Add new fields to downloads (#1998)

* Make an int a float (#1998)

* Update field names, apply feedback (#1998)
  • Loading branch information
mattbowen-usds authored Oct 12, 2022
1 parent d89c516 commit 8b611ed
Show file tree
Hide file tree
Showing 12 changed files with 63 additions and 8 deletions.
6 changes: 6 additions & 0 deletions data/data-pipeline/data_pipeline/content/config/csv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,15 @@ fields:
- score_name: Definition N (communities) (based on adjacency index and low income alone)
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
format: bool
- score_name: Identified as disadvantaged due to tribal overlap
label: Identified as disadvantaged due to tribal overlap
format: bool
- score_name: Definition N community, including adjacency index tracts
label: Identified as disadvantaged
format: bool
- score_name: Percentage of tract that is disadvantaged
label: Percentage of tract that is disadvantaged by area
format: percentage
- score_name: Definition N (communities) (average of neighbors)
label: Share of neighbors that are identified as disadvantaged
format: percentage
Expand Down
6 changes: 6 additions & 0 deletions data/data-pipeline/data_pipeline/content/config/excel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,15 @@ sheets:
- score_name: Definition N (communities) (based on adjacency index and low income alone)
label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
format: bool
- score_name: Identified as disadvantaged due to tribal overlap
label: Identified as disadvantaged due to tribal overlap
format: bool
- score_name: Definition N community, including adjacency index tracts
label: Identified as disadvantaged
format: bool
- score_name: Percentage of tract that is disadvantaged
label: Percentage of tract that is disadvantaged by area
format: percentage
- score_name: Definition N (communities) (average of neighbors)
label: Share of neighbors that are identified as disadvantaged
format: percentage
Expand Down
3 changes: 3 additions & 0 deletions data/data-pipeline/data_pipeline/etl/score/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@
field_names.SCORE_N_COMMUNITIES
+ field_names.ADJACENT_MEAN_SUFFIX: "SN_DON",
field_names.SCORE_N_COMMUNITIES: "SN_NO_DON",
field_names.IS_TRIBAL_DAC: "SN_T",
field_names.PERCENT_OF_TRACT_IS_DAC: "SN_PERC",
field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
Expand Down Expand Up @@ -472,4 +474,5 @@
field_names.AML_BOOLEAN,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.PERCENT_OF_TRACT_IS_DAC,
]
1 change: 1 addition & 0 deletions data/data-pipeline/data_pipeline/etl/score/etl_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ def _prepare_initial_df(self) -> pd.DataFrame:
field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
field_names.IS_TRIBAL_DAC,
]

# For some columns, high values are "good", so we want to reverse the percentile
Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
12 changes: 10 additions & 2 deletions data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class TribalOverlapETL(ExtractTransformLoad):
ANNETTE_ISLAND_TRIBAL_NAME = "Annette Island LAR"

CRS_INTEGER = 3857
TRIBAL_OVERLAP_CUTOFF = 0.995 # Percentage of overlap that rounds to 100%

# Define these for easy code completion
def __init__(self):
Expand All @@ -58,6 +59,7 @@ def __init__(self):
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
field_names.IS_TRIBAL_DAC,
]

self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
Expand All @@ -72,16 +74,17 @@ def _create_string_from_list(series: pd.Series) -> str:
str_list = sorted(str_list)
return ", ".join(str_list)

@classmethod
def _adjust_percentage_for_frontend(
    cls,
    percentage_float: float,
) -> Optional[float]:
    """Snap values very close to 0 or 1 to exactly 0.0 or 1.0 for display.

    Args:
        percentage_float: The value to adjust; may be ``None``. The
            thresholds below suggest a fraction in [0, 1] -- TODO confirm
            against callers.

    Returns:
        ``None`` when the input is ``None``; ``0.0`` when the input is
        below 0.01; ``1.0`` when the input is strictly greater than
        ``cls.TRIBAL_OVERLAP_CUTOFF``; otherwise the input unchanged.
    """
    if percentage_float is None:
        return None
    if percentage_float < 0.01:
        return 0.0
    # Rely on the single class-level cutoff instead of a second hard-coded
    # threshold, so rounding-to-1 uses the same boundary everywhere the
    # class compares against TRIBAL_OVERLAP_CUTOFF.
    if percentage_float > cls.TRIBAL_OVERLAP_CUTOFF:
        return 1.0

    return percentage_float
Expand Down Expand Up @@ -246,6 +249,11 @@ def transform(self) -> None:
field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
] = None

merged_output_df[field_names.IS_TRIBAL_DAC] = (
merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT]
> self.TRIBAL_OVERLAP_CUTOFF
)

# The very final thing we want to do is produce a string for the front end to show
# We do this here so that all of the logic is included
merged_output_df[
Expand Down
5 changes: 2 additions & 3 deletions data/data-pipeline/data_pipeline/score/field_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
COUNTY_FIELD = "County Name"

# Definition Narwhal fields
FINAL_SCORE_N_BOOLEAN = (
"Definition M community, including adjacency index tracts"
)
SCORE_N_COMMUNITIES = "Definition N (communities)"
N_CLIMATE = "Climate Factor (Definition N)"
N_ENERGY = "Energy Factor (Definition N)"
Expand Down Expand Up @@ -368,6 +365,8 @@
PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
"Percent of the Census tract that is within Tribal areas, for display"
)
IS_TRIBAL_DAC = "Identified as disadvantaged due to tribal overlap"
PERCENT_OF_TRACT_IS_DAC = "Percentage of tract that is disadvantaged"

#####
# Names for individual factors being exceeded
Expand Down
32 changes: 32 additions & 0 deletions data/data-pipeline/data_pipeline/score/score_narwhal.py
Original file line number Diff line number Diff line change
Expand Up @@ -997,6 +997,33 @@ def _mark_donut_hole_tracts(self) -> pd.DataFrame:
]
)

def _mark_tribal_dacs(self) -> None:
    """Turn the Score N community flag on for tribal DAC tracts.

    Per the October 7th compromise (#1988), tracts that are approximately
    100% tribal are Score N communities. Rows whose IS_TRIBAL_DAC value is
    truthy are set to True; every other row keeps its current
    SCORE_N_COMMUNITIES value. The column on ``self.df`` is overwritten in
    place.
    """
    score_column = field_names.SCORE_N_COMMUNITIES
    tribal_flag = self.df[field_names.IS_TRIBAL_DAC]
    current_flag = self.df[score_column]
    # np.where picks True where the tribal flag holds, else the prior value
    self.df[score_column] = np.where(tribal_flag, True, current_flag)

def _get_percent_of_tract_that_is_dac(self) -> np.ndarray:
    """Compute, per row of ``self.df``, the share of the tract that is a DAC.

    Per the October 7th compromise (#1988), tracts can be partially DACs if
    some portion of the tract is tribal land. Rules are as follows:
        If a tract's FINAL_SCORE_N_BOOLEAN value is truthy, it is 100% a DAC.
        Otherwise, the share equals PERCENT_OF_TRIBAL_AREA_IN_TRACT, with
        missing values treated as 0.0.

    Returns:
        The array produced by ``np.where`` (one value per row of
        ``self.df``), not a scalar float.
    """
    # Missing tribal overlap means no part of the tract is tribal land
    tribal_percent = self.df[
        field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT
    ].fillna(0.0)
    return np.where(
        self.df[field_names.FINAL_SCORE_N_BOOLEAN],
        1.0,
        tribal_percent,
    )

def add_columns(self) -> pd.DataFrame:
logger.info("Adding Score Narhwal")
self.df[field_names.THRESHOLD_COUNT] = 0
Expand Down Expand Up @@ -1031,10 +1058,15 @@ def add_columns(self) -> pd.DataFrame:
]
self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)
self._mark_tribal_dacs()
self.df[
field_names.SCORE_N_COMMUNITIES
+ field_names.PERCENTILE_FIELD_SUFFIX
] = self.df[field_names.SCORE_N_COMMUNITIES].astype(int)

self._mark_donut_hole_tracts()
self.df[
field_names.PERCENT_OF_TRACT_IS_DAC
] = self._get_percent_of_tract_that_is_dac()

return self.df

0 comments on commit 8b611ed

Please sign in to comment.