Make tribal overlap set score N (#2004)

* Add "Is a Tribal DAC" field (#1998)
* Add tribal DACs to score N final (#1998)
* Add new fields to downloads (#1998)
* Make an int a float (#1998)
* Update field names, apply feedback (#1998)
usds · Oct 12, 2022 · 8b611ed · 8b611ed
1 parent d89c516
commit 8b611ed
Show file tree

Hide file tree

Showing 12 changed files with 63 additions and 8 deletions.
diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -59,9 +59,15 @@ fields:
 - score_name: Definition N (communities) (based on adjacency index and low income alone)
   label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
   format: bool
+- score_name: Identified as disadvantaged due to tribal overlap
+  label: Identified as disadvantaged due to tribal overlap
+  format: bool
 - score_name: Definition N community, including adjacency index tracts
   label: Identified as disadvantaged
   format: bool
+- score_name: Percentage of tract that is disadvantaged
+  label:  Percentage of tract that is disadvantaged by area
+  format: percentage
 - score_name: Definition N (communities) (average of neighbors)
   label: Share of neighbors that are identified as disadvantaged
   format: percentage

diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -63,9 +63,15 @@ sheets:
       - score_name: Definition N (communities) (based on adjacency index and low income alone)
         label: Identified as disadvantaged based on neighbors and relaxed low income threshold only
         format: bool
+      - score_name: Identified as disadvantaged due to tribal overlap
+        label: Identified as disadvantaged due to tribal overlap
+        format: bool
       - score_name: Definition N community, including adjacency index tracts
         label: Identified as disadvantaged
         format: bool
+      - score_name: Percentage of tract that is disadvantaged
+        label:  Percentage of tract that is disadvantaged by area
+        format: percentage
       - score_name: Definition N (communities) (average of neighbors)
         label: Share of neighbors that are identified as disadvantaged
         format: percentage

diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -279,6 +279,8 @@
     field_names.SCORE_N_COMMUNITIES
     + field_names.ADJACENT_MEAN_SUFFIX: "SN_DON",
     field_names.SCORE_N_COMMUNITIES: "SN_NO_DON",
+    field_names.IS_TRIBAL_DAC: "SN_T",
+    field_names.PERCENT_OF_TRACT_IS_DAC: "SN_PERC",
     field_names.EXPECTED_POPULATION_LOSS_RATE_LOW_INCOME_FIELD: "EPLRLI",
     field_names.EXPECTED_AGRICULTURE_LOSS_RATE_LOW_INCOME_FIELD: "EALRLI",
     field_names.EXPECTED_BUILDING_LOSS_RATE_LOW_INCOME_FIELD: "EBLRLI",
@@ -472,4 +474,5 @@
     field_names.AML_BOOLEAN,
     field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
     field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
+    field_names.PERCENT_OF_TRACT_IS_DAC,
 ]
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -505,6 +505,7 @@ def _prepare_initial_df(self) -> pd.DataFrame:
             field_names.IMPUTED_INCOME_FLAG_FIELD_NAME,
             field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME,
             field_names.HISTORIC_REDLINING_SCORE_EXCEEDED,
+            field_names.IS_TRIBAL_DAC,
         ]
 
         # For some columns, high values are "good", so we want to reverse the percentile

diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py b/data/data-pipeline/data_pipeline/etl/sources/tribal_overlap/etl.py
@@ -48,6 +48,7 @@ class TribalOverlapETL(ExtractTransformLoad):
     ANNETTE_ISLAND_TRIBAL_NAME = "Annette Island LAR"
 
     CRS_INTEGER = 3857
+    TRIBAL_OVERLAP_CUTOFF = 0.995  # Percentage of overlap that rounds to 100%
 
     # Define these for easy code completion
     def __init__(self):
@@ -58,6 +59,7 @@ def __init__(self):
             field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT,
             field_names.NAMES_OF_TRIBAL_AREAS_IN_TRACT,
             field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY,
+            field_names.IS_TRIBAL_DAC,
         ]
 
         self.OVERALL_TRIBAL_COUNT = "OVERALL_TRIBAL_COUNT"
@@ -72,16 +74,17 @@ def _create_string_from_list(series: pd.Series) -> str:
         str_list = sorted(str_list)
         return ", ".join(str_list)
 
-    @staticmethod
+    @classmethod
     def _adjust_percentage_for_frontend(
+        cls,
         percentage_float: float,
     ) -> Optional[float]:
         """Round numbers very close to 0 to 0 and very close to 1 to 1 for display"""
         if percentage_float is None:
             return None
         if percentage_float < 0.01:
             return 0.0
-        if percentage_float > 0.9995:
+        if percentage_float > cls.TRIBAL_OVERLAP_CUTOFF:
             return 1.0
 
         return percentage_float
@@ -246,6 +249,11 @@ def transform(self) -> None:
             field_names.COUNT_OF_TRIBAL_AREAS_IN_TRACT_CONUS
         ] = None
 
+        merged_output_df[field_names.IS_TRIBAL_DAC] = (
+            merged_output_df[field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT]
+            > self.TRIBAL_OVERLAP_CUTOFF
+        )
+
         # The very final thing we want to do is produce a string for the front end to show
         # We do this here so that all of the logic is included
         merged_output_df[

diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py
@@ -11,9 +11,6 @@
 COUNTY_FIELD = "County Name"
 
 # Definition Narwhal fields
-FINAL_SCORE_N_BOOLEAN = (
-    "Definition M community, including adjacency index tracts"
-)
 SCORE_N_COMMUNITIES = "Definition N (communities)"
 N_CLIMATE = "Climate Factor (Definition N)"
 N_ENERGY = "Energy Factor (Definition N)"
@@ -368,6 +365,8 @@
 PERCENT_OF_TRIBAL_AREA_IN_TRACT_DISPLAY = (
     "Percent of the Census tract that is within Tribal areas, for display"
 )
+IS_TRIBAL_DAC = "Identified as disadvantaged due to tribal overlap"
+PERCENT_OF_TRACT_IS_DAC = "Percentage of tract that is disadvantaged"
 
 #####
 # Names for individual factors being exceeded

diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -997,6 +997,33 @@ def _mark_donut_hole_tracts(self) -> pd.DataFrame:
             ]
         )
 
+    def _mark_tribal_dacs(self) -> None:
+        """Per the October 7th compromise (#1988),
+        tracts that are approx 100% tribal are Score N communities.
+        """
+        self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
+            self.df[field_names.IS_TRIBAL_DAC],
+            True,
+            self.df[field_names.SCORE_N_COMMUNITIES],
+        )
+
+    def _get_percent_of_tract_that_is_dac(self) -> float:
+        """Per the October 7th compromise (#1988),
+        tracts can be partially DACs if some portion of the tract is tribal land.
+
+        Rules are as follows:
+        If a tract's FINAL_SCORE_N_BOOLEAN is true, it is 100% a DAC.
+        Otherwise, the share of the tract that is tribal land (0.0 when missing) is a DAC.
+        """
+        tribal_percent = self.df[
+            field_names.PERCENT_OF_TRIBAL_AREA_IN_TRACT
+        ].fillna(0.0)
+        return np.where(
+            self.df[field_names.FINAL_SCORE_N_BOOLEAN],
+            1.0,
+            tribal_percent,
+        )
+
     def add_columns(self) -> pd.DataFrame:
         logger.info("Adding Score Narhwal")
         self.df[field_names.THRESHOLD_COUNT] = 0
@@ -1031,10 +1058,15 @@ def add_columns(self) -> pd.DataFrame:
         ]
         self.df[field_names.CATEGORY_COUNT] = self.df[factors].sum(axis=1)
         self.df[field_names.SCORE_N_COMMUNITIES] = self.df[factors].any(axis=1)
+        self._mark_tribal_dacs()
         self.df[
             field_names.SCORE_N_COMMUNITIES
             + field_names.PERCENTILE_FIELD_SUFFIX
         ] = self.df[field_names.SCORE_N_COMMUNITIES].astype(int)
+
         self._mark_donut_hole_tracts()
+        self.df[
+            field_names.PERCENT_OF_TRACT_IS_DAC
+        ] = self._get_percent_of_tract_that_is_dac()
 
         return self.df