usds · lucasmbrown-usds · Aug 22, 2022 · Aug 17, 2022 · Aug 17, 2022 · Aug 17, 2022
diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md
@@ -350,7 +350,8 @@ We have four pickle files that correspond to expected files:
 
 To update the pickles, let's go one by one:
 
-For the `score_transformed_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L58), before the `pdt.assert_frame_equal` and run:
+For the `score_transformed_expected.pkl`, put a breakpoint on [this line](
+https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L62), before the `pdt.assert_frame_equal` and run:
 `pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score`
 
 Once on the breakpoint, capture the df to a pickle as follows:
@@ -378,7 +379,8 @@ score_data_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tes
 
 Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data`
 
-For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L86), before the `pdt.assert_frame_equal` and run:
+For the `tile_data_expected.pkl`, put a breakpoint on [this line](
+https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L90), before the `pdt.assert_frame_equal` and run:
 `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data`
 
 Once on the breakpoint, capture the df to a pickle as follows:

diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml
@@ -14,6 +14,39 @@ fields:
   - score_name: State/Territory
     label: State/Territory
     format: string
+  - score_name: Percent Black or African American
+    label: Percent Black or African American alone
+    format: float
+  - score_name: Percent American Indian / Alaska Native
+    label: Percent American Indian / Alaska Native
+    format: float
+  - score_name: Percent Asian
+    label: Percent Asian
+    format: float
+  - score_name: Percent Native Hawaiian or Pacific
+    label: Percent Native Hawaiian or Pacific
+    format: float
+  - score_name: Percent two or more races
+    label: Percent two or more races
+    format: float
+  - score_name: Percent White
+    label: Percent White
+    format: float
+  - score_name: Percent Hispanic or Latino
+    label: Percent Hispanic or Latino
+    format: float
+  - score_name: Percent other races
+    label: Percent other races
+    format: float
+  - score_name: Percent age under 10
+    label: Percent age under 10
+    format: float
+  - score_name: Percent age 10 to 64
+    label: Percent age 10 to 64
+    format: float
+  - score_name: Percent age over 64
+    label: Percent age over 64
+    format: float
   - score_name: Total threshold criteria exceeded
     label: Total threshold criteria exceeded
     format: int64

diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml
@@ -18,6 +18,39 @@ sheets:
       - score_name: State/Territory
         label: State/Territory
         format: string
+      - score_name: Percent Black or African American
+        label: Percent Black or African American alone
+        format: float
+      - score_name: Percent American Indian / Alaska Native
+        label: Percent American Indian / Alaska Native
+        format: float
+      - score_name: Percent Asian
+        label: Percent Asian
+        format: float
+      - score_name: Percent Native Hawaiian or Pacific
+        label: Percent Native Hawaiian or Pacific
+        format: float
+      - score_name: Percent two or more races
+        label: Percent two or more races
+        format: float
+      - score_name: Percent White
+        label: Percent White
+        format: float
+      - score_name: Percent Hispanic or Latino
+        label: Percent Hispanic or Latino
+        format: float
+      - score_name: Percent other races
+        label: Percent other races
+        format: float
+      - score_name: Percent age under 10
+        label: Percent age under 10
+        format: float
+      - score_name: Percent age 10 to 64
+        label: Percent age 10 to 64
+        format: float
+      - score_name: Percent age over 64
+        label: Percent age over 64
+        format: float
       - score_name: Total threshold criteria exceeded
         label: Total threshold criteria exceeded
         format: int64

diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py
@@ -198,10 +198,12 @@
     "name": "census",
     "module_dir": "census",
     "class_name": "CensusETL",
+    "is_memory_intensive": False,
 }
 
 TRIBAL_INFO = {
     "name": "tribal",
     "module_dir": "tribal",
     "class_name": "TribalETL",
+    "is_memory_intensive": False,
 }
diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py
@@ -315,9 +315,20 @@
     field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET",
     field_names.AML_BOOLEAN: "AML_ET",
     field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET",
-    field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG"
+    field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG",
     ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather
     ## FPL_200 (there is no higher ed in narwhal)
+    field_names.PERCENT_BLACK_FIELD_NAME: "DM_B",
+    field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME: "DM_AI",
+    field_names.PERCENT_ASIAN_FIELD_NAME: "DM_A",
+    field_names.PERCENT_HAWAIIAN_FIELD_NAME: "DM_HI",
+    field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME: "DM_T",
+    field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME: "DM_W",
+    field_names.PERCENT_HISPANIC_FIELD_NAME: "DM_H",
+    field_names.PERCENT_OTHER_RACE_FIELD_NAME: "DM_O",
+    field_names.PERCENT_AGE_UNDER_10: "AGE_10",
+    field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE",
+    field_names.PERCENT_AGE_OVER_64: "AGE_OLD",
 }
 
 # columns to round floats to 2 decimals
@@ -375,4 +386,16 @@
     field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX,
     field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME
     + field_names.PERCENTILE_FIELD_SUFFIX,
+    # Include demographic data for sidebar -- as percents, NOT as percentiles.
+    field_names.PERCENT_BLACK_FIELD_NAME,
+    field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME,
+    field_names.PERCENT_ASIAN_FIELD_NAME,
+    field_names.PERCENT_HAWAIIAN_FIELD_NAME,
+    field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME,
+    field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME,
+    field_names.PERCENT_HISPANIC_FIELD_NAME,
+    field_names.PERCENT_OTHER_RACE_FIELD_NAME,
+    field_names.PERCENT_AGE_UNDER_10,
+    field_names.PERCENT_AGE_10_TO_64,
+    field_names.PERCENT_AGE_OVER_64,
 ]
diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py
@@ -461,6 +461,17 @@ def _prepare_initial_df(self) -> pd.DataFrame:
             field_names.FUTURE_WILDFIRE_RISK_FIELD,
             field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME,
             field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD,
+            field_names.PERCENT_BLACK_FIELD_NAME,
+            field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME,
+            field_names.PERCENT_ASIAN_FIELD_NAME,
+            field_names.PERCENT_HAWAIIAN_FIELD_NAME,
+            field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME,
+            field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME,
+            field_names.PERCENT_HISPANIC_FIELD_NAME,
+            field_names.PERCENT_OTHER_RACE_FIELD_NAME,
+            field_names.PERCENT_AGE_UNDER_10,
+            field_names.PERCENT_AGE_10_TO_64,
+            field_names.PERCENT_AGE_OVER_64,
         ]
 
         non_numeric_columns = [

diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl
diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl