From d52497b895d33d0d7dbdb6f8c06853d37a4c9e6a Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 17 Aug 2022 17:56:40 -0400 Subject: [PATCH 01/14] WIP on adding demo --- .../data_pipeline/etl/sources/census_acs/etl.py | 7 +++++++ data/data-pipeline/data_pipeline/score/field_names.py | 10 ++++++++++ 2 files changed, 17 insertions(+) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index a670d3107..c746d9ba0 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -186,6 +186,7 @@ def __init__(self): "B03002_003E", "B03003_001E", "B03003_003E", + "B02001_007E", # "Some other race alone" ] # Name output demographics fields. @@ -198,6 +199,7 @@ def __init__(self): self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" self.NON_HISPANIC_WHITE_FIELD_NAME = "Non-Hispanic White" self.HISPANIC_FIELD_NAME = "Hispanic or Latino" + self.OTHER_RACE_FIELD_NAME = "Some other race alone" self.RE_OUTPUT_FIELDS = [ self.BLACK_FIELD_NAME, @@ -207,6 +209,7 @@ def __init__(self): self.TWO_OR_MORE_RACES_FIELD_NAME, self.NON_HISPANIC_WHITE_FIELD_NAME, self.HISPANIC_FIELD_NAME, + self.OTHER_RACE_FIELD_NAME, ] self.PERCENT_PREFIX = "Percent " @@ -413,6 +416,7 @@ def transform(self) -> None: df[self.TWO_OR_MORE_RACES_FIELD_NAME] = df["B02001_008E"] df[self.NON_HISPANIC_WHITE_FIELD_NAME] = df["B03002_003E"] df[self.HISPANIC_FIELD_NAME] = df["B03003_003E"] + df[self.OTHER_RACE_FIELD_NAME] = df["B02001_007E"] # Calculate demographics as percent df[self.PERCENT_PREFIX + self.BLACK_FIELD_NAME] = ( @@ -436,6 +440,9 @@ def transform(self) -> None: df[self.PERCENT_PREFIX + self.HISPANIC_FIELD_NAME] = ( df["B03003_003E"] / df["B03003_001E"] ) + df[self.PERCENT_PREFIX + self.OTHER_RACE_FIELD_NAME] = ( + df["B02001_007E"] / df["B03003_001E"] + ) # Calculate college attendance and adjust low income df[self.COLLEGE_ATTENDANCE_FIELD] = ( diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index e8b5e7d3d..7a115a0b5 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -99,6 +99,16 @@ "Low median household income as a percent of area median income" ) +# Additional ACS demographic fields. +BLACK_FIELD_NAME = "Black or African American alone" +AMERICAN_INDIAN_FIELD_NAME = "American Indian and Alaska Native alone" +ASIAN_FIELD_NAME = "Asian alone" +HAWAIIAN_FIELD_NAME = "Native Hawaiian and Other Pacific alone" +TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" +NON_HISPANIC_WHITE_FIELD_NAME = "Non-Hispanic White" +HISPANIC_FIELD_NAME = "Hispanic or Latino" +OTHER_RACE_FIELD_NAME = "Some other race alone" + # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" EXPECTED_BUILDING_LOSS_RATE_FIELD = ( From e2d18b7758c3c1cf1022430e9dbff87e9b353005 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 17 Aug 2022 18:02:23 -0400 Subject: [PATCH 02/14] switching to field_names --- .../etl/sources/census_acs/etl.py | 62 ++++++++----------- .../data_pipeline/score/field_names.py | 12 ++++ 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index c746d9ba0..f5cbef073 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -190,30 +190,18 @@ def __init__(self): ] # Name output demographics fields. - self.BLACK_FIELD_NAME = "Black or African American alone" - self.AMERICAN_INDIAN_FIELD_NAME = ( - "American Indian and Alaska Native alone" - ) - self.ASIAN_FIELD_NAME = "Asian alone" - self.HAWAIIAN_FIELD_NAME = "Native Hawaiian and Other Pacific alone" - self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" - self.NON_HISPANIC_WHITE_FIELD_NAME = "Non-Hispanic White" - self.HISPANIC_FIELD_NAME = "Hispanic or Latino" - self.OTHER_RACE_FIELD_NAME = "Some other race alone" self.RE_OUTPUT_FIELDS = [ - self.BLACK_FIELD_NAME, - self.AMERICAN_INDIAN_FIELD_NAME, - self.ASIAN_FIELD_NAME, - self.HAWAIIAN_FIELD_NAME, - self.TWO_OR_MORE_RACES_FIELD_NAME, - self.NON_HISPANIC_WHITE_FIELD_NAME, - self.HISPANIC_FIELD_NAME, - self.OTHER_RACE_FIELD_NAME, + field_names.BLACK_FIELD_NAME, + field_names.AMERICAN_INDIAN_FIELD_NAME, + field_names.ASIAN_FIELD_NAME, + field_names.HAWAIIAN_FIELD_NAME, + field_names.TWO_OR_MORE_RACES_FIELD_NAME, + field_names.NON_HISPANIC_WHITE_FIELD_NAME, + field_names.HISPANIC_FIELD_NAME, + field_names.OTHER_RACE_FIELD_NAME, ] - self.PERCENT_PREFIX = "Percent " - self.STATE_GEOID_FIELD_NAME = "GEOID2" self.COLUMNS_TO_KEEP = ( @@ -233,7 +221,7 @@ def __init__(self): field_names.IMPUTED_INCOME_FLAG_FIELD_NAME, ] + self.RE_OUTPUT_FIELDS - + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + + [field_names.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + [ field_names.POVERTY_LESS_THAN_200_FPL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, @@ -409,38 +397,38 @@ def transform(self) -> None: ) # Calculate some demographic information. - df[self.BLACK_FIELD_NAME] = df["B02001_003E"] - df[self.AMERICAN_INDIAN_FIELD_NAME] = df["B02001_004E"] - df[self.ASIAN_FIELD_NAME] = df["B02001_005E"] - df[self.HAWAIIAN_FIELD_NAME] = df["B02001_006E"] - df[self.TWO_OR_MORE_RACES_FIELD_NAME] = df["B02001_008E"] - df[self.NON_HISPANIC_WHITE_FIELD_NAME] = df["B03002_003E"] - df[self.HISPANIC_FIELD_NAME] = df["B03003_003E"] - df[self.OTHER_RACE_FIELD_NAME] = df["B02001_007E"] + df[field_names.BLACK_FIELD_NAME] = df["B02001_003E"] + df[field_names.AMERICAN_INDIAN_FIELD_NAME] = df["B02001_004E"] + df[field_names.ASIAN_FIELD_NAME] = df["B02001_005E"] + df[field_names.HAWAIIAN_FIELD_NAME] = df["B02001_006E"] + df[field_names.TWO_OR_MORE_RACES_FIELD_NAME] = df["B02001_008E"] + df[field_names.NON_HISPANIC_WHITE_FIELD_NAME] = df["B03002_003E"] + df[field_names.HISPANIC_FIELD_NAME] = df["B03003_003E"] + df[field_names.OTHER_RACE_FIELD_NAME] = df["B02001_007E"] # Calculate demographics as percent - df[self.PERCENT_PREFIX + self.BLACK_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.BLACK_FIELD_NAME] = ( df["B02001_003E"] / df["B02001_001E"] ) - df[self.PERCENT_PREFIX + self.AMERICAN_INDIAN_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.AMERICAN_INDIAN_FIELD_NAME] = ( df["B02001_004E"] / df["B02001_001E"] ) - df[self.PERCENT_PREFIX + self.ASIAN_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.ASIAN_FIELD_NAME] = ( df["B02001_005E"] / df["B02001_001E"] ) - df[self.PERCENT_PREFIX + self.HAWAIIAN_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.HAWAIIAN_FIELD_NAME] = ( df["B02001_006E"] / df["B02001_001E"] ) - df[self.PERCENT_PREFIX + self.TWO_OR_MORE_RACES_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.TWO_OR_MORE_RACES_FIELD_NAME] = ( df["B02001_008E"] / df["B02001_001E"] ) - df[self.PERCENT_PREFIX + self.NON_HISPANIC_WHITE_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.NON_HISPANIC_WHITE_FIELD_NAME] = ( df["B03002_003E"] / df["B03002_001E"] ) - df[self.PERCENT_PREFIX + self.HISPANIC_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.HISPANIC_FIELD_NAME] = ( df["B03003_003E"] / df["B03003_001E"] ) - df[self.PERCENT_PREFIX + self.OTHER_RACE_FIELD_NAME] = ( + df[field_names.PERCENT_PREFIX + field_names.OTHER_RACE_FIELD_NAME] = ( df["B02001_007E"] / df["B03003_001E"] ) diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 7a115a0b5..95e314648 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -109,6 +109,18 @@ HISPANIC_FIELD_NAME = "Hispanic or Latino" OTHER_RACE_FIELD_NAME = "Some other race alone" +# Same fields, but with a percent prefix +PERCENT_PREFIX = "Percent " + +PERCENT_BLACK_FIELD_NAME = PERCENT_PREFIX + BLACK_FIELD_NAME +PERCENT_AMERICAN_INDIAN_FIELD_NAME = PERCENT_PREFIX + AMERICAN_INDIAN_FIELD_NAME +PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + ASIAN_FIELD_NAME +PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + HAWAIIAN_FIELD_NAME +PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + TWO_OR_MORE_RACES_FIELD_NAME +PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + NON_HISPANIC_WHITE_FIELD_NAME +PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + HISPANIC_FIELD_NAME +PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + OTHER_RACE_FIELD_NAME + # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" EXPECTED_BUILDING_LOSS_RATE_FIELD = ( From 9790bbbe7275327d50602d215f0911519e947c93 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Wed, 17 Aug 2022 18:03:56 -0400 Subject: [PATCH 03/14] running black linter --- .../etl/sources/census_acs/etl.py | 25 +++++++++++-------- .../data_pipeline/score/field_names.py | 8 ++++-- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index f5cbef073..3f98fd1be 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -221,7 +221,10 @@ def __init__(self): field_names.IMPUTED_INCOME_FLAG_FIELD_NAME, ] + self.RE_OUTPUT_FIELDS - + [field_names.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + + [ + field_names.PERCENT_PREFIX + field + for field in self.RE_OUTPUT_FIELDS + ] + [ field_names.POVERTY_LESS_THAN_200_FPL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, @@ -410,21 +413,23 @@ def transform(self) -> None: df[field_names.PERCENT_PREFIX + field_names.BLACK_FIELD_NAME] = ( df["B02001_003E"] / df["B02001_001E"] ) - df[field_names.PERCENT_PREFIX + field_names.AMERICAN_INDIAN_FIELD_NAME] = ( - df["B02001_004E"] / df["B02001_001E"] - ) + df[ + field_names.PERCENT_PREFIX + field_names.AMERICAN_INDIAN_FIELD_NAME + ] = (df["B02001_004E"] / df["B02001_001E"]) df[field_names.PERCENT_PREFIX + field_names.ASIAN_FIELD_NAME] = ( df["B02001_005E"] / df["B02001_001E"] ) df[field_names.PERCENT_PREFIX + field_names.HAWAIIAN_FIELD_NAME] = ( df["B02001_006E"] / df["B02001_001E"] ) - df[field_names.PERCENT_PREFIX + field_names.TWO_OR_MORE_RACES_FIELD_NAME] = ( - df["B02001_008E"] / df["B02001_001E"] - ) - df[field_names.PERCENT_PREFIX + field_names.NON_HISPANIC_WHITE_FIELD_NAME] = ( - df["B03002_003E"] / df["B03002_001E"] - ) + df[ + field_names.PERCENT_PREFIX + + field_names.TWO_OR_MORE_RACES_FIELD_NAME + ] = (df["B02001_008E"] / df["B02001_001E"]) + df[ + field_names.PERCENT_PREFIX + + field_names.NON_HISPANIC_WHITE_FIELD_NAME + ] = (df["B03002_003E"] / df["B03002_001E"]) df[field_names.PERCENT_PREFIX + field_names.HISPANIC_FIELD_NAME] = ( df["B03003_003E"] / df["B03003_001E"] ) diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 95e314648..d5d6e6cb9 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -116,8 +116,12 @@ PERCENT_AMERICAN_INDIAN_FIELD_NAME = PERCENT_PREFIX + AMERICAN_INDIAN_FIELD_NAME PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + ASIAN_FIELD_NAME PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + HAWAIIAN_FIELD_NAME -PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + TWO_OR_MORE_RACES_FIELD_NAME -PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + NON_HISPANIC_WHITE_FIELD_NAME +PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = ( + PERCENT_PREFIX + TWO_OR_MORE_RACES_FIELD_NAME +) +PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = ( + PERCENT_PREFIX + NON_HISPANIC_WHITE_FIELD_NAME +) PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + HISPANIC_FIELD_NAME PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + OTHER_RACE_FIELD_NAME From 9443c252f8f3c25ad1d7c599ab332eef6095d5ee Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Thu, 18 Aug 2022 17:05:29 -0400 Subject: [PATCH 04/14] updating yaml etc --- .../data_pipeline/content/config/csv.yml | 24 +++++++++++++++++++ .../data_pipeline/content/config/excel.yml | 24 +++++++++++++++++++ .../data_pipeline/etl/score/constants.py | 17 +++++++++++++ .../data_pipeline/etl/score/etl_score.py | 8 +++++++ .../etl/sources/census_acs/etl.py | 1 - .../data_pipeline/score/field_names.py | 14 +++++------ 6 files changed, 80 insertions(+), 8 deletions(-) diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 639a3f2f1..648546e2b 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -14,6 +14,30 @@ fields: - score_name: State/Territory label: State/Territory format: string + - score_name: Percent Black or African Amer. + label: Percent Black or African American alone + format: float + - score_name: Percent Amer. Indian / Alaska Native + label: Percent Amer. Indian / Alaska Native + format: float + - score_name: Percent Asian + label: Percent Asian + format: float + - score_name: Percent Native Hawaiian or Pacific + label: Percent Native Hawaiian or Pacific + format: float + - score_name: Percent Two or more + label: Percent Two or more + format: float + - score_name: Percent White + label: Percent White + format: float + - score_name: Percent Hispanic or Latino + label: Percent Hispanic or Latino + format: float + - score_name: Percent Other + label: Percent Other + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index ad8202295..d7a19cab8 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -18,6 +18,30 @@ sheets: - score_name: State/Territory label: State/Territory format: string + - score_name: Percent Black or African Amer. + label: Percent Black or African American alone + format: float + - score_name: Percent Amer. Indian / Alaska Native + label: Percent Amer. Indian / Alaska Native + format: float + - score_name: Percent Asian + label: Percent Asian + format: float + - score_name: Percent Native Hawaiian or Pacific + label: Percent Native Hawaiian or Pacific + format: float + - score_name: Percent Two or more + label: Percent Two or more + format: float + - score_name: Percent White + label: Percent White + format: float + - score_name: Percent Hispanic or Latino + label: Percent Hispanic or Latino + format: float + - score_name: Percent Other + label: Percent Other + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index ee431a6db..ca8210b82 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -318,6 +318,14 @@ field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG" ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL_200 (there is no higher ed in narwhal) + field_names.PERCENT_BLACK_FIELD_NAME: "DM_B", + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME: "DM_AI", + field_names.PERCENT_ASIAN_FIELD_NAME: "DM_A", + field_names.PERCENT_HAWAIIAN_FIELD_NAME: "DM_HI", + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME: "DM_T", + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME: "DM_W", + field_names.PERCENT_HISPANIC_FIELD_NAME: "DM_H", + field_names.PERCENT_OTHER_RACE_FIELD_NAME: "DM_O", } # columns to round floats to 2 decimals @@ -375,4 +383,13 @@ field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX, field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + field_names.PERCENTILE_FIELD_SUFFIX, + # Include demographic data for sidebar -- as percents, NOT as percentiles. + field_names.PERCENT_BLACK_FIELD_NAME, + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, + field_names.PERCENT_ASIAN_FIELD_NAME, + field_names.PERCENT_HAWAIIAN_FIELD_NAME, + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, + field_names.PERCENT_HISPANIC_FIELD_NAME, + field_names.PERCENT_OTHER_RACE_FIELD_NAME, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index d64e10b52..da6d09d06 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -461,6 +461,14 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.FUTURE_WILDFIRE_RISK_FIELD, field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, + field_names.PERCENT_BLACK_FIELD_NAME, + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, + field_names.PERCENT_ASIAN_FIELD_NAME, + field_names.PERCENT_HAWAIIAN_FIELD_NAME, + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, + field_names.PERCENT_HISPANIC_FIELD_NAME, + field_names.PERCENT_OTHER_RACE_FIELD_NAME, ] non_numeric_columns = [ diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 3f98fd1be..788951537 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -190,7 +190,6 @@ def __init__(self): ] # Name output demographics fields. - self.RE_OUTPUT_FIELDS = [ field_names.BLACK_FIELD_NAME, field_names.AMERICAN_INDIAN_FIELD_NAME, diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index d5d6e6cb9..98f8da2ac 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -100,14 +100,14 @@ ) # Additional ACS demographic fields. -BLACK_FIELD_NAME = "Black or African American alone" -AMERICAN_INDIAN_FIELD_NAME = "American Indian and Alaska Native alone" -ASIAN_FIELD_NAME = "Asian alone" -HAWAIIAN_FIELD_NAME = "Native Hawaiian and Other Pacific alone" -TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" -NON_HISPANIC_WHITE_FIELD_NAME = "Non-Hispanic White" +BLACK_FIELD_NAME = "Black or African Amer." +AMERICAN_INDIAN_FIELD_NAME = "Amer. Indian / Alaska Native" +ASIAN_FIELD_NAME = "Asian" +HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" +TWO_OR_MORE_RACES_FIELD_NAME = "Two or more" +NON_HISPANIC_WHITE_FIELD_NAME = "White" HISPANIC_FIELD_NAME = "Hispanic or Latino" -OTHER_RACE_FIELD_NAME = "Some other race alone" +OTHER_RACE_FIELD_NAME = "Other" # Same fields, but with a percent prefix PERCENT_PREFIX = "Percent " From 3b4ad8c7aaa931ecbdb2d3e6cd350d1b58ba888b Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Thu, 18 Aug 2022 12:27:08 -0400 Subject: [PATCH 05/14] updating abbreviations --- .../data_pipeline/content/config/csv.yml | 14 +++++++------- .../data_pipeline/content/config/excel.yml | 14 +++++++------- .../data_pipeline/score/field_names.py | 8 ++++---- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 648546e2b..b5b103a73 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -14,11 +14,11 @@ fields: - score_name: State/Territory label: State/Territory format: string - - score_name: Percent Black or African Amer. + - score_name: Percent Black or African American label: Percent Black or African American alone format: float - - score_name: Percent Amer. Indian / Alaska Native - label: Percent Amer. Indian / Alaska Native + - score_name: Percent American Indian / Alaska Native + label: Percent American Indian / Alaska Native format: float - score_name: Percent Asian label: Percent Asian @@ -26,8 +26,8 @@ fields: - score_name: Percent Native Hawaiian or Pacific label: Percent Native Hawaiian or Pacific format: float - - score_name: Percent Two or more - label: Percent Two or more + - score_name: Percent Two or more races + label: Percent Two or more races format: float - score_name: Percent White label: Percent White @@ -35,8 +35,8 @@ fields: - score_name: Percent Hispanic or Latino label: Percent Hispanic or Latino format: float - - score_name: Percent Other - label: Percent Other + - score_name: Percent Other races + label: Percent Other races format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index d7a19cab8..33e4132e4 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -18,11 +18,11 @@ sheets: - score_name: State/Territory label: State/Territory format: string - - score_name: Percent Black or African Amer. + - score_name: Percent Black or African American label: Percent Black or African American alone format: float - - score_name: Percent Amer. Indian / Alaska Native - label: Percent Amer. Indian / Alaska Native + - score_name: Percent American Indian / Alaska Native + label: Percent American Indian / Alaska Native format: float - score_name: Percent Asian label: Percent Asian @@ -30,8 +30,8 @@ sheets: - score_name: Percent Native Hawaiian or Pacific label: Percent Native Hawaiian or Pacific format: float - - score_name: Percent Two or more - label: Percent Two or more + - score_name: Percent Two or more races + label: Percent Two or more races format: float - score_name: Percent White label: Percent White @@ -39,8 +39,8 @@ sheets: - score_name: Percent Hispanic or Latino label: Percent Hispanic or Latino format: float - - score_name: Percent Other - label: Percent Other + - score_name: Percent Other races + label: Percent Other races format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 98f8da2ac..782d61a13 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -100,14 +100,14 @@ ) # Additional ACS demographic fields. -BLACK_FIELD_NAME = "Black or African Amer." -AMERICAN_INDIAN_FIELD_NAME = "Amer. Indian / Alaska Native" +BLACK_FIELD_NAME = "Black or African American" +AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" ASIAN_FIELD_NAME = "Asian" HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" -TWO_OR_MORE_RACES_FIELD_NAME = "Two or more" +TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" NON_HISPANIC_WHITE_FIELD_NAME = "White" HISPANIC_FIELD_NAME = "Hispanic or Latino" -OTHER_RACE_FIELD_NAME = "Other" +OTHER_RACE_FIELD_NAME = "Other Races" # Same fields, but with a percent prefix PERCENT_PREFIX = "Percent " From 47466978a241dd51bc95bbce20298abb46851d6b Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Thu, 18 Aug 2022 12:37:30 -0400 Subject: [PATCH 06/14] fixing memory intensive bug --- data/data-pipeline/data_pipeline/etl/constants.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index 7d76b4f05..dbb90ee43 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -198,10 +198,12 @@ "name": "census", "module_dir": "census", "class_name": "CensusETL", + "is_memory_intensive": True, } TRIBAL_INFO = { "name": "tribal", "module_dir": "tribal", "class_name": "TribalETL", + "is_memory_intensive": True, } From f3f4284be577ecfd4a6c5c93890c359737794b46 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Thu, 18 Aug 2022 12:40:00 -0400 Subject: [PATCH 07/14] switching mem intensive flags --- data/data-pipeline/data_pipeline/etl/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index dbb90ee43..569c088cd 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -198,12 +198,12 @@ "name": "census", "module_dir": "census", "class_name": "CensusETL", - "is_memory_intensive": True, + "is_memory_intensive": False, } TRIBAL_INFO = { "name": "tribal", "module_dir": "tribal", "class_name": "TribalETL", - "is_memory_intensive": True, + "is_memory_intensive": False, } From dcd6781d1f173488e97315ede6f2b44e7f0ba7c1 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Thu, 18 Aug 2022 17:03:57 -0400 Subject: [PATCH 08/14] using rename --- .../etl/sources/census_acs/etl.py | 47 ++++++++++++------- .../data_pipeline/score/field_names.py | 30 ++++-------- 2 files changed, 40 insertions(+), 37 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 788951537..4ee6058a8 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -189,16 +189,25 @@ def __init__(self): "B02001_007E", # "Some other race alone" ] + self.BLACK_FIELD_NAME = "Black or African American" + self.AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" + self.ASIAN_FIELD_NAME = "Asian" + self.HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" + self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" + self.NON_HISPANIC_WHITE_FIELD_NAME = "White" + self.HISPANIC_FIELD_NAME = "Hispanic or Latino" + self.OTHER_RACE_FIELD_NAME = "Other Races" + # Name output demographics fields. self.RE_OUTPUT_FIELDS = [ - field_names.BLACK_FIELD_NAME, - field_names.AMERICAN_INDIAN_FIELD_NAME, - field_names.ASIAN_FIELD_NAME, - field_names.HAWAIIAN_FIELD_NAME, - field_names.TWO_OR_MORE_RACES_FIELD_NAME, - field_names.NON_HISPANIC_WHITE_FIELD_NAME, - field_names.HISPANIC_FIELD_NAME, - field_names.OTHER_RACE_FIELD_NAME, + self.BLACK_FIELD_NAME, + self.AMERICAN_INDIAN_FIELD_NAME, + self.ASIAN_FIELD_NAME, + self.HAWAIIAN_FIELD_NAME, + self.TWO_OR_MORE_RACES_FIELD_NAME, + self.NON_HISPANIC_WHITE_FIELD_NAME, + self.HISPANIC_FIELD_NAME, + self.OTHER_RACE_FIELD_NAME, ] self.STATE_GEOID_FIELD_NAME = "GEOID2" @@ -399,14 +408,20 @@ def transform(self) -> None: ) # Calculate some demographic information. - df[field_names.BLACK_FIELD_NAME] = df["B02001_003E"] - df[field_names.AMERICAN_INDIAN_FIELD_NAME] = df["B02001_004E"] - df[field_names.ASIAN_FIELD_NAME] = df["B02001_005E"] - df[field_names.HAWAIIAN_FIELD_NAME] = df["B02001_006E"] - df[field_names.TWO_OR_MORE_RACES_FIELD_NAME] = df["B02001_008E"] - df[field_names.NON_HISPANIC_WHITE_FIELD_NAME] = df["B03002_003E"] - df[field_names.HISPANIC_FIELD_NAME] = df["B03003_003E"] - df[field_names.OTHER_RACE_FIELD_NAME] = df["B02001_007E"] + + df = df.rename( + columns={ + "B02001_003E": self.BLACK_FIELD_NAME, + "B02001_004E": self.AMERICAN_INDIAN_FIELD_NAME, + "B02001_005E": self.ASIAN_FIELD_NAME, + "B02001_006E": self.HAWAIIAN_FIELD_NAME, + "B02001_008E": self.TWO_OR_MORE_RACES_FIELD_NAME, + "B03002_003E": self.NON_HISPANIC_WHITE_FIELD_NAME, + "B03003_003E": self.HISPANIC_FIELD_NAME, + "B02001_007E": self.OTHER_RACE_FIELD_NAME, + }, + errors="raise", + ) # Calculate demographics as percent df[field_names.PERCENT_PREFIX + field_names.BLACK_FIELD_NAME] = ( diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 782d61a13..8ed3e6b37 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -100,30 +100,18 @@ ) # Additional ACS demographic fields. -BLACK_FIELD_NAME = "Black or African American" -AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" -ASIAN_FIELD_NAME = "Asian" -HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" -TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" -NON_HISPANIC_WHITE_FIELD_NAME = "White" -HISPANIC_FIELD_NAME = "Hispanic or Latino" -OTHER_RACE_FIELD_NAME = "Other Races" - -# Same fields, but with a percent prefix PERCENT_PREFIX = "Percent " -PERCENT_BLACK_FIELD_NAME = PERCENT_PREFIX + BLACK_FIELD_NAME -PERCENT_AMERICAN_INDIAN_FIELD_NAME = PERCENT_PREFIX + AMERICAN_INDIAN_FIELD_NAME -PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + ASIAN_FIELD_NAME -PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + HAWAIIAN_FIELD_NAME -PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = ( - PERCENT_PREFIX + TWO_OR_MORE_RACES_FIELD_NAME +PERCENT_BLACK_FIELD_NAME = PERCENT_PREFIX + "Black or African American" +PERCENT_AMERICAN_INDIAN_FIELD_NAME = ( + PERCENT_PREFIX + "American Indian / Alaska Native" ) -PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = ( - PERCENT_PREFIX + NON_HISPANIC_WHITE_FIELD_NAME -) -PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + HISPANIC_FIELD_NAME -PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + OTHER_RACE_FIELD_NAME +PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + "Asian" +PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + "Native Hawaiian or Pacific" +PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + "Two or more races" +PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + "White" +PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + "Hispanic or Latino" +PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + "Other Races" # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" From 9cfb87127870b5fd745513671bc4a76101885c10 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 19:48:04 -0400 Subject: [PATCH 09/14] update --- .../data_pipeline/etl/score/constants.py | 2 +- .../etl/sources/census_acs/etl.py | 36 +++++-------------- 2 files changed, 10 insertions(+), 28 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index ca8210b82..d5d20967b 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -315,7 +315,7 @@ field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", field_names.AML_BOOLEAN: "AML_ET", field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET", - field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG" + field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG", ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL_200 (there is no higher ed in narwhal) field_names.PERCENT_BLACK_FIELD_NAME: "DM_B", diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 4ee6058a8..9a196adef 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -198,6 +198,10 @@ def __init__(self): self.HISPANIC_FIELD_NAME = "Hispanic or Latino" self.OTHER_RACE_FIELD_NAME = "Other Races" + self.TOTAL_RACE_POPULATION_FIELD_NAME = ( + "Total population surveyed on racial data" + ) + # Name output demographics fields. self.RE_OUTPUT_FIELDS = [ self.BLACK_FIELD_NAME, @@ -419,37 +423,15 @@ def transform(self) -> None: "B03002_003E": self.NON_HISPANIC_WHITE_FIELD_NAME, "B03003_003E": self.HISPANIC_FIELD_NAME, "B02001_007E": self.OTHER_RACE_FIELD_NAME, + "B02001_001E": self.TOTAL_RACE_POPULATION_FIELD_NAME, }, errors="raise", ) - # Calculate demographics as percent - df[field_names.PERCENT_PREFIX + field_names.BLACK_FIELD_NAME] = ( - df["B02001_003E"] / df["B02001_001E"] - ) - df[ - field_names.PERCENT_PREFIX + field_names.AMERICAN_INDIAN_FIELD_NAME - ] = (df["B02001_004E"] / df["B02001_001E"]) - df[field_names.PERCENT_PREFIX + field_names.ASIAN_FIELD_NAME] = ( - df["B02001_005E"] / df["B02001_001E"] - ) - df[field_names.PERCENT_PREFIX + field_names.HAWAIIAN_FIELD_NAME] = ( - df["B02001_006E"] / df["B02001_001E"] - ) - df[ - field_names.PERCENT_PREFIX - + field_names.TWO_OR_MORE_RACES_FIELD_NAME - ] = (df["B02001_008E"] / df["B02001_001E"]) - df[ - field_names.PERCENT_PREFIX - + field_names.NON_HISPANIC_WHITE_FIELD_NAME - ] = (df["B03002_003E"] / df["B03002_001E"]) - df[field_names.PERCENT_PREFIX + field_names.HISPANIC_FIELD_NAME] = ( - df["B03003_003E"] / df["B03003_001E"] - ) - df[field_names.PERCENT_PREFIX + field_names.OTHER_RACE_FIELD_NAME] = ( - df["B02001_007E"] / df["B03003_001E"] - ) + for race_field_name in self.RE_OUTPUT_FIELDS: + df[field_names.PERCENT_PREFIX + race_field_name] = ( + df[race_field_name] / df[self.TOTAL_RACE_POPULATION_FIELD_NAME] + ) # Calculate college attendance and adjust low income df[self.COLLEGE_ATTENDANCE_FIELD] = ( From 8bd86a73f70de1c80d32ab21663ea70f02dbb2f5 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 18:21:29 -0400 Subject: [PATCH 10/14] changing other field name --- .../data-pipeline/data_pipeline/etl/sources/census_acs/etl.py | 4 +++- data/data-pipeline/data_pipeline/score/field_names.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 9a196adef..da66cd29e 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -196,7 +196,9 @@ def __init__(self): self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" self.NON_HISPANIC_WHITE_FIELD_NAME = "White" self.HISPANIC_FIELD_NAME = "Hispanic or Latino" - self.OTHER_RACE_FIELD_NAME = "Other Races" + # Note that `other` is lowercase because the whole field will show up in the download + # file as "Percent other races" + self.OTHER_RACE_FIELD_NAME = "other races" self.TOTAL_RACE_POPULATION_FIELD_NAME = ( "Total population surveyed on racial data" diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 8ed3e6b37..018ec9da4 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -111,7 +111,9 @@ PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + "Two or more races" PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + "White" PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + "Hispanic or Latino" -PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + "Other Races" +# Note that `other` is lowercase because the whole field will show up in the download +# file as "Percent other races" +PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + "other races" # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" From 02101a260775edf3f230fe10b2798f56d919b8c6 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 19:23:04 -0400 Subject: [PATCH 11/14] two or more --- data/data-pipeline/data_pipeline/content/config/csv.yml | 8 ++++---- data/data-pipeline/data_pipeline/content/config/excel.yml | 8 ++++---- .../data_pipeline/etl/sources/census_acs/etl.py | 2 +- data/data-pipeline/data_pipeline/score/field_names.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index b5b103a73..a5ebe0c7f 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -26,8 +26,8 @@ fields: - score_name: Percent Native Hawaiian or Pacific label: Percent Native Hawaiian or Pacific format: float - - score_name: Percent Two or more races - label: Percent Two or more races + - score_name: Percent two or more races + label: Percent two or more races format: float - score_name: Percent White label: Percent White @@ -35,8 +35,8 @@ fields: - score_name: Percent Hispanic or Latino label: Percent Hispanic or Latino format: float - - score_name: Percent Other races - label: Percent Other races + - score_name: Percent other races + label: Percent other races format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index 33e4132e4..d7e273b86 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -30,8 +30,8 @@ sheets: - score_name: Percent Native Hawaiian or Pacific label: Percent Native Hawaiian or Pacific format: float - - score_name: Percent Two or more races - label: Percent Two or more races + - score_name: Percent two or more races + label: Percent two or more races format: float - score_name: Percent White label: Percent White @@ -39,8 +39,8 @@ sheets: - score_name: Percent Hispanic or Latino label: Percent Hispanic or Latino format: float - - score_name: Percent Other races - label: Percent Other races + - score_name: Percent other races + label: Percent other races format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index da66cd29e..28d849459 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -193,7 +193,7 @@ def __init__(self): self.AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" self.ASIAN_FIELD_NAME = "Asian" self.HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" - self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" + self.TWO_OR_MORE_RACES_FIELD_NAME = "two or more races" self.NON_HISPANIC_WHITE_FIELD_NAME = "White" self.HISPANIC_FIELD_NAME = "Hispanic or Latino" # Note that `other` is lowercase because the whole field will show up in the download diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 018ec9da4..1f19e92ff 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -108,7 +108,7 @@ ) PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + "Asian" PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + "Native Hawaiian or Pacific" -PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + "Two or more races" +PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + "two or more races" PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + "White" PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + "Hispanic or Latino" # Note that `other` is lowercase because the whole field will show up in the download From bfaae43b13c15ac7fd478e5c7ba31d283f5b9a4c Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 21:30:46 -0400 Subject: [PATCH 12/14] adding age --- .../data_pipeline/content/config/csv.yml | 9 ++ .../data_pipeline/content/config/excel.yml | 9 ++ .../data_pipeline/etl/score/constants.py | 6 + .../data_pipeline/etl/score/etl_score.py | 3 + .../etl/sources/census_acs/etl.py | 137 +++++++++++++++++- .../data_pipeline/score/field_names.py | 5 + 6 files changed, 168 insertions(+), 1 deletion(-) diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index a5ebe0c7f..591c2b727 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -38,6 +38,15 @@ fields: - score_name: Percent other races label: Percent other races format: float + - score_name: Percent age under 10 + label: Percent age under 10 + format: float + - score_name: Percent age 10 to 64 + label: Percent age 10 to 64 + format: float + - score_name: Percent age over 64 + label: Percent age over 64 + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index d7e273b86..03fd55a66 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -42,6 +42,15 @@ sheets: - score_name: Percent other races label: Percent other races format: float + - score_name: Percent age under 10 + label: Percent age under 10 + format: float + - score_name: Percent age 10 to 64 + label: Percent age 10 to 64 + format: float + - score_name: Percent age over 64 + label: Percent age over 64 + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index d5d20967b..abf387b2d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -326,6 +326,9 @@ field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME: "DM_W", field_names.PERCENT_HISPANIC_FIELD_NAME: "DM_H", field_names.PERCENT_OTHER_RACE_FIELD_NAME: "DM_O", + field_names.PERCENT_AGE_UNDER_10: "AGE_10", + field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE", + field_names.PERCENT_AGE_OVER_64: "AGE_OLD", } # columns to round floats to 2 decimals @@ -392,4 +395,7 @@ field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, field_names.PERCENT_HISPANIC_FIELD_NAME, field_names.PERCENT_OTHER_RACE_FIELD_NAME, + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index da6d09d06..648043226 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -469,6 +469,9 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, field_names.PERCENT_HISPANIC_FIELD_NAME, field_names.PERCENT_OTHER_RACE_FIELD_NAME, + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, ] non_numeric_columns = [ diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index 28d849459..dba9d06ba 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -216,6 +216,62 @@ def __init__(self): self.OTHER_RACE_FIELD_NAME, ] + self.AGE_INPUT_FIELDS = [ + "B01001_001E", # Estimate!!Total: + "B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years + "B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years + "B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years + "B01001_006E", # Estimate!!Total:!!Male:!!15 to 17 years + "B01001_007E", # Estimate!!Total:!!Male:!!18 and 19 years + "B01001_008E", # Estimate!!Total:!!Male:!!20 years + "B01001_009E", # Estimate!!Total:!!Male:!!21 years + "B01001_010E", # Estimate!!Total:!!Male:!!22 to 24 years + "B01001_011E", # Estimate!!Total:!!Male:!!25 to 29 years + "B01001_012E", # Estimate!!Total:!!Male:!!30 to 34 years + "B01001_013E", # Estimate!!Total:!!Male:!!35 to 39 years + "B01001_014E", # Estimate!!Total:!!Male:!!40 to 44 years + "B01001_015E", # Estimate!!Total:!!Male:!!45 to 49 years + "B01001_016E", # Estimate!!Total:!!Male:!!50 to 54 years + "B01001_017E", # Estimate!!Total:!!Male:!!55 to 59 years + "B01001_018E", # Estimate!!Total:!!Male:!!60 and 61 years + "B01001_019E", # Estimate!!Total:!!Male:!!62 to 64 years + "B01001_020E", # Estimate!!Total:!!Male:!!65 and 66 years + "B01001_021E", # Estimate!!Total:!!Male:!!67 to 69 years + "B01001_022E", # Estimate!!Total:!!Male:!!70 to 74 years + "B01001_023E", # Estimate!!Total:!!Male:!!75 to 79 years + "B01001_024E", # Estimate!!Total:!!Male:!!80 to 84 years + "B01001_025E", # Estimate!!Total:!!Male:!!85 years and over + "B01001_027E", # Estimate!!Total:!!Female:!!Under 5 years + "B01001_028E", # Estimate!!Total:!!Female:!!5 to 9 years + "B01001_029E", # Estimate!!Total:!!Female:!!10 to 14 years + "B01001_030E", # Estimate!!Total:!!Female:!!15 to 17 years + "B01001_031E", # Estimate!!Total:!!Female:!!18 and 19 years + "B01001_032E", # Estimate!!Total:!!Female:!!20 years + "B01001_033E", # Estimate!!Total:!!Female:!!21 years + "B01001_034E", # Estimate!!Total:!!Female:!!22 to 24 years + "B01001_035E", # Estimate!!Total:!!Female:!!25 to 29 years + "B01001_036E", # Estimate!!Total:!!Female:!!30 to 34 years + "B01001_037E", # Estimate!!Total:!!Female:!!35 to 39 years + "B01001_038E", # Estimate!!Total:!!Female:!!40 to 44 years + "B01001_039E", # Estimate!!Total:!!Female:!!45 to 49 years + "B01001_040E", # Estimate!!Total:!!Female:!!50 to 54 years + "B01001_041E", # Estimate!!Total:!!Female:!!55 to 59 years + "B01001_042E", # Estimate!!Total:!!Female:!!60 and 61 years + "B01001_043E", # Estimate!!Total:!!Female:!!62 to 64 years + "B01001_044E", # Estimate!!Total:!!Female:!!65 and 66 years + "B01001_045E", # Estimate!!Total:!!Female:!!67 to 69 years + "B01001_046E", # Estimate!!Total:!!Female:!!70 to 74 years + "B01001_047E", # Estimate!!Total:!!Female:!!75 to 79 years + "B01001_048E", # Estimate!!Total:!!Female:!!80 to 84 years + "B01001_049E", # Estimate!!Total:!!Female:!!85 years and over + ] + + self.AGE_OUTPUT_FIELDS = [ + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, + ] + self.STATE_GEOID_FIELD_NAME = "GEOID2" self.COLUMNS_TO_KEEP = ( @@ -239,6 +295,7 @@ def __init__(self): field_names.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS ] + + self.AGE_OUTPUT_FIELDS + [ field_names.POVERTY_LESS_THAN_200_FPL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, @@ -288,6 +345,7 @@ def extract(self) -> None: + self.EDUCATIONAL_FIELDS + self.RE_FIELDS + self.COLLEGE_ATTENDANCE_FIELDS + + self.AGE_INPUT_FIELDS ) self.df = retrieve_census_acs_data( @@ -435,6 +493,83 @@ def transform(self) -> None: df[race_field_name] / df[self.TOTAL_RACE_POPULATION_FIELD_NAME] ) + # First value is the `age bucket`, and the second value is a list of all fields + # that will be summed in the calculations of the total population in that age + # bucket. + age_bucket_and_its_sum_columns = [ + ( + field_names.PERCENT_AGE_UNDER_10, + [ + "B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years + "B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years + "B01001_027E", # Estimate!!Total:!!Female:!!Under 5 years + "B01001_028E", # Estimate!!Total:!!Female:!!5 to 9 years + ], + ), + ( + field_names.PERCENT_AGE_10_TO_64, + [ + "B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years + "B01001_006E", # Estimate!!Total:!!Male:!!15 to 17 years + "B01001_007E", # Estimate!!Total:!!Male:!!18 and 19 years + "B01001_008E", # Estimate!!Total:!!Male:!!20 years + "B01001_009E", # Estimate!!Total:!!Male:!!21 years + "B01001_010E", # Estimate!!Total:!!Male:!!22 to 24 years + "B01001_011E", # Estimate!!Total:!!Male:!!25 to 29 years + "B01001_012E", # Estimate!!Total:!!Male:!!30 to 34 years + "B01001_013E", # Estimate!!Total:!!Male:!!35 to 39 years + "B01001_014E", # Estimate!!Total:!!Male:!!40 to 44 years + "B01001_015E", # Estimate!!Total:!!Male:!!45 to 49 years + "B01001_016E", # Estimate!!Total:!!Male:!!50 to 54 years + "B01001_017E", # Estimate!!Total:!!Male:!!55 to 59 years + "B01001_018E", # Estimate!!Total:!!Male:!!60 and 61 years + "B01001_019E", # Estimate!!Total:!!Male:!!62 to 64 years + "B01001_029E", # Estimate!!Total:!!Female:!!10 to 14 years + "B01001_030E", # Estimate!!Total:!!Female:!!15 to 17 years + "B01001_031E", # Estimate!!Total:!!Female:!!18 and 19 years + "B01001_032E", # Estimate!!Total:!!Female:!!20 years + "B01001_033E", # Estimate!!Total:!!Female:!!21 years + "B01001_034E", # Estimate!!Total:!!Female:!!22 to 24 years + "B01001_035E", # Estimate!!Total:!!Female:!!25 to 29 years + "B01001_036E", # Estimate!!Total:!!Female:!!30 to 34 years + "B01001_037E", # Estimate!!Total:!!Female:!!35 to 39 years + "B01001_038E", # Estimate!!Total:!!Female:!!40 to 44 years + "B01001_039E", # Estimate!!Total:!!Female:!!45 to 49 years + "B01001_040E", # Estimate!!Total:!!Female:!!50 to 54 years + "B01001_041E", # Estimate!!Total:!!Female:!!55 to 59 years + "B01001_042E", # Estimate!!Total:!!Female:!!60 and 61 years + "B01001_043E", # Estimate!!Total:!!Female:!!62 to 64 years + ], + ), + ( + field_names.PERCENT_AGE_OVER_64, + [ + "B01001_020E", # Estimate!!Total:!!Male:!!65 and 66 years + "B01001_021E", # Estimate!!Total:!!Male:!!67 to 69 years + "B01001_022E", # Estimate!!Total:!!Male:!!70 to 74 years + "B01001_023E", # Estimate!!Total:!!Male:!!75 to 79 years + "B01001_024E", # Estimate!!Total:!!Male:!!80 to 84 years + "B01001_025E", # Estimate!!Total:!!Male:!!85 years and over + "B01001_044E", # Estimate!!Total:!!Female:!!65 and 66 years + "B01001_045E", # Estimate!!Total:!!Female:!!67 to 69 years + "B01001_046E", # Estimate!!Total:!!Female:!!70 to 74 years + "B01001_047E", # Estimate!!Total:!!Female:!!75 to 79 years + "B01001_048E", # Estimate!!Total:!!Female:!!80 to 84 years + "B01001_049E", # Estimate!!Total:!!Female:!!85 years and over + ], + ), + ] + + # Calculate age groups + total_population_age_series = df["B01001_001E"] + + # For each age bucket, sum the relevant columns and calculate the total + # percentage. + for age_bucket, sum_columns in age_bucket_and_its_sum_columns: + df[age_bucket] = ( + df[sum_columns].sum(axis=1) / total_population_age_series + ) + # Calculate college attendance and adjust low income df[self.COLLEGE_ATTENDANCE_FIELD] = ( df[self.COLLEGE_ATTENDANCE_MALE_ENROLLED_PUBLIC] @@ -503,7 +638,7 @@ def transform(self) -> None: ) # We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise. - # This allows us to see which tracts have an imputed income. + # This allows us to see which tracts have an imputed income. df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = ( df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna() & df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna() diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index 1f19e92ff..a07816ff4 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -115,6 +115,11 @@ # file as "Percent other races" PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + "other races" +# Age +PERCENT_AGE_UNDER_10 = "Percent age under 10" +PERCENT_AGE_10_TO_64 = "Percent age 10 to 64" +PERCENT_AGE_OVER_64 = "Percent age over 64" + # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" EXPECTED_BUILDING_LOSS_RATE_FIELD = ( From 175a6c8c2ead08e2cd3ecbf7ca56abb440e8c929 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 21:32:17 -0400 Subject: [PATCH 13/14] readme --- data/data-pipeline/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 5b843dbd5..97c078626 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -350,7 +350,8 @@ We have four pickle files that correspond to expected files: To update the pickles, let's go one by one: -For the `score_transformed_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L58), before the `pdt.assert_frame_equal` and run: +For the `score_transformed_expected.pkl`, put a breakpoint on [this line] +(https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L62), before the `pdt.assert_frame_equal` and run: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score` Once on the breakpoint, capture the df to a pickle as follows: @@ -378,7 +379,8 @@ score_data_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tes Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data` -For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L86), before the `pdt.assert_frame_equal` and run: +For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github +.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L90), before the `pdt.assert_frame_equal` and run: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data` Once on the breakpoint, capture the df to a pickle as follows: From 0b41c03bcf148e5662a6e416fff5e1caea3aa566 Mon Sep 17 00:00:00 2001 From: lucasmbrown-usds Date: Fri, 19 Aug 2022 21:45:57 -0400 Subject: [PATCH 14/14] updating text fixtures --- .../tests/sample_data/score_data_initial.csv | 6 +++--- .../snapshots/downloadable_data_expected.pkl | Bin 17063 -> 17649 bytes .../tests/snapshots/score_data_expected.pkl | Bin 24195 -> 25454 bytes .../snapshots/score_transformed_expected.pkl | Bin 23876 -> 25135 bytes .../tests/snapshots/tile_data_expected.pkl | Bin 4413 -> 4789 bytes 5 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index f78260c50..a4043668e 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts" -01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False -01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True +GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts" +01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False +01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 508b5e91436736ebaafc0d8e17bd01096cf9a464..0eb41e974fc4b2b972211b7486a564ce446b7536 100644 GIT binary patch delta 1522 zcmbWzU2Icj7zglmZ@abKN87dASi7}ne25?D*w`5`I}RPMp5cSKDF}-LIdmO8+14{o zO9w2bm^jFi#tWY=euP95O*CFCS!T>kywDqsCd3;PO$ZlWh%tx}hy5h6vtu58$))oNa+FnZFh}2Hi4ijI%f-WZo3H z7q-gW@8&q!pyDTP&bGr2Q7htzSH)`{j-LtRtzPVDNMfH)#Wxx_djk68zH1&Y1{#z2 zv8Z2ev&QdTc^Y?y^ zzqP1%)91zUrX=I=l&pW%#PLR}io?yE=`e<`H5c)EK*e1xoIL{|XT)#8^-%Tg93O2{ z@hxF@SjAv~?j!l#as}U9d4sv!JPRQDPh_e$gDf6um zf3sc1SK2sU6#mr4*~>5`TN7cG6LUQY-!@u3gsWYN z$w6A}C5Ooo@*sJLe40E=9wDD6Um%ZRPc(r%8pQ3fAWlVyS=I^xd^@JN;`n(?EopyjPTXFOZ|d5ecr_(-?e$AP>uZrEop#mz&+s4T zXlrz7o%2`d+IsaB6Tj=)mmmD-A6fWN*FL3pXumiw`&BeVSJ%FMr~19CYv0r=8+T%3 zu2OutlI>kN^rwqnUQaU8O?t=%vXS(XKGIJ%k5w&`l=D9puC0W8@xk zh}=&mNsR<@hCEAV$x5Me2I}!f&nedu4#y9ac8+K4bS76&M(2$5tYX{)kEzX&#yy#cTTb*@nKp8U@4YRt_ zvP;Uikv3<{bfw(oOcfTb>O1BwJEPb}I+MTe#N%1BAZ~~{vtwp{!N{5E>V_lY`kYlM zhn=QX$YyNkh=@})&SsSJxoOeq-(A^NexTmJTPaw|(BOTCRL8BNm>wG3D8{dEb6r6` Gn*JC6-*Nc> delta 910 zcmXZXOGuPa6u|MmIrDVh^UXLfod=o|y+E6rI*zYAH0V*X)G~UQ6)8j%K?5O@EnK*9 z(J68v6toCh1TifNf~ZA{TC`~qgwQI8+6Y0k>vBEc=J!A6yIi>2Yu3|M%eCTsuzBQ# z%XWa5e7x=K;t2;rkV2+E=#Eg-^dKPE6gT}!cQYkTcRCT0lrsHHcbYP~>(D~2X1D1= z$Wy^|F68A_ca?8B6|cJy+NfR5cscB;!fxs?^NI(dgG#3Vd5{-eif?+6ziWSKPrDUg z_92r;@sQzY)A~(p`xWOx2qT1M_b`O)3Mh_*5sp&L?AF4_f6dOzO_3^$(wNyVMG%O_ zP5;q7P7|iDMUj6775~s)3@M(AAsnMg^Y3d6`J3Jyw~?pAia**2CuoYJ@jAbaD1H-1 z-WyY_639=q&l7krp}5qHd|Ugt8Q}^o=&LGROjcom78@6mUo39RrAGa`cD{fZ7Gq*U zOo>@BCl=%b7=(lUp-ME@$Tf$jKYof@SmTtZKY$sad`D>ixyy+Q^0_S2;wV*d&I;s2Jy& qbbfe`O#8%xVy9RZhr~1DS@9hI&Rw-`@N9md@xJh>&06Ahy!0QpO2qj9 diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 0ea454333f555cd74f77836ebf221e007e97fcc5..8d726f018b231da65dee247614e24cc01247b32a 100644 GIT binary patch delta 1521 zcmZqP%lPgXBWnZ8)a2xitbt4{vW&iRlcSiTS>zdg6@U!)Nz8)Wij2NWjNXjxQ-Y>U zUdJJ|*^GG(WrEVole;)D|^4k*nDrMaLqHd9MA>1K%A#)K0L6 zh6;m)TD95&Adsbt!U%A(|+Rt&7*|1pq zslDt~y*4h3=k}4}e|n0gPT4b0(tI_~$~mm6^1(;5qkZ6^J<d*0ksjDKxEGynaXYnRX4PgOnjbg%kOIQ{HAdrBeW zZF?B~weebW;=x(=I=kBZuB_Z{|D!0OYU+de_ClvLV3@jv^XeAC-aCIv&aEc`DACY6h@`ZE5y`z>J_i2>#FEUi)G0l@ zaFxYC`6)e0Fg`?qf=6O`VkSr@(4K(Az<@q3)-29?cg`&jd)M6wP z!!t5VfHp|M)OutV7bNCoCWAEi0Ikoa{G6W=?Uj|fdX0rK=QV3tY9?T$|VWpr^013{_oK(#zJti;> z1Og6C8{ChMpiqQMPWI5}ug4XTz<@xtTMcG6Mxdf8gxdxVVKhm2)I)>29x1%hlq$e1 nL<)Q~CGfNYIc^m+{eLg}7t@9PZ~r delta 297 zcmaENjIntyBWnZ8)QY%`tbt4{?2Nt~lcSiTSvVPexquA!Nz8)W+>E|FjNXjxQ-Y>U zUdJJ|*^GG(oA^~%$=MJ0J3C8=Kufz diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 6e5d23eba468ba26f31469cf6755c8c640877647..8119822adf52c6e280ec39aa6485f8f0250ad8f2 100644 GIT binary patch delta 1498 zcmX@Ii*fxCM%D(FsVYeuS=TaePGT`&ocu#rfbq~|MiKGJvsgVO&oMy2c_@7WN?)A( znpK_g#$;nQf6k{+v1gNKvz-@YWQ1^;pfvO3Y_b07?r&X(1>r45dY&v?!DogV0h85>NptC@l@8WhN_%s4~u&ypmmurGb-yVRAp0 z!shSn4a^cTr-fYme!sIeWduGo?@v}_RNzsU(GW)VV@>y#vCK$>#*tiZu@(M z;SNWHUO(U5&Ao@Y{w@FM^&;2b+iULLZqi=+-rjo8o12R9ukB~%zh86h@_GBIs;8dr zRo@AxpPgq}UTaP~ILlsVSDW9JmD}xq6eUzmeK6m?=#KY1mG*r5Iq!Dz zU5s8~|0T)wOzVof_KQ+1x+U0m+8@65DQ`;hD+7Cd1I_M@&E$uXiSjB=X~imLI{ zD+Z(%C8y?c`2DdCHe}EIf=#D zi3)y+C7ETZQ+jyeDvN>gQ+kwOe24-CkHqrCOps2XJpqZynQ8Tz$y0hHVX8{X^FcDX z`9-M;MTyC&#YiTGXJnQDZIFVg^~fwPNX*Mj25ImCTA!CcrAHVhnO~BTS_Csf1SXo8 zo~lrqmjaYAG?>x@7S;lJ9VBjOpiq*pU}l132uL`;45%6;Djzc0-au|MyOI%Ky&lX! zoMEP*Q2+_n%$!usDLp1I4Fm!ZO&i>=j-b$lOV&dS)rT31D{O&bgX#=5m=26^MpFp4 z7#!$mlIry^V=$utO{oG*B~pB#DS@Xysf3;C3NJ+h#3;Svrh6Cm)Te F2LQ+$<=X%N delta 248 zcmZ2~gz?BOM%D(Fsg|)DS=Ta8P7qzeICJxAW^+cyLz9_V{W;GwK){8`*{tf5Im84Q zZ%h^x6Q4Yb)kE?LRPHI1eg>tVPkzmMo{?$tX*T)E7GiRYO_Nu$Yq2PBFfdH+=Tg}G zoxOp1vpUZO=E=(ij&U@#PRa1>bQ0KHD)^XL^8nO{gHZY~ls*Eb&p~PC$%bOJg3VCg wMJUY(6jh!4M688{oza(La*lWkBgf{8;%YpbWmSXtHmjI+>o6{!ydkk309ocd?EnA( diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 08aa265b508618cfcf04a256b6f9954fb66e0066..49f9fe033f0caa7284246833b59e6a051e302ab8 100644 GIT binary patch delta 437 zcmdn1v{jY0fo19{p^dC2?2J8=*RZKF8g5QtPh_fhw|}=I@O7))4tM*vav+wxyM0U5 zL-%ze)$R;XKqdo^Iri(ImLiO{-w1L=;2U@Q1B1rb~<%z{>W*}7}yAPeG`;!hSDuix)n;dLFslV-2tUL zp>!9N?w%~mox#| YlR^w(0fMun46Fo`su8CF6O&RBW2D~{z5oCK