diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 8b754e405..4f074df58 100644 --- a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -289,4 +289,25 @@ fields: format: bool - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years + format: bool + - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? + format: bool + - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + format: bool + - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent + format: percentage + - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + format: percentage + - score_name: Share of properties at risk of flood in 30 years (percentile) + label: Share of properties at risk of flood in 30 years (percentile) + format: percentage + - score_name: Share of properties at risk of fire in 30 years (percentile) + label: 
Share of properties at risk of fire in 30 years (percentile) + format: percentage + - score_name: Does the tract have at least 35 acres in it? + label: Does the tract have at least 35 acres in it? format: bool \ No newline at end of file diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index a9ab57b69..0659eddb3 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -278,10 +278,10 @@ sheets: format: bool - score_name: Share of properties at risk of flood in 30 years label: Share of properties at risk of flood in 30 years - format: float + format: percentage - score_name: Share of properties at risk of fire in 30 years label: Share of properties at risk of fire in 30 years - format: float + format: percentage - score_name: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? label: Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income? format: bool @@ -294,3 +294,25 @@ sheets: - score_name: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years label: Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years format: bool + - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income? 
+ format: bool + - score_name: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent + format: bool + - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent + format: percentage + - score_name: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + label: Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile) + format: percentage + - score_name: Share of properties at risk of flood in 30 years (percentile) + label: Share of properties at risk of flood in 30 years (percentile) + format: percentage + - score_name: Share of properties at risk of fire in 30 years (percentile) + label: Share of properties at risk of fire in 30 years (percentile) + format: percentage + - score_name: Does the tract have at least 35 acres in it? + label: Does the tract have at least 35 acres in it? 
+ format: bool + \ No newline at end of file diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index 74b697b66..7d76b4f05 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -65,6 +65,12 @@ "class_name": "HudHousingETL", "is_memory_intensive": False, }, + { + "name": "nlcd_nature_deprived", + "module_dir": "nlcd_nature_deprived", + "class_name": "NatureDeprivedETL", + "is_memory_intensive": False, + }, { "name": "census_acs_median_income", "module_dir": "census_acs_median_income", diff --git a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml index 1b59b4d61..dc06b4f0f 100644 --- a/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml +++ b/data/data-pipeline/data_pipeline/etl/score/config/datasets.yml @@ -35,7 +35,6 @@ datasets: include_in_tiles: true include_in_downloadable_files: true create_percentile: true - - short_name: "ex_ag_loss" df_field_name: "EXPECTED_AGRICULTURE_LOSS_RATE_FIELD_NAME" long_name: "Expected agricultural loss rate (Natural Hazards Risk Index)" @@ -54,7 +53,6 @@ datasets: include_in_tiles: true include_in_downloadable_files: true create_percentile: true - - short_name: "ex_bldg_loss" df_field_name: "EXPECTED_BUILDING_LOSS_RATE_FIELD_NAME" long_name: "Expected building loss rate (Natural Hazards Risk Index)" @@ -72,7 +70,6 @@ datasets: include_in_tiles: true include_in_downloadable_files: true create_percentile: true - - short_name: "has_ag_val" df_field_name: "CONTAINS_AGRIVALUE" long_name: "Contains agricultural value" @@ -168,7 +165,6 @@ datasets: field_type: float include_in_tiles: true include_in_downloadable_files: true - - long_name: "First Street Foundation Flood Risk" short_name: "FSF Flood Risk" module_name: fsf_flood_risk @@ -209,7 +205,6 @@ datasets: include_in_tiles: false include_in_downloadable_files: true 
create_percentile: true - - long_name: "First Street Foundation Wildfire Risk" short_name: "FSF Wildfire Risk" module_name: fsf_wildfire_risk @@ -250,7 +245,6 @@ datasets: include_in_tiles: false include_in_downloadable_files: true create_percentile: true - - long_name: "DOT Travel Disadvantage Index" short_name: "DOT" module_name: "travel_composite" @@ -263,3 +257,36 @@ datasets: include_in_tiles: true include_in_downloadable_files: true create_percentile: true + - long_name: "National Land Cover Database (NLCD) Lack of Green Space / Nature-Deprived Communities dataset, as compiled by TPL" + short_name: "nlcd_nature_deprived" + module_name: "nlcd_nature_deprived" + input_geoid_tract_field_name: "GEOID10_TRACT" + load_fields: + - short_name: "ncld_eligible" + df_field_name: "ELIGIBLE_FOR_NATURE_DEPRIVED_FIELD_NAME" + long_name: "Does the tract have at least 35 acres in it?" + field_type: bool + include_in_tiles: true + include_in_downloadable_files: true + create_percentile: false + - short_name: "percent_impervious" + df_field_name: "TRACT_PERCENT_IMPERVIOUS_FIELD_NAME" + long_name: "Share of the tract's land area that is covered by impervious surface as a percent" + field_type: percentage + include_in_tiles: true + include_in_downloadable_files: true + create_percentile: true + - short_name: "percent_nonnatural" + df_field_name: "TRACT_PERCENT_NON_NATURAL_FIELD_NAME" + long_name: "Share of the tract's land area that is covered by impervious surface or cropland as a percent" + field_type: percentage + include_in_tiles: true + include_in_downloadable_files: true + create_percentile: true + - short_name: "percent_cropland" + df_field_name: "TRACT_PERCENT_CROPLAND_FIELD_NAME" + long_name: "Share of the tract's land area that is covered by cropland as a percent" + field_type: percentage + include_in_tiles: true + include_in_downloadable_files: true + create_percentile: true \ No newline at end of file diff --git 
a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index 07ee7f2c2..50e523cf2 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -305,6 +305,9 @@ + field_names.PERCENTILE_FIELD_SUFFIX: "WF_PFS", field_names.HIGH_FUTURE_FLOOD_RISK_FIELD: "FLD_ET", field_names.HIGH_FUTURE_WILDFIRE_RISK_FIELD: "WF_ET", + field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX: "IS_PFS", + field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL_200 (there is no higher ed in narwhal) } @@ -361,4 +364,6 @@ field_names.FUTURE_FLOOD_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, field_names.FUTURE_WILDFIRE_RISK_FIELD + field_names.PERCENTILE_FIELD_SUFFIX, + field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index 55fe24d4b..0d942d5c0 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -14,6 +14,7 @@ from data_pipeline.etl.sources.fsf_flood_risk.etl import ( FloodRiskETL, ) +from data_pipeline.etl.sources.nlcd_nature_deprived.etl import NatureDeprivedETL from data_pipeline.etl.sources.fsf_wildfire_risk.etl import WildfireRiskETL from data_pipeline.score.score_runner import ScoreRunner from data_pipeline.score import field_names @@ -47,6 +48,7 @@ def __init__(self): self.dot_travel_disadvantage_df: pd.DataFrame self.fsf_flood_df: pd.DataFrame self.fsf_fire_df: pd.DataFrame + self.nature_deprived_df: pd.DataFrame def extract(self) -> None: logger.info("Loading data sets from disk.") @@ -134,6 +136,9 @@ def extract(self) -> None: # Load flood risk data self.fsf_flood_df = 
FloodRiskETL.get_data_frame() + # Load NLCD Nature-Deprived Communities data + self.nature_deprived_df = NatureDeprivedETL.get_data_frame() + # Load GeoCorr Urban Rural Map geocorr_urban_rural_csv = ( constants.DATA_PATH / "dataset" / "geocorr" / "usa.csv" @@ -356,6 +361,7 @@ def _prepare_initial_df(self) -> pd.DataFrame: self.dot_travel_disadvantage_df, self.fsf_flood_df, self.fsf_fire_df, + self.nature_deprived_df, ] # Sanity check each data frame before merging. @@ -439,9 +445,9 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.IMPENETRABLE_SURFACES_FIELD, field_names.UST_FIELD, field_names.DOT_TRAVEL_BURDEN_FIELD, - field_names.AGRICULTURAL_VALUE_BOOL_FIELD, field_names.FUTURE_FLOOD_RISK_FIELD, field_names.FUTURE_WILDFIRE_RISK_FIELD, + field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, ] @@ -449,6 +455,8 @@ def _prepare_initial_df(self) -> pd.DataFrame: self.GEOID_TRACT_FIELD_NAME, field_names.PERSISTENT_POVERTY_FIELD, field_names.HISTORIC_REDLINING_SCORE_EXCEEDED, + field_names.TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD, + field_names.AGRICULTURAL_VALUE_BOOL_FIELD, ] # For some columns, high values are "good", so we want to reverse the percentile @@ -500,7 +508,7 @@ def _prepare_initial_df(self) -> pd.DataFrame: df_copy[numeric_columns] = df_copy[numeric_columns].apply(pd.to_numeric) # Convert all columns to numeric and do math - # Note that we have a few special conditions here, that we handle explicitly. + # Note that we have a few special conditions here and we handle them explicitly. # For *Linguistic Isolation*, we do NOT want to include Puerto Rico in the percentile # calculation. This is because linguistic isolation as a category doesn't make much sense # in Puerto Rico, where Spanish is a recognized language. 
Thus, we construct a list @@ -509,6 +517,10 @@ def _prepare_initial_df(self) -> pd.DataFrame: # For *Expected Agricultural Loss*, we only want to include in the percentile tracts # in which there is some agricultural value. This helps us adjust the data such that we have # the ability to discern which tracts truly are at the 90th percentile, since many tracts have 0 value. + # + # For *Non-Natural Space*, we may only want to include tracts that have at least 35 acres, I think. This will + # get rid of tracts that we think are aberrations statistically. Right now, we have left this out + # pending ground-truthing. for numeric_column in numeric_columns: drop_tracts = [] @@ -524,7 +536,6 @@ def _prepare_initial_df(self) -> pd.DataFrame: logger.info( f"Dropping {len(drop_tracts)} tracts from Agricultural Value Loss" ) - elif numeric_column == field_names.LINGUISTIC_ISO_FIELD: drop_tracts = df_copy[ # 72 is the FIPS code for Puerto Rico diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index d878573f1..abcdc13a7 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not
good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Contains agricultural value,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,"Percent of individuals below 200% Federal Poverty Line, imputed and 
adjusted",Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households 
in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Contains agricultural value (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 
90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of 
higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high 
school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median 
household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater 
than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile) 
-01073001100,True,True,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,1.0,0.0754274220583305,0.6620851491786792,0.2853609002858206,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.6564635202573585,0.4977059209088922,0.8410893082809093,0.607629501459933,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,Fal
se,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0 -01073001400,True,True,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,1.0,0.2384615384615384,0.0,0.4064010997350401,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.052263912251
6786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.6564635202573585,0.8849410093948001,0.3366245885930925,0.7883908294582027,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1 +GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 
200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a 
percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high 
school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th 
percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing 
burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to 
the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 
2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is 
less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N 
(communities) (percentile) +01073001100,True,True,True,True,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4977059209088922,0.8410893082809093,,0.607629501459933,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,T
rue,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0 +01073001400,True,True,True,True,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0
.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8849410093948001,0.3366245885930925,,0.7883908294582027,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1 diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 3febf24a1..b29cedead 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index e53e516c7..cc79535be 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git 
a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl index 0895c955e..edbb02332 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index e8cd7aa7c..3404bd3a3 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py b/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py index 78a4aefa9..9776e8011 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_flood_risk/etl.py @@ -48,7 +48,6 @@ def transform(self) -> None: """ logger.info("Transforming National Risk Index Data") - logger.info(self.COLUMNS_TO_KEEP) # read in the unzipped csv data source then rename the # Census Tract column for merging df_fsf_flood_disagg: pd.DataFrame = pd.read_csv( diff --git a/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py b/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py index 2d36a079f..a41ce1e31 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/fsf_wildfire_risk/etl.py @@ -47,8 +47,6 @@ def transform(self) -> None: - Calculates share of properties at risk, left-clipping number of properties at 250 """ logger.info("Transforming National Risk Index Data") - - 
logger.info(self.COLUMNS_TO_KEEP) # read in the unzipped csv data source then rename the # Census Tract column for merging df_fsf_fire_disagg: pd.DataFrame = pd.read_csv( diff --git a/data/data-pipeline/data_pipeline/etl/sources/nlcd_nature_deprived/README.md b/data/data-pipeline/data_pipeline/etl/sources/nlcd_nature_deprived/README.md new file mode 100644 index 000000000..aa1b6e3b5 --- /dev/null +++ b/data/data-pipeline/data_pipeline/etl/sources/nlcd_nature_deprived/README.md @@ -0,0 +1,80 @@ +# Nature deprived communities data + +The following dataset was compiled by TPL (Trust for Public Land) using NLCD data. We define natural area as: AREA - [CROPLAND] - [IMPERVIOUS SURFACES]. + +## Codebook +- GEOID10 – Census tract ID +- SF – State Name +- CF – County Name +- P200_PFS – Percent of individuals below 200% Federal Poverty Line (from CEJST source data). +- CA_LT20 – Percent higher ed enrollment rate is less than 20% (from CEJST source data). +- TractAcres – Acres of tract calculated from ALAND10 field (area land/meters) in 2010 census tracts. + - CAVEAT: Some census tracts in the CEJST source file extend into open water. ALAND10 area was used to constrain percent calculations (e.g. cropland area) to land only. +- AcresCrops – Acres crops calculated by summing all cells in the NLCD Cropland Data Layer crop classes. +- PctCrops – Formula: AcresCrops/TractAcres*100. +- PctImperv – Mean imperviousness for each census tract. + - CAVEAT: Where tracts extend into open water, mean imperviousness may be underestimated. +- __TO USE__ PctNatural – Formula: 100 – PctCrops – PctImperv. +- PctNat90 – Tract in or below 10th percentile for PctNatural. 1 = True, 0 = False. + - PctNatural 10th percentile = 28.6439% +- ImpOrCrop – If tract >= 90th percentile for PctImperv OR PctCrops. 1 = True, 0 = False. + - PctImperv 90th percentile = 67.4146 % + - PctCrops 90th percentile = 27.8116 % +- LowInAndEd – If tract >= 65th percentile for P200_PFS AND CA_LT20. 
+ - P200_PFS 65th percentile = 64.0% +- NatureDep – ImpOrCrop = 1 AND LowInAndEd = 1. + +We added the `GEOID10_TRACT` column before converting the shapefile to CSV. + +## Instructions to recreate + +### Creating Impervious plus Cropland Attributes for Census Tracts + +The Cropland Data Layer and NLCD Impervious layer were too big to put on our OneDrive, but you can download them here: + CDL: https://www.nass.usda.gov/Research_and_Science/Cropland/Release/datasets/2021_30m_cdls.zip + Impervious: https://s3-us-west-2.amazonaws.com/mrlc/nlcd_2019_impervious_l48_20210604.zip + + +#### Crops + +Add an attribute called TractAcres (or similar) to the census tracts to hold a value representing acres covered by the census tract. +Calculate the TractAcres field for each census tract by using the Calculate Geometry tool (set the Property to Area (geodesic), and the Units to Acres). +From the Cropland Data Layer (CDL), extract only the pixels representing crops, using the Extract by Attributes tool in the ArcGIS Spatial Analyst toolbox. +a. The attribute table tells you the names of each type of land cover. Since the CDL also contains NLCD classes and empty classes, the actual crop classes must be extracted. +From the crops-only raster extracted from the CDL, run the Reclassify tool to create a binary layer where all crops = 1, and everything else is Null. +Run the Tabulate Area tool: +a. Zone data = census tracts +b. Input raster data = the binary crops layer +c. This will produce a table with the square meters of crops in each census tract contained in an attribute called VALUE_1 +Run the Join Field tool to join the table to the census tracts, with the VALUE_1 field as the Transfer Field, to transfer the VALUE_1 field (square meters of crops) to the census tracts. +Add a field to the census tracts called AcresCrops (or similar) to hold the acreage of crops in each census tract. 
+Calculate the AcresCrops field by multiplying the VALUE_1 field by 0.000247105 to produce acres of crops in each census tract. +a. You can delete the VALUE_1 field. +Add a field called PctCrops (or similar) to hold the percent of each census tract occupied by crops. +Calculate the PctCrops field by dividing the AcresCrops field by the TractAcres field, and multiplying by 100 to get the percent. +#### Impervious + +Run the Zonal Statistics as Table tool: +a. Zone data = census tracts +b. Input raster data = impervious data raster layer +c. Statistics type = Mean +d. This will produce a table with the percent of each census tract occupied by impervious surfaces, contained in an attribute called MEAN + +Run the Join Field tool to join the table to the census tracts, with the MEAN field as the Transfer Field, to transfer the MEAN field (percent impervious) to the census tracts. + +Add a field called PctImperv (or similar) to hold the percent impervious value. + +Calculate the PctImperv field by setting it equal to the MEAN field. +a. You can delete the MEAN field. +#### Combine the Crops and Impervious Data + +Open the census tracts attribute table and add a field called PctNatural (or similar). Calculate this field using this equation: 100 – PctCrops – PctImperv. This produces a value that tells you the percent of each census tract covered in natural land cover. + +Define the census tracts that fall in the 90th percentile of non-natural land cover: +a. Add a field called PctNat90 (or similar) +b. Right-click on the PctNatural field, and click Sort Ascending (lowest PctNatural values on top) +c. Select the top 10 percent of rows after the sort +d. Click on Show Selected Records in the attribute table +e. Calculate the PctNat90 field for the selected records = 1 +f. Clear the selection +g. 
# pylint: disable=unsubscriptable-object
# pylint: disable=unsupported-assignment-operation

import pandas as pd
from data_pipeline.config import settings

from data_pipeline.etl.base import ExtractTransformLoad, ValidGeoLevel
from data_pipeline.utils import get_module_logger

logger = get_module_logger(__name__)


class NatureDeprivedETL(ExtractTransformLoad):
    """ETL class for the Nature Deprived Communities dataset.

    The input is a census-tract-level CSV (compiled by TPL) holding, per
    tract, its acreage and the percent of its area that is natural,
    impervious surface, or cropland.
    """

    NAME = "nlcd_nature_deprived"
    SOURCE_URL = (
        settings.AWS_JUSTICE40_DATASOURCES_URL
        + "/usa_conus_nat_dep__compiled_by_TPL.csv.zip"
    )
    GEO_LEVEL = ValidGeoLevel.CENSUS_TRACT

    # Output score variables (values set on datasets.yml) for linting purposes
    ELIGIBLE_FOR_NATURE_DEPRIVED_FIELD_NAME: str
    TRACT_PERCENT_IMPERVIOUS_FIELD_NAME: str
    TRACT_PERCENT_NON_NATURAL_FIELD_NAME: str
    TRACT_PERCENT_CROPLAND_FIELD_NAME: str

    def __init__(self):
        """Set the input path and the source column names used by transform()."""
        # define the full path for the input CSV file
        self.INPUT_CSV = (
            self.get_tmp_path() / "usa_conus_nat_dep__compiled_by_TPL.csv"
        )

        # this is the main dataframe
        self.df: pd.DataFrame

        # Start dataset-specific vars here
        # Column names as they appear in the source CSV.
        self.PERCENT_NATURAL_FIELD_NAME = "PctNatural"
        self.PERCENT_IMPERVIOUS_FIELD_NAME = "PctImperv"
        self.PERCENT_CROPLAND_FIELD_NAME = "PctCrops"
        self.TRACT_ACRES_FIELD_NAME = "TractAcres"

        # Tracts with very small acreage should not qualify, so we create an
        # eligibility criterion similar to agrivalue. Here, we are ensuring
        # that a tract has at least 35 acres, or is above the 1st percentile
        # for area. This does indeed remove tracts from the 90th+ percentile
        # later on.
        self.TRACT_ACRES_LOWER_BOUND = 35

    def transform(self) -> None:
        """Read the unzipped data file and prepare it for the load() method.

        Steps:

        - Reads ``INPUT_CSV`` with the tract GEOID column kept as a string
        - Flags each tract as eligible when its acreage is at least
          ``TRACT_ACRES_LOWER_BOUND``
        - Derives the non-natural share as the complement of the natural share
        - Renames the impervious and cropland columns to their output names

        The result is stored on ``self.output_df``.
        """
        logger.info("Transforming NLCD Data")

        df_nlcd: pd.DataFrame = pd.read_csv(
            self.INPUT_CSV,
            dtype={self.INPUT_GEOID_TRACT_FIELD_NAME: str},
            low_memory=False,
        )

        df_nlcd[self.ELIGIBLE_FOR_NATURE_DEPRIVED_FIELD_NAME] = (
            df_nlcd[self.TRACT_ACRES_FIELD_NAME] >= self.TRACT_ACRES_LOWER_BOUND
        )

        # The dataset README defines PctNatural as
        # 100 - PctCrops - PctImperv, i.e. on a 0-100 scale, so the
        # complement must be taken from 100 (not 1) to get the percent of
        # the tract covered by impervious surface or cropland.
        # NOTE(review): downstream formatting may expect a 0-1 fraction for
        # "percentage" fields -- confirm the expected scale.
        df_nlcd[self.TRACT_PERCENT_NON_NATURAL_FIELD_NAME] = (
            100 - df_nlcd[self.PERCENT_NATURAL_FIELD_NAME]
        )

        # Assign the final df to the class' output_df for the load method with rename
        self.output_df = df_nlcd.rename(
            columns={
                self.PERCENT_IMPERVIOUS_FIELD_NAME: self.TRACT_PERCENT_IMPERVIOUS_FIELD_NAME,
                self.PERCENT_CROPLAND_FIELD_NAME: self.TRACT_PERCENT_CROPLAND_FIELD_NAME,
            }
        )
than or equal to the {PERCENTILE}th percentile for share of the tract's land area that is covered " + "by impervious surface or cropland as a percent and is low income?" +) +TRACT_ELIGIBLE_FOR_NONNATURAL_THRESHOLD = ( + "Does the tract have at least 35 acres in it?" +) LINGUISTIC_ISOLATION_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for households in linguistic isolation" POVERTY_PCTILE_THRESHOLD = f"Greater than or equal to the {PERCENTILE}th percentile for households at or below 100% federal poverty level" @@ -650,7 +663,6 @@ ISLAND_UNEMPLOYMENT_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009} exceeds {PERCENTILE}th percentile" ISLAND_POVERTY_PCTILE_THRESHOLD = f"{CENSUS_DECENNIAL_POVERTY_LESS_THAN_100_FPL_FIELD_2009} exceeds {PERCENTILE}th percentile" - # Not currently used in a factor EXTREME_HEAT_MEDIAN_HOUSE_VALUE_LOW_INCOME_FIELD = ( f"Greater than or equal to the {PERCENTILE}th percentile for summer days above 90F and " diff --git a/data/data-pipeline/data_pipeline/score/score_narwhal.py b/data/data-pipeline/data_pipeline/score/score_narwhal.py index 4f354b0e8..1effde59e 100644 --- a/data/data-pipeline/data_pipeline/score/score_narwhal.py +++ b/data/data-pipeline/data_pipeline/score/score_narwhal.py @@ -365,6 +365,7 @@ def _housing_factor(self) -> bool: field_names.HOUSING_BURDEN_LOW_INCOME_FIELD, field_names.HISTORIC_REDLINING_SCORE_EXCEEDED_LOW_INCOME_FIELD, field_names.NO_KITCHEN_OR_INDOOR_PLUMBING_LOW_INCOME_FIELD, + field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME, ] # Historic disinvestment @@ -419,6 +420,19 @@ def _housing_factor(self) -> bool: & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] ) + # High non-natural space + self.df[field_names.NON_NATURAL_PCTILE_THRESHOLD] = ( + self.df[ + field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + + field_names.PERCENTILE_FIELD_SUFFIX + ] + >= self.ENVIRONMENTAL_BURDEN_THRESHOLD + ) + self.df[field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME] = ( + 
self.df[field_names.NON_NATURAL_PCTILE_THRESHOLD] + & self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] + ) + # any of the burdens self.df[field_names.HOUSING_THREHSOLD_EXCEEDED] = self.df[ housing_eligibility_columns