diff --git a/data/data-pipeline/README.md b/data/data-pipeline/README.md index 5b843dbd5..97c078626 100644 --- a/data/data-pipeline/README.md +++ b/data/data-pipeline/README.md @@ -350,7 +350,8 @@ We have four pickle files that correspond to expected files: To update the pickles, let's go one by one: -For the `score_transformed_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L58), before the `pdt.assert_frame_equal` and run: +For the `score_transformed_expected.pkl`, put a breakpoint on +[this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L62), before the `pdt.assert_frame_equal` and run: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_transform_score` Once on the breakpoint, capture the df to a pickle as follows: @@ -378,7 +379,8 @@ score_data_actual.to_pickle(data_path / "data_pipeline" / "etl" / "score" / "tes Then take out the breakpoint and re-run the test: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_score_data` -For the `tile_data_expected.pkl`, put a breakpoint on [this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L86), before the `pdt.assert_frame_equal` and run: +For the `tile_data_expected.pkl`, put a breakpoint on +[this line](https://github.com/usds/justice40-tool/blob/main/data/data-pipeline/data_pipeline/etl/score/tests/test_score_post.py#L90), before the `pdt.assert_frame_equal` and run: `pytest data_pipeline/etl/score/tests/test_score_post.py::test_create_tile_data` Once on the breakpoint, capture the df to a pickle as follows: diff --git a/data/data-pipeline/data_pipeline/content/config/csv.yml b/data/data-pipeline/data_pipeline/content/config/csv.yml index 639a3f2f1..591c2b727 100644 --- 
a/data/data-pipeline/data_pipeline/content/config/csv.yml +++ b/data/data-pipeline/data_pipeline/content/config/csv.yml @@ -14,6 +14,39 @@ fields: - score_name: State/Territory label: State/Territory format: string + - score_name: Percent Black or African American + label: Percent Black or African American alone + format: float + - score_name: Percent American Indian / Alaska Native + label: Percent American Indian / Alaska Native + format: float + - score_name: Percent Asian + label: Percent Asian + format: float + - score_name: Percent Native Hawaiian or Pacific + label: Percent Native Hawaiian or Pacific + format: float + - score_name: Percent two or more races + label: Percent two or more races + format: float + - score_name: Percent White + label: Percent White + format: float + - score_name: Percent Hispanic or Latino + label: Percent Hispanic or Latino + format: float + - score_name: Percent other races + label: Percent other races + format: float + - score_name: Percent age under 10 + label: Percent age under 10 + format: float + - score_name: Percent age 10 to 64 + label: Percent age 10 to 64 + format: float + - score_name: Percent age over 64 + label: Percent age over 64 + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/content/config/excel.yml b/data/data-pipeline/data_pipeline/content/config/excel.yml index ad8202295..03fd55a66 100644 --- a/data/data-pipeline/data_pipeline/content/config/excel.yml +++ b/data/data-pipeline/data_pipeline/content/config/excel.yml @@ -18,6 +18,39 @@ sheets: - score_name: State/Territory label: State/Territory format: string + - score_name: Percent Black or African American + label: Percent Black or African American alone + format: float + - score_name: Percent American Indian / Alaska Native + label: Percent American Indian / Alaska Native + format: float + - score_name: Percent Asian + label: Percent Asian + 
format: float + - score_name: Percent Native Hawaiian or Pacific + label: Percent Native Hawaiian or Pacific + format: float + - score_name: Percent two or more races + label: Percent two or more races + format: float + - score_name: Percent White + label: Percent White + format: float + - score_name: Percent Hispanic or Latino + label: Percent Hispanic or Latino + format: float + - score_name: Percent other races + label: Percent other races + format: float + - score_name: Percent age under 10 + label: Percent age under 10 + format: float + - score_name: Percent age 10 to 64 + label: Percent age 10 to 64 + format: float + - score_name: Percent age over 64 + label: Percent age over 64 + format: float - score_name: Total threshold criteria exceeded label: Total threshold criteria exceeded format: int64 diff --git a/data/data-pipeline/data_pipeline/etl/constants.py b/data/data-pipeline/data_pipeline/etl/constants.py index 7d76b4f05..569c088cd 100644 --- a/data/data-pipeline/data_pipeline/etl/constants.py +++ b/data/data-pipeline/data_pipeline/etl/constants.py @@ -198,10 +198,12 @@ "name": "census", "module_dir": "census", "class_name": "CensusETL", + "is_memory_intensive": False, } TRIBAL_INFO = { "name": "tribal", "module_dir": "tribal", "class_name": "TribalETL", + "is_memory_intensive": False, } diff --git a/data/data-pipeline/data_pipeline/etl/score/constants.py b/data/data-pipeline/data_pipeline/etl/score/constants.py index ee431a6db..abf387b2d 100644 --- a/data/data-pipeline/data_pipeline/etl/score/constants.py +++ b/data/data-pipeline/data_pipeline/etl/score/constants.py @@ -315,9 +315,20 @@ field_names.NON_NATURAL_LOW_INCOME_FIELD_NAME: "IS_ET", field_names.AML_BOOLEAN: "AML_ET", field_names.ELIGIBLE_FUDS_BINARY_FIELD_NAME: "FUDS_ET", - field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG" + field_names.IMPUTED_INCOME_FLAG_FIELD_NAME: "IMP_FLG", ## FPL 200 and low higher ed for all others should no longer be M_EBSI, but rather ## FPL_200 (there is no higher 
ed in narwhal) + field_names.PERCENT_BLACK_FIELD_NAME: "DM_B", + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME: "DM_AI", + field_names.PERCENT_ASIAN_FIELD_NAME: "DM_A", + field_names.PERCENT_HAWAIIAN_FIELD_NAME: "DM_HI", + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME: "DM_T", + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME: "DM_W", + field_names.PERCENT_HISPANIC_FIELD_NAME: "DM_H", + field_names.PERCENT_OTHER_RACE_FIELD_NAME: "DM_O", + field_names.PERCENT_AGE_UNDER_10: "AGE_10", + field_names.PERCENT_AGE_10_TO_64: "AGE_MIDDLE", + field_names.PERCENT_AGE_OVER_64: "AGE_OLD", } # columns to round floats to 2 decimals @@ -375,4 +386,16 @@ field_names.SCORE_N_COMMUNITIES + field_names.ADJACENCY_INDEX_SUFFIX, field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME + field_names.PERCENTILE_FIELD_SUFFIX, + # Include demographic data for sidebar -- as percents, NOT as percentiles. + field_names.PERCENT_BLACK_FIELD_NAME, + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, + field_names.PERCENT_ASIAN_FIELD_NAME, + field_names.PERCENT_HAWAIIAN_FIELD_NAME, + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, + field_names.PERCENT_HISPANIC_FIELD_NAME, + field_names.PERCENT_OTHER_RACE_FIELD_NAME, + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, ] diff --git a/data/data-pipeline/data_pipeline/etl/score/etl_score.py b/data/data-pipeline/data_pipeline/etl/score/etl_score.py index d64e10b52..648043226 100644 --- a/data/data-pipeline/data_pipeline/etl/score/etl_score.py +++ b/data/data-pipeline/data_pipeline/etl/score/etl_score.py @@ -461,6 +461,17 @@ def _prepare_initial_df(self) -> pd.DataFrame: field_names.FUTURE_WILDFIRE_RISK_FIELD, field_names.TRACT_PERCENT_NON_NATURAL_FIELD_NAME, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, + field_names.PERCENT_BLACK_FIELD_NAME, + field_names.PERCENT_AMERICAN_INDIAN_FIELD_NAME, + field_names.PERCENT_ASIAN_FIELD_NAME, + 
field_names.PERCENT_HAWAIIAN_FIELD_NAME, + field_names.PERCENT_TWO_OR_MORE_RACES_FIELD_NAME, + field_names.PERCENT_NON_HISPANIC_WHITE_FIELD_NAME, + field_names.PERCENT_HISPANIC_FIELD_NAME, + field_names.PERCENT_OTHER_RACE_FIELD_NAME, + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, ] non_numeric_columns = [ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv index f78260c50..a4043668e 100644 --- a/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv +++ b/data/data-pipeline/data_pipeline/etl/score/tests/sample_data/score_data_initial.csv @@ -1,3 +1,3 @@ -GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in 
the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with 
no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied 
housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected 
building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition 
M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median 
household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic 
indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for 
housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has 
low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts" -01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.6191105803406409,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.
8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False 
-01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,
True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True +GEOID10_TRACT,Persistent Poverty Census Tract,Tract-level redlining score meets or exceeds 3.25,Does the tract have at least 35 acres in it?,Contains agricultural value,Is there at least one Formerly Used Defense Site (FUDS) in the tract?,Is there at least one abandoned mine in this census tract?,Income data has been estimated based on neighbor income,Housing burden (percent),Share of homes with no kitchen or indoor plumbing (percent),Total population,Median household income (% of state median household income),Current asthma among adults aged greater than or equal to 18 years,Coronary heart disease among adults aged greater than or equal to 18 years,Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years,Current lack of health insurance among adults aged 18-64 years,Diagnosed diabetes among adults aged greater than or equal to 18 years,Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years,Percent of individuals < 100% Federal Poverty Line,Percent of individuals < 150% Federal Poverty Line,Percent of individuals below 200% Federal Poverty Line,Area Median Income (State or metropolitan),Median household income in the past 12 months,Energy burden,FEMA Risk Index Expected Annual Loss Score,Urban Heuristic Flag,Air toxics cancer risk,Respiratory hazard index,Diesel particulate matter 
exposure,PM2.5 in the air,Ozone,Traffic proximity and volume,Proximity to Risk Management Plan (RMP) facilities,Proximity to hazardous waste sites,Proximity to NPL sites,Wastewater discharge,Percent pre-1960s housing (lead paint indicator),Individuals under 5 years old,Individuals over 64 years old,Linguistic isolation (percent),Percent of households in linguistic isolation,Poverty (Less than 200% of federal poverty line),Percent individuals age 25 or over with less than high school degree,Unemployment (percent),Median value ($) of owner-occupied housing units,Percent enrollment in college or graduate school,Percent of population not currently enrolled in college or graduate school,Expected building loss rate (Natural Hazards Risk Index),Expected agricultural loss rate (Natural Hazards Risk Index),Expected population loss rate (Natural Hazards Risk Index),Percent individuals age 25 or over with less than high school degree in 2009,Percentage households below 100% of federal poverty line in 2009,Unemployment (percent) in 2009,Unemployment (percent) in 2010,Percent of individuals less than 100% Federal Poverty Line in 2010,Total population in 2009,Summer days above 90F,Percent low access to healthy food,Percent impenetrable surface areas,Leaky underground storage tanks,DOT Travel Barriers Score,Share of properties at risk of flood in 30 years,Share of properties at risk of fire in 30 years,Share of the tract's land area that is covered by impervious surface or cropland as a percent,"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted",Percent Black or African American,Percent American Indian / Alaska Native,Percent Asian,Percent Native Hawaiian or Pacific,Percent two or more races,Percent White,Percent Hispanic or Latino,Percent other races,Percent age under 10,Percent age 10 to 64,Percent age over 64,Third grade reading proficiency,Median household income as a percent of area median income,Life expectancy (years),Median household income as a 
percent of territory median income in 2009,Housing burden (percent) (percentile),Share of homes with no kitchen or indoor plumbing (percent) (percentile),Total population (percentile),Median household income (% of state median household income) (percentile),Current asthma among adults aged greater than or equal to 18 years (percentile),Coronary heart disease among adults aged greater than or equal to 18 years (percentile),Cancer (excluding skin cancer) among adults aged greater than or equal to 18 years (percentile),Current lack of health insurance among adults aged 18-64 years (percentile),Diagnosed diabetes among adults aged greater than or equal to 18 years (percentile),Physical health not good for greater than or equal to 14 days among adults aged greater than or equal to 18 years (percentile),Percent of individuals < 100% Federal Poverty Line (percentile),Percent of individuals < 150% Federal Poverty Line (percentile),Percent of individuals below 200% Federal Poverty Line (percentile),Area Median Income (State or metropolitan) (percentile),Median household income in the past 12 months (percentile),Energy burden (percentile),FEMA Risk Index Expected Annual Loss Score (percentile),Urban Heuristic Flag (percentile),Air toxics cancer risk (percentile),Respiratory hazard index (percentile),Diesel particulate matter exposure (percentile),PM2.5 in the air (percentile),Ozone (percentile),Traffic proximity and volume (percentile),Proximity to Risk Management Plan (RMP) facilities (percentile),Proximity to hazardous waste sites (percentile),Proximity to NPL sites (percentile),Wastewater discharge (percentile),Percent pre-1960s housing (lead paint indicator) (percentile),Individuals under 5 years old (percentile),Individuals over 64 years old (percentile),Linguistic isolation (percent) (percentile),Percent of households in linguistic isolation (percentile),Poverty (Less than 200% of federal poverty line) (percentile),Percent individuals age 25 or over with less than high 
school degree (percentile),Unemployment (percent) (percentile),Median value ($) of owner-occupied housing units (percentile),Percent enrollment in college or graduate school (percentile),Percent of population not currently enrolled in college or graduate school (percentile),Expected building loss rate (Natural Hazards Risk Index) (percentile),Expected agricultural loss rate (Natural Hazards Risk Index) (percentile),Expected population loss rate (Natural Hazards Risk Index) (percentile),Percent individuals age 25 or over with less than high school degree in 2009 (percentile),Percentage households below 100% of federal poverty line in 2009 (percentile),Unemployment (percent) in 2009 (percentile),Unemployment (percent) in 2010 (percentile),Percent of individuals less than 100% Federal Poverty Line in 2010 (percentile),Total population in 2009 (percentile),Summer days above 90F (percentile),Percent low access to healthy food (percentile),Percent impenetrable surface areas (percentile),Leaky underground storage tanks (percentile),DOT Travel Barriers Score (percentile),Share of properties at risk of flood in 30 years (percentile),Share of properties at risk of fire in 30 years (percentile),Share of the tract's land area that is covered by impervious surface or cropland as a percent (percentile),"Percent of individuals below 200% Federal Poverty Line, imputed and adjusted (percentile)",Percent Black or African American (percentile),Percent American Indian / Alaska Native (percentile),Percent Asian (percentile),Percent Native Hawaiian or Pacific (percentile),Percent two or more races (percentile),Percent White (percentile),Percent Hispanic or Latino (percentile),Percent other races (percentile),Percent age under 10 (percentile),Percent age 10 to 64 (percentile),Percent age over 64 (percentile),Low third grade reading proficiency (percentile),Low median household income as a percent of area median income (percentile),Low life expectancy (percentile),Low median household 
income as a percent of territory median income in 2009 (percentile),Total population in 2009 (island areas) and 2019 (states and PR),Total threshold criteria exceeded,Exceeds FPL200 threshold,Percent higher ed enrollment rate is less than 20%,Is low income and has a low percent of higher ed students?,Greater than or equal to the 90th percentile for expected population loss,Greater than or equal to the 90th percentile for expected agricultural loss,Greater than or equal to the 90th percentile for expected building loss,At least one climate threshold exceeded,"Greater than or equal to the 90th percentile for expected population loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected agriculture loss rate, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for expected building loss rate, is low income, and has a low percent of higher ed students?",Climate Factor (Definition M),Greater than or equal to the 90th percentile for energy burden,Greater than or equal to the 90th percentile for pm2.5 exposure,At least one energy threshold exceeded,"Greater than or equal to the 90th percentile for PM2.5 exposure, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for energy burden, is low income, and has a low percent of higher ed students?",Energy Factor (Definition M),Greater than or equal to the 90th percentile for diesel particulate matter,Greater than or equal to the 90th percentile for traffic proximity,At least one traffic threshold exceeded,"Greater than or equal to the 90th percentile for diesel particulate matter, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for traffic proximity, is low income, and has a low percent of higher ed students?",Transportation Factor (Definition M),Greater than or equal to the 90th percentile 
for lead paint and the median house value is less than 90th percentile,Greater than or equal to the 90th percentile for housing burden,At least one housing threshold exceeded,"Greater than or equal to the 90th percentile for lead paint, the median house value is less than 90th percentile, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for housing burden, is low income, and has a low percent of higher ed students?",Housing Factor (Definition M),Greater than or equal to the 90th percentile for RMP proximity,Greater than or equal to the 90th percentile for NPL (superfund sites) proximity,Greater than or equal to the 90th percentile for proximity to hazardous waste sites,At least one pollution threshold exceeded,"Greater than or equal to the 90th percentile for proximity to RMP sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to superfund sites, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for proximity to hazardous waste facilities, is low income, and has a low percent of higher ed students?",Pollution Factor (Definition M),Greater than or equal to the 90th percentile for wastewater discharge,At least one water threshold exceeded,"Greater than or equal to the 90th percentile for wastewater discharge, is low income, and has a low percent of higher ed students?",Water Factor (Definition M),Greater than or equal to the 90th percentile for diabetes,Greater than or equal to the 90th percentile for asthma,Greater than or equal to the 90th percentile for heart disease,Greater than or equal to the 90th percentile for low life expectancy,At least one health threshold exceeded,"Greater than or equal to the 90th percentile for diabetes, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for asthma, is low income, and has a low 
percent of higher ed students?","Greater than or equal to the 90th percentile for heart disease, is low income, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low life expectancy, is low income, and has a low percent of higher ed students?",Health Factor (Definition M),Low high school education and low percent of higher ed students,Greater than or equal to the 90th percentile for unemployment,Greater than or equal to the 90th percentile for low median household income as a percent of area median income,Greater than or equal to the 90th percentile for households in linguistic isolation,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level,"Greater than or equal to the 90th percentile for households in linguistic isolation, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for households at or below 100% federal poverty level, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for low median household income as a percent of area median income, has low HS attainment, and has a low percent of higher ed students?","Greater than or equal to the 90th percentile for unemployment, has low HS attainment, and has a low percent of higher ed students?",Unemployment (percent) in 2009 (island areas) and 2010 (states and PR),Unemployment (percent) in 2009 for island areas (percentile),Unemployment (percent) in 2009 exceeds 90th percentile,Percentage households below 100% of federal poverty line in 2009 (island areas) and 2010 (states and PR),Percentage households below 100% of federal poverty line in 2009 for island areas (percentile),Percentage households below 100% of federal poverty line in 2009 exceeds 90th percentile,Low median household income as a percent of territory median income in 2009 exceeds 90th percentile,Low high school education in 2009 (island 
areas),Greater than or equal to the 90th percentile for unemployment and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education in 2009 (island areas)?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education in 2009 (island areas)?,At least one workforce threshold exceeded,Both workforce socioeconomic indicators exceeded,Workforce Factor (Definition M),Total categories exceeded,Definition M (communities),Any Non-Workforce Factor (Definition M),Definition M (percentile),Is low income (imputed and adjusted)?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years,Greater than or equal to the 90th percentile for expected population loss rate and is low income?,Greater than or equal to the 90th percentile for expected agriculture loss rate and is low income?,Greater than or equal to the 90th percentile for expected building loss rate and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of flood in 30 years and is low income?,Greater than or equal to the 90th percentile for share of properties at risk of fire in 30 years and is low income?,Climate Factor (Definition N),Greater than or equal to the 90th percentile for PM2.5 exposure and is low income?,Greater than or equal to the 90th percentile for energy burden and is low income?,Energy Factor (Definition N),Greater than or equal to the 90th percentile for DOT travel barriers,Greater than or equal to the 90th percentile for diesel particulate matter and is low income?,Greater than or equal to the 90th percentile for traffic proximity and is low income?,Greater than or equal to the 90th percentile for DOT transit barriers and is low 
income?,Transportation Factor (Definition N),Tract-level redlining score meets or exceeds 3.25 and is low income,Greater than or equal to the 90th percentile for share of homes without indoor plumbing or a kitchen,Greater than or equal to the 90th percentile for share of homes with no kitchen or indoor plumbing and is low income?,Greater than or equal to the 90th percentile for lead paint and the median house value is less than 90th percentile and is low income?,Greater than or equal to the 90th percentile for housing burden and is low income?,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent,Greater than or equal to the 90th percentile for share of the tract's land area that is covered by impervious surface or cropland as a percent and is low income?,Housing Factor (Definition N),Greater than or equal to the 90th percentile for proximity to RMP sites and is low income?,Greater than or equal to the 90th percentile for proximity to superfund sites and is low income?,Greater than or equal to the 90th percentile for proximity to hazardous waste facilities and is low income?,There is at least one abandoned mine in this census tract and the tract is low income.,There is at least one Formerly Used Defense Site (FUDS) in the tract and the tract is low income.,Pollution Factor (Definition N),Greater than or equal to the 90th percentile for leaky underwater storage tanks,Greater than or equal to the 90th percentile for wastewater discharge and is low income?,Greater than or equal to the 90th percentile for leaky underground storage tanks and is low income?,Water Factor (Definition N),Greater than or equal to the 90th percentile for diabetes and is low income?,Greater than or equal to the 90th percentile for asthma and is low income?,Greater than or equal to the 90th percentile for heart disease and is low income?,Greater than or equal to the 90th percentile for low life expectancy 
and is low income?,Health Factor (Definition N),Low high school education,Greater than or equal to the 90th percentile for households in linguistic isolation and has low HS education?,Greater than or equal to the 90th percentile for households at or below 100% federal poverty level and has low HS education?,Greater than or equal to the 90th percentile for low median household income as a percent of area median income and has low HS education?,Greater than or equal to the 90th percentile for unemployment and has low HS education?,Workforce Factor (Definition N),Definition N (communities),Definition N (communities) (percentile),Meets the less stringent low income criterion for the adjacency index?,Definition N (communities) (average of neighbors),Is the tract surrounded by disadvantaged communities?,Definition N (communities) (based on adjacency index and low income alone),"Definition M community, including adjacency index tracts" +01073001100,True,True,True,True,,,False,0.2752043596730245,0.0,4781.0,0.7327449738800064,11.2,7.2,6.7,16.6,19.3,15.1,0.150375939849624,0.318796992481203,0.3744360902255639,57447.0,37030.0,0.049,18.7674524286,1.0,40.0,0.5,0.467489734286576,9.8735797260274,43.056760130719,181.621925132718,2.0427358988323,0.702342755246247,0.134193041307899,4.45238981883771,0.168806466951973,0.035557414766785,0.203932231750679,0.0,0.0,0.374436090225563,0.0821917808219178,0.0092071611253196,85500.0,0.0890751899397432,0.9109248100602568,0.0004047858,5.6328e-05,2.8039e-06,,,,0.1536983669548511,0.3189099613330878,,62.666668,0.068036923,0.171,1.96440511031451,47.695227725,0.0754274220583305,0.6620851491786792,-77.7525,0.2853609002858206,0.9682074879732272,0.0121313532733737,0.0,0.0,0.0,0.0161054172767203,0.0035557414766785,0.0,0.1344906923237816,0.6615770759255386,0.2039322317506798,58.143433,0.6445941476491375,70.3,,0.6466760729305078,0.2159833426939357,0.6290185267766651,0.2601978513507951,0.8509696039125366,0.7264920810941454,0.4789587420739856,0.619110580340640
9,0.965388552418323,0.697012994398476,0.6204255784694491,0.7319894972922707,0.6305043487774192,0.3145069836211475,0.1524256393370651,0.864954517474865,0.6038301323911519,0.5972204988211937,0.9070825388177608,0.8818509942794879,0.8407790792699537,0.8257128232087766,0.5755156814188676,0.3920895082932574,0.9007580978635424,0.4820205132363076,0.7531654977635437,0.9619599422457518,0.3979135417088958,0.1737408953933055,0.7659355954649262,0.1287706711725437,0.13169416629505,0.6347481790786611,0.4189065592792301,0.029797296373751,0.1130218397675614,0.7459773722926589,0.2540362752992234,0.7846412062513758,0.2153147384849333,0.6143028498159407,,,,0.9349594607528132,0.8950599559730369,,0.7537922665342821,0.8019598155467721,0.4126953421856217,0.521114579532709,0.4517484245644384,0.4973964722881056,0.8410893082809093,0.2685589820648203,0.607629501459933,0.9950049813710372,0.8553628212301939,0.0982626615533689,0.4219630696163662,0.0261283146588784,0.0311301570837825,0.0475755053020894,0.0977645244496608,0.6708610265718614,0.1578889904876284,0.763719241739795,0.990724418702258,0.8218135517196475,0.97046998263836,,4781.0,0,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,False,False,True,False,False,False,True,False,False,True,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0.1536983669548511,,False,0.3189099613330878,,False,False,False,False,False,False,False,False,False,0.0,False,False,0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,0,True,0.8571428571428571,False,False,False 
+01073001400,True,True,True,True,,,False,0.1823529411764705,0.0047058823529411,1946.0,0.7136694633528574,11.1,9.1,7.3,21.4,22.4,17.4,0.2816032887975334,0.3679342240493319,0.4835560123329907,57447.0,36066.0,0.07,17.3011023381,1.0,40.0,0.6,0.655319095139786,9.945103013698628,43.1266823529412,3260.33374354854,1.81915896353987,3.34035680534013,0.214095348702766,0.103297800913177,0.647212543554006,0.054984583761562,0.189105858170606,0.0245098039215686,0.024509803921569,0.48355601233299,0.1742543171114599,0.1150121065375302,67800.0,0.0771549125979505,0.9228450874020494,0.0008951111,5.1282e-06,2.3791e-06,,,,0.0804953560371517,0.2950894905920146,,61.666668,0.087159691,0.34900002,3.16184976454882,44.7571359825,0.2384615384615384,0.0,-56.8746,0.4064010997350401,0.9167523124357656,0.0,0.0,0.0,0.0035971223021582,0.0,0.0683453237410072,0.0775950668036999,0.0853031860226104,0.7255909558067831,0.1891058581706063,93.77919,0.6278134628440127,71.0,,0.3421186011150532,0.5051574635963891,0.0916001135119795,0.240302951305517,0.8385794307486707,0.9217563763541756,0.6048579715089994,0.7894025988796952,0.9878088657624612,0.8447283118655634,0.8689486351950112,0.8013648049887862,0.7892483999781194,0.3145069836211475,0.1404620788058391,0.970802270706518,0.5282998116553705,0.5972204988211937,0.9070825388177608,0.9704848815036776,0.9380686461454644,0.8391046304110233,0.5827649654828936,0.9563394697362702,0.8799745949379062,0.800259455953298,0.8653801975648978,0.8431750027766466,0.8462723476709774,0.471128768530155,0.6930041485925866,0.5867081244286861,0.5847015580870529,0.7916514641694031,0.7516347007030237,0.9067399297439892,0.0522639122516786,0.6434566620719774,0.356556985519905,0.9166162227602904,0.0865380767537716,0.558933421571466,,,,0.6917513228236646,0.8737301229199994,,0.7501654807214959,0.8647617479139218,0.6268497920495212,0.6418426778016514,0.3716517703914219,0.8850358496224203,0.3366245885930925,0.5569693544162451,0.7883908294582027,0.9840732602732248,0.2486523003016117,0.0982626615
533689,0.4219630696163662,0.0924351398195788,0.0038486209108402,0.4634108061632525,0.8246557394947661,0.1930997775442523,0.5561393692083032,0.6900904835341803,0.9537899773356836,0.8364273002184828,0.959938777375042,,1946.0,9,True,True,True,False,False,True,True,False,False,True,True,True,False,True,False,True,True,True,True,True,True,True,True,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,True,True,True,True,True,False,False,False,False,False,False,True,0.0804953560371517,,False,0.2950894905920146,,False,False,False,False,False,False,True,True,True,6.0,True,True,1,True,False,False,False,False,True,False,False,True,False,True,True,False,True,True,False,True,True,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,True,False,True,True,True,True,False,False,False,True,True,True,1,True,1.0,True,True,True diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl index 508b5e914..0eb41e974 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/downloadable_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl index 0ea454333..8d726f018 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl 
index 6e5d23eba..8119822ad 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/score_transformed_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl index 08aa265b5..49f9fe033 100644 Binary files a/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl and b/data/data-pipeline/data_pipeline/etl/score/tests/snapshots/tile_data_expected.pkl differ diff --git a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py index a670d3107..dba9d06ba 100644 --- a/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py +++ b/data/data-pipeline/data_pipeline/etl/sources/census_acs/etl.py @@ -186,19 +186,25 @@ def __init__(self): "B03002_003E", "B03003_001E", "B03003_003E", + "B02001_007E", # "Some other race alone" ] - # Name output demographics fields. 
- self.BLACK_FIELD_NAME = "Black or African American alone" - self.AMERICAN_INDIAN_FIELD_NAME = ( - "American Indian and Alaska Native alone" - ) - self.ASIAN_FIELD_NAME = "Asian alone" - self.HAWAIIAN_FIELD_NAME = "Native Hawaiian and Other Pacific alone" - self.TWO_OR_MORE_RACES_FIELD_NAME = "Two or more races" - self.NON_HISPANIC_WHITE_FIELD_NAME = "Non-Hispanic White" + self.BLACK_FIELD_NAME = "Black or African American" + self.AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" + self.ASIAN_FIELD_NAME = "Asian" + self.HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" + self.TWO_OR_MORE_RACES_FIELD_NAME = "two or more races" + self.NON_HISPANIC_WHITE_FIELD_NAME = "White" self.HISPANIC_FIELD_NAME = "Hispanic or Latino" + # Note that `other` is lowercase because the whole field will show up in the download + # file as "Percent other races" + self.OTHER_RACE_FIELD_NAME = "other races" + + self.TOTAL_RACE_POPULATION_FIELD_NAME = ( + "Total population surveyed on racial data" + ) + # Name output demographics fields. 
self.RE_OUTPUT_FIELDS = [ self.BLACK_FIELD_NAME, self.AMERICAN_INDIAN_FIELD_NAME, @@ -207,9 +213,64 @@ def __init__(self): self.TWO_OR_MORE_RACES_FIELD_NAME, self.NON_HISPANIC_WHITE_FIELD_NAME, self.HISPANIC_FIELD_NAME, + self.OTHER_RACE_FIELD_NAME, ] - self.PERCENT_PREFIX = "Percent " + self.AGE_INPUT_FIELDS = [ + "B01001_001E", # Estimate!!Total: + "B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years + "B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years + "B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years + "B01001_006E", # Estimate!!Total:!!Male:!!15 to 17 years + "B01001_007E", # Estimate!!Total:!!Male:!!18 and 19 years + "B01001_008E", # Estimate!!Total:!!Male:!!20 years + "B01001_009E", # Estimate!!Total:!!Male:!!21 years + "B01001_010E", # Estimate!!Total:!!Male:!!22 to 24 years + "B01001_011E", # Estimate!!Total:!!Male:!!25 to 29 years + "B01001_012E", # Estimate!!Total:!!Male:!!30 to 34 years + "B01001_013E", # Estimate!!Total:!!Male:!!35 to 39 years + "B01001_014E", # Estimate!!Total:!!Male:!!40 to 44 years + "B01001_015E", # Estimate!!Total:!!Male:!!45 to 49 years + "B01001_016E", # Estimate!!Total:!!Male:!!50 to 54 years + "B01001_017E", # Estimate!!Total:!!Male:!!55 to 59 years + "B01001_018E", # Estimate!!Total:!!Male:!!60 and 61 years + "B01001_019E", # Estimate!!Total:!!Male:!!62 to 64 years + "B01001_020E", # Estimate!!Total:!!Male:!!65 and 66 years + "B01001_021E", # Estimate!!Total:!!Male:!!67 to 69 years + "B01001_022E", # Estimate!!Total:!!Male:!!70 to 74 years + "B01001_023E", # Estimate!!Total:!!Male:!!75 to 79 years + "B01001_024E", # Estimate!!Total:!!Male:!!80 to 84 years + "B01001_025E", # Estimate!!Total:!!Male:!!85 years and over + "B01001_027E", # Estimate!!Total:!!Female:!!Under 5 years + "B01001_028E", # Estimate!!Total:!!Female:!!5 to 9 years + "B01001_029E", # Estimate!!Total:!!Female:!!10 to 14 years + "B01001_030E", # Estimate!!Total:!!Female:!!15 to 17 years + "B01001_031E", # Estimate!!Total:!!Female:!!18 and 19 
years + "B01001_032E", # Estimate!!Total:!!Female:!!20 years + "B01001_033E", # Estimate!!Total:!!Female:!!21 years + "B01001_034E", # Estimate!!Total:!!Female:!!22 to 24 years + "B01001_035E", # Estimate!!Total:!!Female:!!25 to 29 years + "B01001_036E", # Estimate!!Total:!!Female:!!30 to 34 years + "B01001_037E", # Estimate!!Total:!!Female:!!35 to 39 years + "B01001_038E", # Estimate!!Total:!!Female:!!40 to 44 years + "B01001_039E", # Estimate!!Total:!!Female:!!45 to 49 years + "B01001_040E", # Estimate!!Total:!!Female:!!50 to 54 years + "B01001_041E", # Estimate!!Total:!!Female:!!55 to 59 years + "B01001_042E", # Estimate!!Total:!!Female:!!60 and 61 years + "B01001_043E", # Estimate!!Total:!!Female:!!62 to 64 years + "B01001_044E", # Estimate!!Total:!!Female:!!65 and 66 years + "B01001_045E", # Estimate!!Total:!!Female:!!67 to 69 years + "B01001_046E", # Estimate!!Total:!!Female:!!70 to 74 years + "B01001_047E", # Estimate!!Total:!!Female:!!75 to 79 years + "B01001_048E", # Estimate!!Total:!!Female:!!80 to 84 years + "B01001_049E", # Estimate!!Total:!!Female:!!85 years and over + ] + + self.AGE_OUTPUT_FIELDS = [ + field_names.PERCENT_AGE_UNDER_10, + field_names.PERCENT_AGE_10_TO_64, + field_names.PERCENT_AGE_OVER_64, + ] self.STATE_GEOID_FIELD_NAME = "GEOID2" @@ -230,7 +291,11 @@ def __init__(self): field_names.IMPUTED_INCOME_FLAG_FIELD_NAME, ] + self.RE_OUTPUT_FIELDS - + [self.PERCENT_PREFIX + field for field in self.RE_OUTPUT_FIELDS] + + [ + field_names.PERCENT_PREFIX + field + for field in self.RE_OUTPUT_FIELDS + ] + + self.AGE_OUTPUT_FIELDS + [ field_names.POVERTY_LESS_THAN_200_FPL_FIELD, field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD, @@ -280,6 +345,7 @@ def extract(self) -> None: + self.EDUCATIONAL_FIELDS + self.RE_FIELDS + self.COLLEGE_ATTENDANCE_FIELDS + + self.AGE_INPUT_FIELDS ) self.df = retrieve_census_acs_data( @@ -406,36 +472,103 @@ def transform(self) -> None: ) # Calculate some demographic information. 
- df[self.BLACK_FIELD_NAME] = df["B02001_003E"] - df[self.AMERICAN_INDIAN_FIELD_NAME] = df["B02001_004E"] - df[self.ASIAN_FIELD_NAME] = df["B02001_005E"] - df[self.HAWAIIAN_FIELD_NAME] = df["B02001_006E"] - df[self.TWO_OR_MORE_RACES_FIELD_NAME] = df["B02001_008E"] - df[self.NON_HISPANIC_WHITE_FIELD_NAME] = df["B03002_003E"] - df[self.HISPANIC_FIELD_NAME] = df["B03003_003E"] - # Calculate demographics as percent - df[self.PERCENT_PREFIX + self.BLACK_FIELD_NAME] = ( - df["B02001_003E"] / df["B02001_001E"] - ) - df[self.PERCENT_PREFIX + self.AMERICAN_INDIAN_FIELD_NAME] = ( - df["B02001_004E"] / df["B02001_001E"] - ) - df[self.PERCENT_PREFIX + self.ASIAN_FIELD_NAME] = ( - df["B02001_005E"] / df["B02001_001E"] - ) - df[self.PERCENT_PREFIX + self.HAWAIIAN_FIELD_NAME] = ( - df["B02001_006E"] / df["B02001_001E"] - ) - df[self.PERCENT_PREFIX + self.TWO_OR_MORE_RACES_FIELD_NAME] = ( - df["B02001_008E"] / df["B02001_001E"] - ) - df[self.PERCENT_PREFIX + self.NON_HISPANIC_WHITE_FIELD_NAME] = ( - df["B03002_003E"] / df["B03002_001E"] - ) - df[self.PERCENT_PREFIX + self.HISPANIC_FIELD_NAME] = ( - df["B03003_003E"] / df["B03003_001E"] - ) + df = df.rename( + columns={ + "B02001_003E": self.BLACK_FIELD_NAME, + "B02001_004E": self.AMERICAN_INDIAN_FIELD_NAME, + "B02001_005E": self.ASIAN_FIELD_NAME, + "B02001_006E": self.HAWAIIAN_FIELD_NAME, + "B02001_008E": self.TWO_OR_MORE_RACES_FIELD_NAME, + "B03002_003E": self.NON_HISPANIC_WHITE_FIELD_NAME, + "B03003_003E": self.HISPANIC_FIELD_NAME, + "B02001_007E": self.OTHER_RACE_FIELD_NAME, + "B02001_001E": self.TOTAL_RACE_POPULATION_FIELD_NAME, + }, + errors="raise", + ) + + for race_field_name in self.RE_OUTPUT_FIELDS: + df[field_names.PERCENT_PREFIX + race_field_name] = ( + df[race_field_name] / df[self.TOTAL_RACE_POPULATION_FIELD_NAME] + ) + + # First value is the `age bucket`, and the second value is a list of all fields + # that will be summed in the calculations of the total population in that age + # bucket. 
+ age_bucket_and_its_sum_columns = [ + ( + field_names.PERCENT_AGE_UNDER_10, + [ + "B01001_003E", # Estimate!!Total:!!Male:!!Under 5 years + "B01001_004E", # Estimate!!Total:!!Male:!!5 to 9 years + "B01001_027E", # Estimate!!Total:!!Female:!!Under 5 years + "B01001_028E", # Estimate!!Total:!!Female:!!5 to 9 years + ], + ), + ( + field_names.PERCENT_AGE_10_TO_64, + [ + "B01001_005E", # Estimate!!Total:!!Male:!!10 to 14 years + "B01001_006E", # Estimate!!Total:!!Male:!!15 to 17 years + "B01001_007E", # Estimate!!Total:!!Male:!!18 and 19 years + "B01001_008E", # Estimate!!Total:!!Male:!!20 years + "B01001_009E", # Estimate!!Total:!!Male:!!21 years + "B01001_010E", # Estimate!!Total:!!Male:!!22 to 24 years + "B01001_011E", # Estimate!!Total:!!Male:!!25 to 29 years + "B01001_012E", # Estimate!!Total:!!Male:!!30 to 34 years + "B01001_013E", # Estimate!!Total:!!Male:!!35 to 39 years + "B01001_014E", # Estimate!!Total:!!Male:!!40 to 44 years + "B01001_015E", # Estimate!!Total:!!Male:!!45 to 49 years + "B01001_016E", # Estimate!!Total:!!Male:!!50 to 54 years + "B01001_017E", # Estimate!!Total:!!Male:!!55 to 59 years + "B01001_018E", # Estimate!!Total:!!Male:!!60 and 61 years + "B01001_019E", # Estimate!!Total:!!Male:!!62 to 64 years + "B01001_029E", # Estimate!!Total:!!Female:!!10 to 14 years + "B01001_030E", # Estimate!!Total:!!Female:!!15 to 17 years + "B01001_031E", # Estimate!!Total:!!Female:!!18 and 19 years + "B01001_032E", # Estimate!!Total:!!Female:!!20 years + "B01001_033E", # Estimate!!Total:!!Female:!!21 years + "B01001_034E", # Estimate!!Total:!!Female:!!22 to 24 years + "B01001_035E", # Estimate!!Total:!!Female:!!25 to 29 years + "B01001_036E", # Estimate!!Total:!!Female:!!30 to 34 years + "B01001_037E", # Estimate!!Total:!!Female:!!35 to 39 years + "B01001_038E", # Estimate!!Total:!!Female:!!40 to 44 years + "B01001_039E", # Estimate!!Total:!!Female:!!45 to 49 years + "B01001_040E", # Estimate!!Total:!!Female:!!50 to 54 years + "B01001_041E", # 
Estimate!!Total:!!Female:!!55 to 59 years + "B01001_042E", # Estimate!!Total:!!Female:!!60 and 61 years + "B01001_043E", # Estimate!!Total:!!Female:!!62 to 64 years + ], + ), + ( + field_names.PERCENT_AGE_OVER_64, + [ + "B01001_020E", # Estimate!!Total:!!Male:!!65 and 66 years + "B01001_021E", # Estimate!!Total:!!Male:!!67 to 69 years + "B01001_022E", # Estimate!!Total:!!Male:!!70 to 74 years + "B01001_023E", # Estimate!!Total:!!Male:!!75 to 79 years + "B01001_024E", # Estimate!!Total:!!Male:!!80 to 84 years + "B01001_025E", # Estimate!!Total:!!Male:!!85 years and over + "B01001_044E", # Estimate!!Total:!!Female:!!65 and 66 years + "B01001_045E", # Estimate!!Total:!!Female:!!67 to 69 years + "B01001_046E", # Estimate!!Total:!!Female:!!70 to 74 years + "B01001_047E", # Estimate!!Total:!!Female:!!75 to 79 years + "B01001_048E", # Estimate!!Total:!!Female:!!80 to 84 years + "B01001_049E", # Estimate!!Total:!!Female:!!85 years and over + ], + ), + ] + + # Calculate age groups + total_population_age_series = df["B01001_001E"] + + # For each age bucket, sum the relevant columns and calculate the total + # percentage. + for age_bucket, sum_columns in age_bucket_and_its_sum_columns: + df[age_bucket] = ( + df[sum_columns].sum(axis=1) / total_population_age_series + ) # Calculate college attendance and adjust low income df[self.COLLEGE_ATTENDANCE_FIELD] = ( @@ -505,7 +638,7 @@ def transform(self) -> None: ) # We generate a boolean that is TRUE when there is an imputed income but not a baseline income, and FALSE otherwise. - # This allows us to see which tracts have an imputed income. + # This allows us to see which tracts have an imputed income. 
df[field_names.IMPUTED_INCOME_FLAG_FIELD_NAME] = ( df[field_names.POVERTY_LESS_THAN_200_FPL_IMPUTED_FIELD].notna() & df[field_names.POVERTY_LESS_THAN_200_FPL_FIELD].isna() diff --git a/data/data-pipeline/data_pipeline/score/field_names.py b/data/data-pipeline/data_pipeline/score/field_names.py index e8b5e7d3d..a07816ff4 100644 --- a/data/data-pipeline/data_pipeline/score/field_names.py +++ b/data/data-pipeline/data_pipeline/score/field_names.py @@ -99,6 +99,27 @@ "Low median household income as a percent of area median income" ) +# Additional ACS demographic fields. +PERCENT_PREFIX = "Percent " + +PERCENT_BLACK_FIELD_NAME = PERCENT_PREFIX + "Black or African American" +PERCENT_AMERICAN_INDIAN_FIELD_NAME = ( + PERCENT_PREFIX + "American Indian / Alaska Native" +) +PERCENT_ASIAN_FIELD_NAME = PERCENT_PREFIX + "Asian" +PERCENT_HAWAIIAN_FIELD_NAME = PERCENT_PREFIX + "Native Hawaiian or Pacific" +PERCENT_TWO_OR_MORE_RACES_FIELD_NAME = PERCENT_PREFIX + "two or more races" +PERCENT_NON_HISPANIC_WHITE_FIELD_NAME = PERCENT_PREFIX + "White" +PERCENT_HISPANIC_FIELD_NAME = PERCENT_PREFIX + "Hispanic or Latino" +# Note that `other` is lowercase because the whole field will show up in the download +# file as "Percent other races" +PERCENT_OTHER_RACE_FIELD_NAME = PERCENT_PREFIX + "other races" + +# Age +PERCENT_AGE_UNDER_10 = "Percent age under 10" +PERCENT_AGE_10_TO_64 = "Percent age 10 to 64" +PERCENT_AGE_OVER_64 = "Percent age over 64" + # Climate FEMA_RISK_FIELD = "FEMA Risk Index Expected Annual Loss Score" EXPECTED_BUILDING_LOSS_RATE_FIELD = (