-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add demos for island areas #1932
Changes from all commits
47b7ea2
97179ec
136c866
2ea14ba
a9459a6
a9e5d6e
6c049ed
f11252e
b741001
403c55a
95a4d8e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import json | ||
from typing import List | ||
import requests | ||
|
||
import numpy as np | ||
|
@@ -147,6 +148,65 @@ def __init__(self): | |
field_names.CENSUS_DECENNIAL_UNEMPLOYMENT_FIELD_2009 | ||
) | ||
|
||
# Race/Ethnicity fields | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @lucasmbrown-usds I'd love your feedback on how I mapped these various variables back to the main results. I tried to follow logic similar to that in the ACS ETL; the mapping is also subjective, so I'm happy to make any changes. |
||
self.TOTAL_RACE_POPULATION_FIELD = "PCT086001" # Total | ||
self.ASIAN_FIELD = "PCT086002" # Total!!Asian | ||
self.BLACK_FIELD = "PCT086003" # Total!!Black or African American | ||
self.HAWAIIAN_FIELD = ( | ||
"PCT086004" # Total!!Native Hawaiian and Other Pacific Islander | ||
) | ||
# Note that the 2010 census for island areas does not break out | ||
# hispanic and non-hispanic white, so this is slightly different from | ||
# our other demographic data | ||
self.NON_HISPANIC_WHITE_FIELD = "PCT086005" # Total!!White | ||
self.HISPANIC_FIELD = "PCT086006" # Total!!Hispanic or Latino | ||
self.OTHER_RACE_FIELD = ( | ||
"PCT086007" # Total!!Other Ethnic Origin or Race | ||
) | ||
|
||
self.TOTAL_RACE_POPULATION_VI_FIELD = "P003001" # Total | ||
self.BLACK_VI_FIELD = ( | ||
"P003003" # Total!!One race!!Black or African American alone | ||
) | ||
self.AMERICAN_INDIAN_VI_FIELD = "P003005" # Total!!One race!!American Indian and Alaska Native alone | ||
mattbowen-usds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.ASIAN_VI_FIELD = "P003006" # Total!!One race!!Asian alone | ||
self.HAWAIIAN_VI_FIELD = "P003007" # Total!!One race!!Native Hawaiian and Other Pacific Islander alone | ||
self.TWO_OR_MORE_RACES_VI_FIELD = "P003009" # Total!!Two or More Races | ||
self.NON_HISPANIC_WHITE_VI_FIELD = ( | ||
"P005006" # Total!!Not Hispanic or Latino!!One race!!White alone | ||
) | ||
self.HISPANIC_VI_FIELD = "P005002" # Total!!Hispanic or Latino | ||
self.OTHER_RACE_VI_FIELD = ( | ||
"P003008" # Total!!One race!!Some Other Race alone | ||
) | ||
self.TOTAL_RACE_POPULATION_VI_FIELD = "P003001" # Total | ||
|
||
self.TOTAL_RACE_POPULATION_FIELD_NAME = ( | ||
"Total population surveyed on racial data" | ||
) | ||
self.BLACK_FIELD_NAME = "Black or African American" | ||
self.AMERICAN_INDIAN_FIELD_NAME = "American Indian / Alaska Native" | ||
self.ASIAN_FIELD_NAME = "Asian" | ||
self.HAWAIIAN_FIELD_NAME = "Native Hawaiian or Pacific" | ||
self.TWO_OR_MORE_RACES_FIELD_NAME = "two or more races" | ||
self.NON_HISPANIC_WHITE_FIELD_NAME = "White" | ||
self.HISPANIC_FIELD_NAME = "Hispanic or Latino" | ||
# Note that `other` is lowercase because the whole field will show up in the download | ||
# file as "Percent other races" | ||
self.OTHER_RACE_FIELD_NAME = "other races" | ||
|
||
# Name output demographics fields. | ||
self.RE_OUTPUT_FIELDS = [ | ||
self.BLACK_FIELD_NAME, | ||
self.AMERICAN_INDIAN_FIELD_NAME, | ||
self.ASIAN_FIELD_NAME, | ||
self.HAWAIIAN_FIELD_NAME, | ||
self.TWO_OR_MORE_RACES_FIELD_NAME, | ||
self.NON_HISPANIC_WHITE_FIELD_NAME, | ||
self.HISPANIC_FIELD_NAME, | ||
self.OTHER_RACE_FIELD_NAME, | ||
] | ||
|
||
var_list = [ | ||
self.MEDIAN_INCOME_FIELD, | ||
self.TOTAL_HOUSEHOLD_RATIO_INCOME_TO_POVERTY_LEVEL_FIELD, | ||
|
@@ -162,6 +222,13 @@ def __init__(self): | |
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD, | ||
self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD, | ||
self.TOTAL_POP_FIELD, | ||
self.TOTAL_RACE_POPULATION_FIELD, | ||
self.ASIAN_FIELD, | ||
self.BLACK_FIELD, | ||
self.HAWAIIAN_FIELD, | ||
self.NON_HISPANIC_WHITE_FIELD, | ||
self.HISPANIC_FIELD, | ||
self.OTHER_RACE_FIELD, | ||
] | ||
var_list = ",".join(var_list) | ||
|
||
|
@@ -180,6 +247,15 @@ def __init__(self): | |
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_VI_FIELD, | ||
self.EMPLOYMENT_FEMALE_UNEMPLOYED_VI_FIELD, | ||
self.TOTAL_POP_VI_FIELD, | ||
self.BLACK_VI_FIELD, | ||
self.AMERICAN_INDIAN_VI_FIELD, | ||
self.ASIAN_VI_FIELD, | ||
self.HAWAIIAN_VI_FIELD, | ||
self.TWO_OR_MORE_RACES_VI_FIELD, | ||
self.NON_HISPANIC_WHITE_VI_FIELD, | ||
self.HISPANIC_VI_FIELD, | ||
self.OTHER_RACE_VI_FIELD, | ||
self.TOTAL_RACE_POPULATION_VI_FIELD, | ||
] | ||
var_list_vi = ",".join(var_list_vi) | ||
|
||
|
@@ -210,6 +286,23 @@ def __init__(self): | |
self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD: self.EMPLOYMENT_MALE_UNEMPLOYED_FIELD, | ||
self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD: self.EMPLOYMENT_FEMALE_IN_LABOR_FORCE_FIELD, | ||
self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD: self.EMPLOYMENT_FEMALE_UNEMPLOYED_FIELD, | ||
self.TOTAL_RACE_POPULATION_FIELD: self.TOTAL_RACE_POPULATION_FIELD_NAME, | ||
self.TOTAL_RACE_POPULATION_VI_FIELD: self.TOTAL_RACE_POPULATION_FIELD_NAME, | ||
# Note there is no American Indian data for AS/GU/MI | ||
self.AMERICAN_INDIAN_VI_FIELD: self.AMERICAN_INDIAN_FIELD_NAME, | ||
mattbowen-usds marked this conversation as resolved.
Show resolved
Hide resolved
|
||
self.ASIAN_FIELD: self.ASIAN_FIELD_NAME, | ||
self.ASIAN_VI_FIELD: self.ASIAN_FIELD_NAME, | ||
self.BLACK_FIELD: self.BLACK_FIELD_NAME, | ||
self.BLACK_VI_FIELD: self.BLACK_FIELD_NAME, | ||
self.HAWAIIAN_FIELD: self.HAWAIIAN_FIELD_NAME, | ||
self.HAWAIIAN_VI_FIELD: self.HAWAIIAN_FIELD_NAME, | ||
self.TWO_OR_MORE_RACES_VI_FIELD: self.TWO_OR_MORE_RACES_FIELD_NAME, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Want to add a note like `# Note there is no "Two or more races" data for AS/GU/MI`? |
||
self.NON_HISPANIC_WHITE_FIELD: self.NON_HISPANIC_WHITE_FIELD_NAME, | ||
self.NON_HISPANIC_WHITE_VI_FIELD: self.NON_HISPANIC_WHITE_FIELD_NAME, | ||
self.HISPANIC_FIELD: self.HISPANIC_FIELD_NAME, | ||
self.HISPANIC_VI_FIELD: self.HISPANIC_FIELD_NAME, | ||
self.OTHER_RACE_FIELD: self.OTHER_RACE_FIELD_NAME, | ||
self.OTHER_RACE_VI_FIELD: self.OTHER_RACE_FIELD_NAME, | ||
} | ||
|
||
# To do: Ask Census Slack Group about whether you need to hardcode the county fips | ||
|
@@ -252,6 +345,8 @@ def __init__(self): | |
+ "&for=tract:*&in=state:{}%20county:{}" | ||
) | ||
|
||
self.final_race_fields: List[str] = [] | ||
|
||
self.df: pd.DataFrame | ||
self.df_vi: pd.DataFrame | ||
self.df_all: pd.DataFrame | ||
|
@@ -264,14 +359,16 @@ def extract(self) -> None: | |
f"Downloading data for state/territory {island['state_abbreviation']}" | ||
) | ||
for county in island["county_fips"]: | ||
api_url = self.API_URL.format( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice |
||
self.DECENNIAL_YEAR, | ||
island["state_abbreviation"], | ||
island["var_list"], | ||
island["fips"], | ||
county, | ||
) | ||
logger.debug(f"CENSUS: Requesting {api_url}") | ||
download = requests.get( | ||
self.API_URL.format( | ||
self.DECENNIAL_YEAR, | ||
island["state_abbreviation"], | ||
island["var_list"], | ||
island["fips"], | ||
county, | ||
), | ||
api_url, | ||
timeout=settings.REQUESTS_DEFAULT_TIMOUT, | ||
) | ||
|
||
|
@@ -379,6 +476,19 @@ def transform(self) -> None: | |
self.df_all["state"] + self.df_all["county"] + self.df_all["tract"] | ||
) | ||
|
||
# Calculate stats by race | ||
for race_field_name in self.RE_OUTPUT_FIELDS: | ||
output_field_name = ( | ||
field_names.PERCENT_PREFIX | ||
+ race_field_name | ||
+ field_names.ISLAND_AREA_BACKFILL_SUFFIX | ||
) | ||
self.final_race_fields.append(output_field_name) | ||
self.df_all[output_field_name] = ( | ||
self.df_all[race_field_name] | ||
/ self.df_all[self.TOTAL_RACE_POPULATION_FIELD_NAME] | ||
) | ||
|
||
# Reporting Missing Values | ||
for col in self.df_all.columns: | ||
missing_value_count = self.df_all[col].isnull().sum() | ||
|
@@ -402,7 +512,7 @@ def load(self) -> None: | |
self.PERCENTAGE_HOUSEHOLDS_BELOW_200_PERC_POVERTY_LEVEL_FIELD_NAME, | ||
self.PERCENTAGE_HIGH_SCHOOL_ED_FIELD_NAME, | ||
self.UNEMPLOYMENT_FIELD_NAME, | ||
] | ||
] + self.final_race_fields | ||
|
||
self.df_all[columns_to_include].to_csv( | ||
path_or_buf=self.OUTPUT_PATH / "usa.csv", index=False | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is it worth adding a test for this? it seems like it might be straightforward to test and would add a lot of confidence.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good call --- done in 403c55a
It is an ugly test, but it is a test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. I was thinking a test directly of
_backfill_island_demographics
(just give it a simple input DF and test the output DF), but your test works!