Skip to content

Commit

Permalink
Merge pull request #9 from olincollege/SAN-30-Collect-income-levels
Browse files Browse the repository at this point in the history
SAN-30 collect income levels
  • Loading branch information
cory0417 authored Nov 17, 2024
2 parents 3f95e24 + c59bf9b commit 3843235
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/source/explanations/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,7 @@ Streetlights Dataset

The main information that the streetlights dataset should contain is the location of the streetlights. Additional information, such as the type of bulb, last-replacement year, and wattage, is useful to have as well. After talking to Michael Donaghy, Superintendent of Street Lighting at the City of Boston Public Works Department, we learned that Boston completed a full catalog of its streetlight assets in 2023. We acknowledge that many cities might not have this data available, in which case `OpenStreetMap features <https://wiki.openstreetmap.org/wiki/Tag:highway%3Dstreet_lamp>`_ could be used to roughly estimate the streetlight locations. The Boston streetlight dataset can be viewed at the `following link <https://sdmaps.maps.arcgis.com/apps/dashboards/84e1553e754b424f9c544ab5079ed99f>`_.

Income Dataset
**************

The income dataset is also sourced from the American Community Survey (ACS) 5-year estimates. The dataset includes median household income data for each census tract within a specified state and year. The data is used to analyze the relationship between income levels and pedestrian safety, as well as to identify areas with possible infrastructure inequity.
38 changes: 38 additions & 0 deletions src/night_light/socioeconomic/median_household_income.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from pygris.data import get_census
from night_light.utils.fips import StateFIPS
from pygris import tracts
import geopandas as gpd


def get_median_household_income(
    year: int = 2021, state: StateFIPS = StateFIPS.MASSACHUSETTS
) -> gpd.GeoDataFrame:
    """
    Build a tract-level GeoDataFrame of median household income.

    Queries variable ``B19013_001E`` from the ACS 5-year dataset
    (``acs/acs5``) for every census tract in the given state, exposes it
    under the column name ``median_household_income``, and joins it onto
    the tract geometries returned by ``pygris.tracts`` using ``GEOID``.

    Args:
        year (int): The year of the ACS survey; also used as the vintage of
            the tract geometries.
        state (StateFIPS): The state to fetch data for.

    Returns:
        gpd.GeoDataFrame: Tract geometries with a ``median_household_income``
            column. Only tracts whose ``GEOID`` appears in both the census
            response and the geometry table are kept (default merge).
    """
    census_query = {
        "for": "tract:*",
        "in": f"state:{state.value}",
    }
    income = get_census(
        dataset="acs/acs5",
        variables=["B19013_001E"],
        year=year,
        params=census_query,
        return_geoid=True,
        guess_dtypes=True,
    )

    # Append the readable column first, then drop the raw ACS code, so the
    # new column ends up last in the census table.
    income = income.assign(median_household_income=income["B19013_001E"]).drop(
        columns=["B19013_001E"]
    )

    tract_shapes = tracts(state=state.value, year=year)
    return tract_shapes.merge(income, on="GEOID")
4 changes: 2 additions & 2 deletions tests/test_adt_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def test_query_boston_adt(boston_traffic):

def test_save_boston_geojson(boston_traffic):
"""Test saving the Boston average annual daily traffic to a GeoJSON file"""
query_geojson.save_geojson(boston_traffic, "test_boston_crosswalk.geojson")
geojson_filename = "test_boston_crosswalk.geojson"
query_geojson.save_geojson(boston_traffic, "test_boston_traffic.geojson")
geojson_filename = "test_boston_traffic.geojson"
saved_gdf = query_geojson.gpd.read_file(geojson_filename)

assert boston_traffic.crs == saved_gdf.crs
Expand Down
51 changes: 51 additions & 0 deletions tests/test_median_household_income_mapping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import os
import time

import folium

from night_light.socioeconomic.median_household_income import (
get_median_household_income,
)
from night_light.utils import create_folium_map, open_html_file, Tooltip
from night_light.utils.fips import StateFIPS
from night_light.utils.mapping import Choropleth


def test_ma_median_household_income_choropleth():
    """Test creating MA median household income choropleth map.

    Builds a choropleth layer plus an invisible GeoJson layer that only
    carries tooltips, renders both to an HTML file, checks the file was
    written, opens it, and finally removes it. The HTML file is removed even
    if a step after map creation fails, so a failing run does not leave the
    artifact in the working directory.
    """
    map_filename = "test_ma_median_household_income_map.html"
    median_income_data = get_median_household_income(
        year=2021, state=StateFIPS.MASSACHUSETTS
    )
    # Rows without an income value are dropped before building the layers.
    median_income_data = median_income_data.dropna(subset=["median_household_income"])

    income_layer = Choropleth(
        geo_data=median_income_data,
        name="Median Household Income Choropleth",
        data=median_income_data,
        columns=["GEOID", "median_household_income"],
        key_on="feature.properties.GEOID",
    )

    # Fully transparent overlay: contributes hover tooltips only.
    tooltip_layer = folium.GeoJson(
        median_income_data,
        name="Median Household Income Tooltips",
        style_function=lambda x: {"fillColor": "transparent", "color": "transparent"},
        tooltip=Tooltip(
            fields=["NAMELSAD", "median_household_income"],
            aliases=["Tract Name", "Median Household Income"],
        ),
    )

    try:
        create_folium_map(
            layers=[income_layer, tooltip_layer],
            center=[42.4072, -71.3824],
            zoom_start=9,
            map_filename=map_filename,
        )

        assert os.path.exists(map_filename)
        open_html_file(map_filename)
        # NOTE(review): presumably gives the viewer time to load the file
        # before it is deleted -- confirm against open_html_file.
        time.sleep(1)
    finally:
        # Guarded removal: the file may not exist if map creation failed.
        if os.path.exists(map_filename):
            os.remove(map_filename)
    assert not os.path.exists(map_filename)
32 changes: 32 additions & 0 deletions tests/test_median_household_income_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import os
from night_light.socioeconomic.median_household_income import (
get_median_household_income,
)
from night_light.utils import query_geojson
from night_light.utils.fips import StateFIPS


def test_get_ma_median_household_income():
    """Test getting MA median household income.

    Checks that the query returns at least one row, that the
    ``median_household_income`` column is present, and that no non-missing
    income value is negative.
    """
    income_gdf = get_median_household_income(state=StateFIPS.MASSACHUSETTS)
    assert len(income_gdf) > 0
    assert "median_household_income" in income_gdf.columns

    # Missing incomes are excluded before the sign check.
    reported = income_gdf.dropna(subset=["median_household_income"])
    is_negative = reported["median_household_income"] < 0
    assert not is_negative.any()


def test_ma_median_household_geojson():
    """Test saving MA median household income to a GeoJSON file.

    Saves the income GeoDataFrame, reads it back, and checks that CRS,
    column set, index, and shape survive the round trip. The GeoJSON file is
    removed even when one of the round-trip assertions fails, so a failing
    run does not leave the artifact in the working directory.
    """
    data = get_median_household_income(state=StateFIPS.MASSACHUSETTS)
    geojson_filename = "test_ma_median_household_income.geojson"

    try:
        query_geojson.save_geojson(data, geojson_filename)
        saved_gdf = query_geojson.gpd.read_file(geojson_filename)

        assert data.crs == saved_gdf.crs
        assert set(data.columns) == set(saved_gdf.columns)
        assert data.index.equals(saved_gdf.index)
        assert data.shape == saved_gdf.shape
    finally:
        # Guarded removal: the file may not exist if saving itself failed.
        if os.path.exists(geojson_filename):
            os.remove(geojson_filename)

    assert not os.path.exists(geojson_filename)

0 comments on commit 3843235

Please sign in to comment.