Skip to content

Commit

Permalink
Merge pull request #20 from olincollege/SAN-84-determine-relevant-edges
Browse files Browse the repository at this point in the history
SAN-84 determine relevant edges
  • Loading branch information
cory0417 authored Jan 29, 2025
2 parents 92b12e1 + c730792 commit b6c925d
Show file tree
Hide file tree
Showing 3 changed files with 190 additions and 6 deletions.
92 changes: 92 additions & 0 deletions src/night_light/GIS_predictor/edge_classifier/edge_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import duckdb
import night_light.bronze_db.util as util


def initialize_edge_classifier_db(
con: duckdb.DuckDBPyConnection,
crosswalks_geojson_path: str,
street_segments_geojson_path: str,
) -> duckdb.DuckDBPyConnection:
"""
Initialize duckdb database tables for edge classifier.
The db should include Boston crosswalks and street segments tables.
"""
datasets = [
(crosswalks_geojson_path, "crosswalks"),
(street_segments_geojson_path, "street_segments"),
]

util.load_multiple_datasets(con, datasets)

return con


def decompose_crosswalk_edges(con: duckdb.DuckDBPyConnection):
"""Decompose crosswalks polygons into separate edges."""
con.execute(
"""
-- Create or replace the table of crosswalk edges (line segments)
CREATE OR REPLACE TABLE crosswalk_segments AS
WITH boundaries AS (
SELECT
cw.OBJECTID AS crosswalk_id,
(UNNEST(ST_Dump(ST_Boundary(ST_GeomFromText(cw.geometry)))).geom) AS boundary_geom
FROM crosswalks cw
),
segments AS (
SELECT
crosswalk_id,
g.i AS edge_id,
ST_AsText(ST_MakeLine(
ST_PointN(boundary_geom, CAST(g.i AS INT)),
ST_PointN(boundary_geom, CAST(g.i + 1 AS INT))
)) AS segment_geom
FROM boundaries
-- Generate a series 1..(npoints-1), label it g(i)
CROSS JOIN generate_series(1, ST_NPoints(boundary_geom) - 1) AS g(i)
)
SELECT
crosswalk_id,
edge_id,
segment_geom AS geometry
FROM segments;
"""
)


def classify_edges_by_intersection(con: duckdb.DuckDBPyConnection):
"""Classify edges based on their intersection with street segments."""
con.execute(
"""
-- 1. Add a new boolean column
ALTER TABLE crosswalk_segments
ADD COLUMN is_vehicle_edge BOOLEAN;
-- 2. Update is_vehicle_edge = TRUE if there's any intersection, otherwise FALSE
UPDATE crosswalk_segments
SET is_vehicle_edge = (
SELECT COUNT(*) > 0
FROM street_segments s
WHERE ST_Intersects(ST_GeomFromText(crosswalk_segments.geometry), ST_GeomFromText(s.geometry))
);
"""
)


if __name__ == "__main__":
conn = util.connect_to_duckdb("test_edge_classifier.db")
initialize_edge_classifier_db(
conn,
"../../road_characteristics/boston_street_segments.geojson",
"../../../../tests/test_boston_crosswalk.geojson",
)

decompose_crosswalk_edges(conn)
classify_edges_by_intersection(conn)
# save the db table to geojson
util.save_table_to_geojson(
conn,
"crosswalk_segments",
"test_crosswalk_segments.geojson",
)
67 changes: 61 additions & 6 deletions src/night_light/bronze_db/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from geopandas import GeoDataFrame
from typing import List, Tuple, Union

from pandas import DataFrame


def connect_to_duckdb(db_path: str) -> duckdb.DuckDBPyConnection:
"""
Expand All @@ -30,6 +32,27 @@ def connect_to_duckdb(db_path: str) -> duckdb.DuckDBPyConnection:
return con


def _query_table_to_df(
con: duckdb.DuckDBPyConnection,
table_name: str,
query: str = None,
) -> DataFrame:
"""
Query a DuckDB table and return the results as a pandas DataFrame.
Args:
con (duckdb.DuckDBPyConnection): Connection to the DuckDB database.
table_name (str): Name of the table to query.
query (Optional[str]): SQL query to execute. Default is to fetch the first 10 rows.
Returns:
DataFrame: Results of the query.
"""
if query is None:
query = "SELECT * FROM {table_name} LIMIT 10".format(table_name=table_name)
return con.execute(query).fetchdf()


def query_table_to_gdf(
con: duckdb.DuckDBPyConnection,
table_name: str,
Expand All @@ -46,11 +69,11 @@ def query_table_to_gdf(
Returns:
GeoDataFrame: Results of the query.
"""
if query is None:
query = "SELECT * FROM {table_name} LIMIT 10".format(table_name=table_name)
df = con.execute(query).fetchdf()
df["geometry"] = gpd.GeoSeries.from_wkt(df["geometry"])
gdf = gpd.GeoDataFrame(df)
df = _query_table_to_df(con, table_name, query)
if "geometry" in df:
# Assume that the geometry column is in WKT format, NOT in geometry objects
df["geometry"] = gpd.GeoSeries.from_wkt(df["geometry"])
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")
return gdf


Expand Down Expand Up @@ -80,7 +103,9 @@ def load_data_to_table(
else data_source
)
if not isinstance(gdf, gpd.GeoDataFrame):
raise ValueError("data_source must be a valid GeoJSON file path or GeoDataFrame.")
raise ValueError(
"data_source must be a valid GeoJSON file path or GeoDataFrame."
)

# Convert geometry to WKT and ensure compatibility with DuckDB
if "geometry" in gdf:
Expand All @@ -107,3 +132,33 @@ def load_multiple_datasets(
"""
for data_source, table_name in datasets:
load_data_to_table(con, data_source, table_name)


def save_table_to_geojson(
con: duckdb.DuckDBPyConnection, table_name: str, filename: str
) -> None:
"""
Save a DuckDB table to a GeoJSON file.
Args:
con (duckdb.DuckDBPyConnection): Connection to the DuckDB database.
table_name (str): Name of the table to save.
filename (str): Path to the output GeoJSON file.
"""
gdf = query_table_to_gdf(con, table_name, f"SELECT * FROM {table_name}")
gdf.to_file(filename, driver="GeoJSON")


def save_table_to_parquet(
con: duckdb.DuckDBPyConnection, table_name: str, filename: str
) -> None:
"""
Save a DuckDB table to a parquet file.
Args:
con (duckdb.DuckDBPyConnection): Connection to the DuckDB database.
table_name (str): Name of the table to save.
filename (str): Path to the output parquet file.
"""
df = _query_table_to_df(con, table_name, f"SELECT * FROM {table_name}")
df.to_parquet(filename)
37 changes: 37 additions & 0 deletions src/night_light/road_characteristics/boston_street_segments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import requests

from night_light.utils import query_geojson

BOSTON_STREET_SEGMENTS_API_URL = "https://gisportal.boston.gov/arcgis/rest/services/SAM/Live_SAM_Address/FeatureServer/3/query"
BOSTON_STREET_SEGMENTS_GEOJSON_URL = "https://data.boston.gov/dataset/b9b8b634-f28a-410f-9727-b53d0d006308/resource/e850cfd2-2c6e-4af6-9ac4-e03019412d1e/download/boston_street_segments__sam_system_.geojson"

# MA Road inventory data dictionary for ref:
# https://www.mass.gov/doc/road-inventory-data-dictionary/download

BOSTON_STREET_SEGMENTS_QUERY = {
"where": "1=1",
"outFields": "*",
"outSR": 4326,
"f": "geojson",
}


def get_boston_street_segments():
"""
Get Boston street segments data from the API (2000 records/request)
"""
return query_geojson.fetch_geojson_data(
url=BOSTON_STREET_SEGMENTS_API_URL, params=BOSTON_STREET_SEGMENTS_QUERY
)


def save_boston_street_segments_geojson():
"""
Save Boston street segments data as GeoJSON
"""
with open("boston_street_segments.geojson", "wb") as geojson:
geojson.write(requests.get(BOSTON_STREET_SEGMENTS_GEOJSON_URL).content)


if __name__ == "__main__":
save_boston_street_segments_geojson()

0 comments on commit b6c925d

Please sign in to comment.