cast to epsg:3310 and rename geom columns to be clear
tiffanychu90 committed Feb 20, 2024
1 parent b891ab9 commit 017dc45
Showing 8 changed files with 112 additions and 87 deletions.
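For context, the pattern the commit applies across these files is to reproject geometries to EPSG:3310 (NAD83 / California Albers, a meters-based CRS) and to rename generic geometry columns so each one says what it holds. A minimal sketch of that pattern, assuming geopandas; the helper name and column names here are illustrative, not the repo's API:

import geopandas as gpd

PROJECT_CRS = "EPSG:3310"  # NAD83 / California Albers; projected, so distances come out in meters

def clarify_stop_geometry(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    # Rename the generic "geometry" column to say what it holds,
    # re-register it as the active geometry, then cast to the projected CRS.
    gdf = gdf.rename(columns={"geometry": "stop_geometry"}).set_geometry("stop_geometry")
    return gdf.to_crs(PROJECT_CRS)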
6 changes: 3 additions & 3 deletions Makefile
@@ -11,7 +11,7 @@ build_portfolio_site:
git add portfolio/$(site)/*.yml portfolio/$(site)/*.md
git add portfolio/$(site)/*.ipynb
git add portfolio/sites/$(site).yml
#make production_portfolio
make production_portfolio


build_competitive_corridors:
@@ -39,8 +39,8 @@ build_ntd_report:
make build_portfolio_site

build_route_speeds:
$(eval override site = route_speeds)
cd rt_segment_speeds / && make pip install -r requirements.txt && cd ..
$(eval export site = route_speeds)
cd rt_segment_speeds / && pip install -r requirements.txt && cd ..
cd rt_segment_speeds/ && python deploy_portfolio_yaml.py && cd ..
make build_portfolio_site

14 changes: 12 additions & 2 deletions _shared_utils/shared_utils/rt_dates.py
@@ -3,6 +3,8 @@
GCS: gs://calitp-analytics-data/data-analyses/rt_delay/cached_views/
"""
from typing import Literal

# HQTAs and RT speedmaps
DATES = {
"feb2022": "2022-02-08",
@@ -57,8 +59,16 @@

y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]

apr_week = [v for k, v in DATES.items() if "apr2023" in k]
oct_week = [v for k, v in DATES.items() if "oct2023" in k]

def get_week(month: Literal["apr2023", "oct2023"], exclude_wed: bool) -> list:
    if exclude_wed:
        return [v for k, v in DATES.items() if month in k and not k.endswith(month)]
    else:
        return [v for k, v in DATES.items() if month in k]


apr_week = get_week(month="apr2023", exclude_wed=False)
oct_week = get_week(month="oct2023", exclude_wed=False)


# Planning and Modal Advisory Committee (PMAC) - quarterly
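A sketch of how the new helper is expected to behave, assuming DATES held only these illustrative entries (the bare month key being the Wednesday and the suffixed keys the surrounding weekdays):

DATES = {
    "apr2023a": "2023-04-10",  # Mon
    "apr2023b": "2023-04-11",  # Tue
    "apr2023": "2023-04-12",   # Wed
    "apr2023c": "2023-04-13",  # Thu
}

get_week(month="apr2023", exclude_wed=False)
# -> ["2023-04-10", "2023-04-11", "2023-04-12", "2023-04-13"], the old apr_week list

get_week(month="apr2023", exclude_wed=True)
# -> ["2023-04-10", "2023-04-11", "2023-04-13"], Wednesday dropped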
13 changes: 6 additions & 7 deletions rt_segment_speeds/scripts/average_speeds.py
@@ -366,7 +366,7 @@ def multi_day_averages(analysis_date_list: list, dict_inputs: dict):
columns = col_order + ["route_name", "geometry"]
)

utils_to_add.geoparquet_gcs_export(
utils.geoparquet_gcs_export(
route_dir_avg,
SEGMENT_GCS,
f"{ROUTE_DIR_FILE}_{time_span_str}"
@@ -432,15 +432,14 @@ def stage_open_data_exports(analysis_date: str, dict_inputs: dict):

logger.info(f"average rollups for {analysis_date}: {end - start}")

'''
for month in ["apr2023", "oct2023"]:
start = datetime.datetime.now()
one_week = [v for k, v in rt_dates.DATES.items() if month in k]

for one_week in [rt_dates.oct_week, rt_dates.apr_week]:
start = datetime.datetime.now()

multi_day_averages(one_week, STOP_SEG_DICT)
end = datetime.datetime.now()

logger.info(f"average rollups for {one_week}: {end - start}")
'''



2 changes: 2 additions & 0 deletions rt_segment_speeds/scripts/config.yml
@@ -16,6 +16,8 @@ stop_segments:
rt_stop_times:
stage1: "vp_usable"
stage2: "nearest/nearest_vp_rt_stop_times"
stage3: "stop_arrivals_rt_stop_times"
stage4: "speeds_rt_stop_times"
segments_file: "segment_options/stop_segments"
road_segments:
stage1: "vp_usable"
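The new stage3/stage4 keys follow the existing pattern where each pipeline step looks up its input/output filenames in this config via helpers.get_parameters(CONFIG_PATH, "rt_stop_times"). The diff only calls that loader; a plausible sketch of what it does, under the assumption that it is a thin YAML reader:

import yaml

CONFIG_PATH = "config.yml"

def get_parameters(config_path: str, key: str) -> dict:
    # Read one top-level block (e.g. "rt_stop_times") of the pipeline config.
    with open(config_path) as f:
        return yaml.safe_load(f)[key]

RT_STOP_TIMES_DICT = get_parameters(CONFIG_PATH, "rt_stop_times")
RT_STOP_TIMES_DICT["stage3"]  # -> "stop_arrivals_rt_stop_times"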
21 changes: 13 additions & 8 deletions rt_segment_speeds/scripts/interpolate_stop_arrival.py
@@ -18,24 +18,25 @@
def project_points_onto_shape(
stop_geometry: shapely.Point,
vp_coords_trio: shapely.LineString,
shape_geometry: shapely.Geometry,
shape_geometry: shapely.LineString,
timestamp_arr: np.ndarray,
) -> tuple[float]:
"""
Project the points in the vp trio against the shape geometry
and the stop position onto the shape geometry.
Use np.interp to find interpolated arrival time
"""
stop_position = shape_geometry.project(stop_geometry)

stop_position = vp_coords_trio.project(stop_geometry)
stop_meters = shape_geometry.project(stop_geometry)

points = [shapely.Point(p) for p in vp_coords_trio.coords]
xp = np.asarray([shape_geometry.project(p) for p in points])
xp = np.asarray([vp_coords_trio.project(p) for p in points])

yp = timestamp_arr.astype("datetime64[s]").astype("float64")

interpolated_arrival = np.interp(stop_position, xp, yp)

return stop_position, interpolated_arrival
return stop_meters, interpolated_arrival
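With this change the interpolation x-axis (xp) is distance along the vp coordinate trio rather than along the whole shape, while stop_meters still records the stop's distance along the shape for downstream speed math. A minimal numeric sketch of the np.interp step, with made-up distances and timestamps:

import numpy as np

xp = np.asarray([0.0, 150.0, 320.0])  # meters along the vp trio for the three pings
yp = np.asarray(
    ["2023-04-12T08:00:05", "2023-04-12T08:00:35", "2023-04-12T08:01:20"],
    dtype="datetime64[s]",
).astype("float64")

stop_position = 200.0  # the stop, projected onto the same trio line
interpolated_arrival = np.interp(stop_position, xp, yp)
np.float64(interpolated_arrival).astype("datetime64[s]")  # ~2023-04-12T08:00:48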


def interpolate_stop_arrivals(
@@ -58,6 +59,7 @@ def interpolate_stop_arrivals(
)

df = df.assign(
stop_geometry = df.stop_geometry.to_crs(PROJECT_CRS),
vp_coords_trio = df.vp_coords_trio.to_crs(PROJECT_CRS)
)

@@ -96,7 +98,8 @@ def interpolate_stop_arrivals(
arrival_time = stop_arrival_series,
).astype({"arrival_time": "datetime64[s]"})[
["trip_instance_key", "shape_array_key",
"stop_sequence", "stop_id", "stop_meters",
"stop_sequence", "stop_id",
"stop_meters",
"arrival_time"
]]

@@ -121,8 +124,10 @@
level="INFO")

STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")

RT_STOP_TIMES_DICT = helpers.get_parameters(CONFIG_PATH, "rt_stop_times")

for analysis_date in analysis_date_list:
interpolate_stop_arrivals(analysis_date, STOP_SEG_DICT)
#interpolate_stop_arrivals(analysis_date, RT_STOP_TIMES_DICT)


110 changes: 56 additions & 54 deletions rt_segment_speeds/scripts/nearest_vp_to_stop.py
@@ -2,6 +2,8 @@
Find nearest_vp_idx to the stop position
using scipy KDTree.
"""
import dask.dataframe as dd
import dask_geopandas as dg
import datetime
import geopandas as gpd
import numpy as np
@@ -16,6 +18,11 @@
from segment_speed_utils import helpers, neighbor
from segment_speed_utils.project_vars import SEGMENT_GCS

stop_time_col_order = [
'trip_instance_key', 'shape_array_key',
'stop_sequence', 'stop_id', 'stop_pair',
'stop_primary_direction', 'geometry'
]
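This new module-level list lets both entry points below normalize their stop_times frames to one canonical column order via .reindex(columns=stop_time_col_order). A tiny sketch of the reindex behavior:

import pandas as pd

df = pd.DataFrame({"stop_id": ["A"], "trip_instance_key": ["t1"]})
df.reindex(columns=["trip_instance_key", "stop_id", "stop_pair"])
# Columns come back in the requested order; "stop_pair", absent here, is filled with NaN.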

def add_nearest_neighbor_result(
gdf: gpd.GeoDataFrame,
@@ -31,11 +38,11 @@ def add_nearest_neighbor_result(
f"{SEGMENT_GCS}condensed/vp_condensed_{analysis_date}.parquet",
columns = ["trip_instance_key", "vp_idx",
"location_timestamp_local",
"geometry"]
"geometry"],
).rename(columns = {
"vp_idx": "trip_vp_idx",
"geometry": "trip_geometry"
})
}).set_geometry("trip_geometry").to_crs(WGS84)

gdf2 = pd.merge(
gdf,
@@ -52,48 +59,40 @@ def add_nearest_neighbor_result(
coords_trio_series = []

# Iterate through and find the nearest_vp_idx, then surrounding trio
nearest_vp_idx = np.vectorize(neighbor.add_nearest_vp_idx)(
gdf2.vp_geometry, gdf2.stop_geometry, gdf2.vp_idx
)

gdf2 = gdf2.assign(
nearest_vp_idx = nearest_vp_idx,
).drop(
columns = ["vp_idx", "vp_geometry"]
)

for row in gdf2.itertuples():
nearest_vp = neighbor.add_nearest_vp_idx(
getattr(row, "geometry"),
getattr(row, "stop_geometry"),
getattr(row, "vp_idx")
)

vp_idx_arr = np.asarray(getattr(row, "trip_vp_idx"))
timestamp_arr = np.asarray(getattr(row, "location_timestamp_local"))
coords_arr = np.asarray(getattr(row, "trip_geometry").coords)

vp_trio, time_trio, coords_trio = neighbor.add_trio(
nearest_vp,
getattr(row, "nearest_vp_idx"),
np.asarray(getattr(row, "trip_vp_idx")),
np.asarray(getattr(row, "location_timestamp_local")),
np.array(getattr(row, "trip_geometry").coords),
np.asarray(getattr(row, "trip_geometry").coords),
)

nearest_vp_idx_series.append(nearest_vp)
trio_line = shapely.LineString(coords_trio)
vp_trio_series.append(vp_trio)
time_trio_series.append(time_trio)
coords_trio_series.append(trio_line)


gdf2 = gdf2.assign(
nearest_vp_idx = nearest_vp_idx_series,
vp_idx_trio = vp_trio_series,
location_timestamp_local_trio = time_trio_series,
vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)
)

coords_trio_series.append(shapely.LineString(coords_trio))

drop_cols = [
"vp_idx", "geometry",
"location_timestamp_local",
"trip_vp_idx", "trip_geometry"
]

gdf2 = gdf2.drop(columns = drop_cols)
gdf2 = gdf2.assign(
vp_idx_trio = vp_trio_series,
location_timestamp_local_trio = time_trio_series,
vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)
).drop(columns = drop_cols)

del nearest_vp_idx_series, vp_trio_series
del time_trio_series, coords_trio_series
del vp_trio_series, time_trio_series, coords_trio_series

return gdf2
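The nearest-vp lookup now happens once, vectorized over the stop, vp-path, and vp_idx columns, and only the trio assembly still iterates row by row. A toy sketch of the np.vectorize pattern; pick_nearest below is an illustrative stand-in, not neighbor.add_nearest_vp_idx:

import numpy as np
import shapely

def pick_nearest(vp_geometry, stop_geometry, vp_idx):
    # Return the vp_idx of the ping closest to the stop.
    dists = [stop_geometry.distance(shapely.Point(p)) for p in vp_geometry.coords]
    return np.asarray(vp_idx)[np.argmin(dists)]

vp_line = shapely.LineString([(0, 0), (1, 0), (2, 0)])
stop = shapely.Point(1.1, 0.2)
vp_ids = np.empty(1, dtype=object)
vp_ids[0] = np.array([10, 11, 12])  # one vp_idx array per row

np.vectorize(pick_nearest)(
    np.array([vp_line], dtype=object),
    np.array([stop], dtype=object),
    vp_ids,
)  # -> array([11]); the middle ping is closest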

Expand All @@ -111,14 +110,14 @@ def nearest_neighbor_rt_stop_times(

stop_times = helpers.import_scheduled_stop_times(
analysis_date,
columns = ["trip_instance_key",
columns = ["trip_instance_key", "shape_array_key",
"stop_sequence", "stop_id", "stop_pair",
"stop_primary_direction",
"geometry"],
with_direction = True,
get_pandas = True,
crs = WGS84
)
).reindex(columns = stop_time_col_order)

gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
stop_times, analysis_date)
@@ -154,45 +153,48 @@ def nearest_neighbor_shape_segments(
EXPORT_FILE = dict_inputs["stage2"]
SEGMENT_FILE = dict_inputs["segments_file"]

subset_trips = pd.read_parquet(
rt_trips = helpers.import_unique_vp_trips(analysis_date)

shape_stop_combinations = pd.read_parquet(
f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet",
columns = ["st_trip_instance_key"]
).st_trip_instance_key.unique()
columns = ["trip_instance_key",
"stop_id1", "stop_pair",
"st_trip_instance_key"],
filters = [[("trip_instance_key", "in", rt_trips)]]
).rename(columns = {"stop_id1": "stop_id"})

subset_trips = shape_stop_combinations.st_trip_instance_key.unique()

stops_to_use = helpers.import_scheduled_stop_times(
analysis_date,
columns = ["trip_instance_key", "shape_array_key",
"stop_sequence", "stop_id", "stop_pair",
"stop_primary_direction",
"geometry"],
"stop_sequence", "stop_id", "stop_pair",
"stop_primary_direction", "geometry"],
filters = [[("trip_instance_key", "in", subset_trips)]],
get_pandas = True,
with_direction = True
).rename(columns = {"trip_instance_key": "st_trip_instance_key"})

all_trips = helpers.import_scheduled_stop_times(
analysis_date,
columns = ["trip_instance_key", "shape_array_key"],
get_pandas = True,
with_direction = True
).drop_duplicates().reset_index(drop=True)

stop_times = pd.merge(
stops_to_use,
all_trips,
on = "shape_array_key",
shape_stop_combinations,
on = ["st_trip_instance_key", "stop_id", "stop_pair"],
how = "inner"
).drop(
columns = "st_trip_instance_key"
).drop_duplicates().reset_index(drop=True).reindex(
columns = stop_time_col_order
)

del stops_to_use, shape_stop_combinations

gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
stop_times, analysis_date)

del stop_times, all_trips, stops_to_use

results = add_nearest_neighbor_result(gdf, analysis_date)

del gdf
results = add_nearest_neighbor_result(gdf, analysis_date)

del stop_times, gdf

utils.geoparquet_gcs_export(
results,
SEGMENT_GCS,
@@ -206,7 +208,7 @@ def nearest_neighbor_shape_segments(
del results

return


if __name__ == "__main__":

@@ -223,5 +225,5 @@ def nearest_neighbor_shape_segments(

for analysis_date in analysis_date_list:
nearest_neighbor_shape_segments(analysis_date, STOP_SEG_DICT)
nearest_neighbor_rt_stop_times(analysis_date, RT_STOP_TIMES_DICT)
#nearest_neighbor_rt_stop_times(analysis_date, RT_STOP_TIMES_DICT)
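In nearest_neighbor_shape_segments, the old approach (merge every trip on a shape against that shape's stops) is replaced by an inner join on the exact trip/stop combinations read from the segments file, filtered to trips that actually have vehicle positions. A small sketch of the narrowing effect of that three-key join, with made-up keys:

import pandas as pd

stops_to_use = pd.DataFrame({
    "st_trip_instance_key": ["t1", "t1"],
    "stop_id": ["A", "B"],
    "stop_pair": ["A-B", "B-C"],
})
shape_stop_combinations = pd.DataFrame({
    "trip_instance_key": ["rt9"],
    "st_trip_instance_key": ["t1"],
    "stop_id": ["A"],
    "stop_pair": ["A-B"],
})

pd.merge(
    stops_to_use, shape_stop_combinations,
    on=["st_trip_instance_key", "stop_id", "stop_pair"],
    how="inner",
)
# Only the (t1, A, A-B) row survives, now tagged with its RT trip rt9.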

1 change: 0 additions & 1 deletion rt_segment_speeds/scripts/stop_arrivals_to_speed.py
@@ -147,5 +147,4 @@ def calculate_speed_from_stop_arrivals(
STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments")

for analysis_date in analysis_date_list:

calculate_speed_from_stop_arrivals(analysis_date, STOP_SEG_DICT)
