Skip to content

Commit

Permalink
Merge pull request #703 from cal-itp/error-stop-segments
Browse files Browse the repository at this point in the history
Error stop segments
  • Loading branch information
tiffanychu90 authored Apr 3, 2023
2 parents 903e498 + 8ec769e commit 38a868a
Show file tree
Hide file tree
Showing 21 changed files with 2,047 additions and 121 deletions.
Empty file removed hackathon/README.md
Empty file.
1,280 changes: 1,280 additions & 0 deletions rt_segment_speeds/09_loop_shapes_exploration.ipynb

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions rt_segment_speeds/logs/cut_stop_segments.log
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,7 @@
2023-03-02 09:13:21.190 | INFO | __main__:<module>:251 - Cut stop segments: 0:03:41.535521
2023-03-02 09:14:07.197 | INFO | __main__:<module>:264 - Add arrowized geometry and rt_dataset_key: 0:00:46.007232
2023-03-02 09:14:14.830 | INFO | __main__:<module>:273 - execution time: 0:04:35.175108
2023-04-03 09:41:40.694 | INFO | __main__:<module>:238 - Analysis date: 2023-03-15
2023-04-03 09:45:08.714 | INFO | __main__:<module>:251 - Cut stop segments: 0:03:27.996436
2023-04-03 09:45:56.279 | INFO | __main__:<module>:264 - Add arrowized geometry and rt_dataset_key: 0:00:47.565150
2023-04-03 09:46:04.479 | INFO | __main__:<module>:273 - execution time: 0:04:23.761136
4 changes: 4 additions & 0 deletions rt_segment_speeds/logs/prep_stop_segments.log
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@
2023-03-02 09:04:13.505 | INFO | __main__:<module>:218 - Prep stop segment df: 0:00:37.290996
2023-03-02 09:09:15.801 | INFO | __main__:<module>:232 - Make stop segment wide: 0:05:02.295395
2023-03-02 09:09:18.507 | INFO | __main__:<module>:241 - execution time: 0:05:42.292687
2023-04-03 09:35:01.862 | INFO | __main__:<module>:200 - Analysis date: 2023-03-15
2023-04-03 09:35:40.326 | INFO | __main__:<module>:207 - Prep stop segment df: 0:00:38.424796
2023-04-03 09:40:55.218 | INFO | __main__:<module>:221 - Make stop segment wide: 0:05:14.892670
2023-04-03 09:40:58.173 | INFO | __main__:<module>:230 - execution time: 0:05:56.272368
9 changes: 9 additions & 0 deletions rt_segment_speeds/logs/sjoin_vp_segments.log
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,12 @@
2023-03-08 20:55:27.815 | INFO | __main__:<module>:305 - attach vp to route segments: 1:21:47.508132
2023-03-08 22:35:21.561 | INFO | __main__:<module>:319 - attach vp to stop-to-stop segments: 1:39:53.746360
2023-03-08 22:35:21.563 | INFO | __main__:<module>:322 - execution time: 3:01:41.256498
2023-04-01 15:17:25.924 | INFO | __main__:<module>:285 - Analysis date: 2023-02-15
2023-04-01 15:20:04.915 | INFO | __main__:<module>:285 - Analysis date: 2023-02-15
2023-04-01 17:10:56.418 | INFO | __main__:<module>:305 - attach vp to route segments: 1:50:51.500105
2023-04-01 19:20:13.882 | INFO | __main__:<module>:319 - attach vp to stop-to-stop segments: 2:09:17.464154
2023-04-01 19:20:13.893 | INFO | __main__:<module>:322 - execution time: 4:00:08.975523
2023-04-03 10:30:53.338 | INFO | __main__:<module>:285 - Analysis date: 2023-03-15
2023-04-03 11:39:19.999 | INFO | __main__:<module>:305 - attach vp to route segments: 1:08:26.639548
2023-04-03 13:03:12.904 | INFO | __main__:<module>:319 - attach vp to stop-to-stop segments: 1:23:52.905197
2023-04-03 13:03:12.906 | INFO | __main__:<module>:322 - execution time: 2:32:19.546712
6 changes: 6 additions & 0 deletions rt_segment_speeds/logs/usable_rt_vp.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
2023-04-01 12:35:44.231 | INFO | __main__:<module>:134 - Analysis date: 2023-02-15
2023-04-01 12:39:00.176 | INFO | __main__:<module>:146 - pare down vp
2023-04-01 12:39:00.277 | INFO | __main__:<module>:149 - execution time: 0:03:16.043023
2023-04-03 10:28:38.880 | INFO | __main__:<module>:133 - Analysis date: 2023-03-15
2023-04-03 10:30:40.291 | INFO | __main__:<module>:145 - pare down vp
2023-04-03 10:30:40.293 | INFO | __main__:<module>:148 - execution time: 0:02:01.403003
25 changes: 25 additions & 0 deletions rt_segment_speeds/logs/valid_vehicle_positions.log
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,28 @@
2023-03-08 22:41:23.751 | INFO | __main__:pare_down_vp_to_valid_trips:138 - filter out to only valid trips
2023-03-08 22:49:30.470 | INFO | __main__:<module>:188 - pare down vp by stop segments 0:08:06.720494
2023-03-08 22:49:30.471 | INFO | __main__:<module>:191 - execution time: 0:13:52.787143

2023-04-01 11:01:09.429 | INFO | __main__:<module>:191 - Analysis date: 2023-02-15
2023-04-01 11:01:09.450 | INFO | __main__:pare_down_vp_to_valid_trips:163 - filter out to only valid trips
2023-04-01 11:11:32.665 | INFO | __main__:<module>:205 - pare down vp by route segments 0:10:23.216479
2023-04-01 11:11:32.669 | INFO | __main__:pare_down_vp_to_valid_trips:163 - filter out to only valid trips
2023-04-01 11:26:02.567 | INFO | __main__:<module>:213 - pare down vp by stop segments 0:14:29.901784
2023-04-01 11:26:02.569 | INFO | __main__:<module>:216 - execution time: 0:24:53.137853
2023-04-01 13:20:43.862 | INFO | __main__:<module>:128 - Analysis date: 2023-02-15
2023-04-01 13:20:43.913 | INFO | __main__:pare_down_vp_by_segment:100 - filter out to only valid trips
2023-04-01 13:25:27.312 | INFO | __main__:<module>:116 - Analysis date: 2023-02-15
2023-04-01 13:25:42.291 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-01 13:31:50.633 | INFO | __main__:<module>:130 - pare down vp by route segments 0:06:23.311710
2023-04-01 13:32:04.816 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-01 13:40:46.655 | INFO | __main__:<module>:138 - pare down vp by stop segments 0:08:56.021696
2023-04-01 13:40:46.657 | INFO | __main__:<module>:141 - execution time: 0:15:19.343564
2023-04-01 19:20:59.651 | INFO | __main__:<module>:116 - Analysis date: 2023-02-15
2023-04-01 19:21:15.010 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-01 19:27:45.623 | INFO | __main__:<module>:130 - pare down vp by route segments 0:06:45.945768
2023-04-01 19:28:01.291 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-03 13:03:26.151 | INFO | __main__:<module>:116 - Analysis date: 2023-03-15
2023-04-03 13:03:36.686 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-03 13:07:32.164 | INFO | __main__:<module>:130 - pare down vp by route segments 0:04:06.004579
2023-04-03 13:07:41.833 | INFO | __main__:pare_down_vp_by_segment:97 - filter out to only valid trips
2023-04-03 13:12:49.457 | INFO | __main__:<module>:138 - pare down vp by stop segments 0:05:17.293446
2023-04-03 13:12:49.459 | INFO | __main__:<module>:141 - execution time: 0:09:23.304992
224 changes: 219 additions & 5 deletions rt_segment_speeds/road-segments.ipynb

Large diffs are not rendered by default.

148 changes: 148 additions & 0 deletions rt_segment_speeds/scripts/A0_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""
Pre-processing vehicle positions.
Drop all RT trips with fewer than `time_min_cutoff` minutes of data
(the cutoff is read from the segment config).
Open question: should we create 2 dfs of trips, ones that are
straightforward (no loops, no inlining) and ones that are more complex?
"""
import dask.dataframe as dd
import datetime
import geopandas as gpd
import numpy as np
import pandas as pd
import sys

from loguru import logger

from shared_utils import utils
from segment_speed_utils import helpers
from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date,
CONFIG_PATH)

def trip_time_elapsed(
    ddf: dd.DataFrame,
    group_cols: list,
    timestamp_col: str
):
    """
    Group by trip and calculate the time elapsed (max_time - min_time)
    for RT vp observed.

    Args:
        ddf: dataframe of vehicle positions; must contain `group_cols`
            and `timestamp_col`.
        group_cols: columns that identify one trip.
        timestamp_col: name of the timestamp column to take the
            min / max of.

    Returns:
        Dataframe with one row per group holding `group_cols`,
        `min_time`, `max_time` and `trip_time_sec` (max_time - min_time
        expressed in seconds).
    """
    # A single groupby pass yields both extremes, so there is no need to
    # aggregate twice and merge the two results back together.
    # Rename before reset_index so the group columns are never touched.
    time_range = (ddf.groupby(group_cols)
                  [timestamp_col]
                  .agg(["min", "max"])
                  .rename(columns = {"min": "min_time", "max": "max_time"})
                  .reset_index()
                 )

    # Dividing a timedelta by a 1-second timedelta gives float seconds.
    time_range = time_range.assign(
        trip_time_sec = (
            (time_range.max_time - time_range.min_time) /
            np.timedelta64(1, "s")
        )
    )

    return time_range


def get_valid_trips_by_time_cutoff(
    ddf: dd.DataFrame,
    timestamp_col: str,
    trip_time_min_cutoff: int
)-> pd.DataFrame:
    """
    Keep only the trips whose observed RT time span is long enough.

    A trip is identified by (gtfs_dataset_key, trip_id). Its elapsed
    time comes from `trip_time_elapsed`, and it is kept when that
    elapsed time is at least `trip_time_min_cutoff` minutes.

    Returns the de-duplicated trip identifier columns only.
    """
    trip_keys = ["gtfs_dataset_key", "trip_id"]

    # The cutoff is given in minutes; elapsed time is in seconds.
    cutoff_sec = trip_time_min_cutoff * 60

    elapsed = trip_time_elapsed(ddf, trip_keys, timestamp_col)
    long_enough = elapsed[elapsed.trip_time_sec >= cutoff_sec]

    return (long_enough[trip_keys]
            .drop_duplicates()
            .reset_index(drop=True)
           )


def pare_down_vp_to_valid_trips(
    analysis_date: str,
    dict_inputs: dict = None
):
    """
    Pare down vehicle positions to the trips that have enough RT data,
    then export the result.

    Reads the vehicle positions parquet for `analysis_date`, finds the
    trips whose elapsed time meets the configured cutoff (in minutes),
    keeps only the vehicle positions belonging to those trips, and
    writes them out as a geoparquet.

    Args:
        analysis_date: date string used as the suffix of the input and
            output file names.
        dict_inputs: config dictionary; must provide the keys
            "stage0" (input file prefix), "timestamp_col",
            "time_min_cutoff" and "stage1" (export file name).
            None is treated as an empty dict.

    Raises:
        KeyError: if any of the required keys is missing.
    """
    # Avoid a shared mutable default argument.
    if dict_inputs is None:
        dict_inputs = {}

    INPUT_FILE_PREFIX = dict_inputs["stage0"]
    TIMESTAMP_COL = dict_inputs["timestamp_col"]
    TIME_CUTOFF = dict_inputs["time_min_cutoff"]
    EXPORT_FILE = dict_inputs["stage1"]

    vp = gpd.read_parquet(
        f"{SEGMENT_GCS}{INPUT_FILE_PREFIX}_{analysis_date}.parquet"
    )

    usable_trips = get_valid_trips_by_time_cutoff(
        vp,
        TIMESTAMP_COL,
        TIME_CUTOFF
    )

    # Inner merge drops every vp row whose trip did not meet the cutoff.
    usable_vp = pd.merge(
        vp,
        usable_trips,
        on = ["gtfs_dataset_key", "trip_id"],
        how = "inner"
    )

    utils.geoparquet_gcs_export(
        usable_vp,
        SEGMENT_GCS,
        f"{EXPORT_FILE}_{analysis_date}"
    )



if __name__ == "__main__":
    # Script entry point: set up logging, load the route segment
    # parameters from the config and pare down vehicle positions for
    # the analysis date, logging the total execution time.

    LOG_FILE = "../logs/usable_rt_vp.log"
    logger.add(LOG_FILE, retention="3 months")
    logger.add(sys.stderr,
               format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
               level="INFO")

    logger.info(f"Analysis date: {analysis_date}")

    start = datetime.datetime.now()

    ROUTE_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "route_segments")

    pare_down_vp_to_valid_trips(
        analysis_date,
        dict_inputs = ROUTE_SEG_DICT
    )

    logger.info("pare down vp")

    end = datetime.datetime.now()
    logger.info(f"execution time: {end-start}")
4 changes: 2 additions & 2 deletions rt_segment_speeds/scripts/A1_sjoin_vp_segments.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ def sjoin_vp_to_segments(
SEGMENT_GCS,
f"{SEGMENT_FILE}_{analysis_date}",
columns = ["gtfs_dataset_key", 'geometry']
).drop(columns = "geometry")
).drop(columns = "geometry").drop_duplicates()

in_vp = vp_df.gtfs_dataset_key.compute().tolist()
in_segments = segment_df.gtfs_dataset_key.compute().tolist()
in_segments = segment_df.gtfs_dataset_key.tolist()
RT_OPERATORS = list(set(in_vp) & set(in_segments))

for rt_dataset_key in sorted(RT_OPERATORS):
Expand Down
Loading

0 comments on commit 38a868a

Please sign in to comment.