Skip to content

Commit

Permalink
new technical notes
Browse files Browse the repository at this point in the history
  • Loading branch information
edasmalchi committed Dec 17, 2024
1 parent d03d5a4 commit e8b3c7f
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 17 deletions.
25 changes: 23 additions & 2 deletions high_quality_transit_areas/09_enforce_collinearity.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@
"(s[s<11]).hist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d6c8f6fd-fc7f-4021-9f56-3aec9ab206ba",
"metadata": {},
"outputs": [],
"source": [
"s.quantile(.96)"
]
},
{
"cell_type": "markdown",
"id": "d839566e-7cc0-4345-b835-ce5a77ab70cc",
Expand Down Expand Up @@ -963,7 +973,7 @@
"metadata": {},
"outputs": [],
"source": [
"# m2"
"m2"
]
},
{
Expand Down Expand Up @@ -993,7 +1003,18 @@
"metadata": {},
"outputs": [],
"source": [
"# one_pts.explore(column='hqta_type')"
"one_pts.explore(column='hqta_type')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0131bb42-d63b-4142-915f-e4ce29972d35",
"metadata": {},
"outputs": [],
"source": [
"areas.to_file('hqta_areas_update.geojson')\n",
"hqta_points.to_file('hqta_points_update.geojson')"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions high_quality_transit_areas/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
hqta_data:
# python rail_ferry_brt_stops.py
# python create_hqta_segments.py
python rail_ferry_brt_stops.py
python create_hqta_segments.py
python create_aggregate_stop_frequencies.py
python sjoin_stops_to_segments.py
python prep_pairwise_intersections.py
Expand Down
7 changes: 5 additions & 2 deletions high_quality_transit_areas/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@ For more details, see [links](#High-Quality-Transit-Areas-Relevant-Statutes) to
* Corridors are cut for each operator-route-direction every 1,250 meters.
* Stops have a 50 meter buffer drawn around them and are spatially joined to corridors.
* For each 1,250 meter corridor segment, the highest frequency stop is counted (if there are multiple stops).
* We count _all_ trips at the highest frequency stop in each corridor segment, even if they're marketed as different "routes".
* For example, if Route A serves the stop twice an hour, and Route B twice an hour, we consider the stop frequency to be 4 trips/hour, qualifying it as a high quality transit corridor and potentially a major bus stop (if an intersecting corridor is present)
* We count trips at the highest frequency stop in each corridor segment. Where routes providing similar service share a stop, we combine their trips for frequency calculation. However, we don't combine trips across routes where those routes do not provide similar service. We currently do this by counting the number of stops that different routes share in a given direction.
* For example, suppose that Route A and Route B share many stops in the same direction. They may be two routes coming together to provide combined service on a corridor while branching elsewhere, or a local and rapid route on the same corridor.
* In that case if Route A serves the stop twice an hour, and Route B twice an hour, we consider the stop frequency to be 4 trips/hour, qualifying it as a high quality transit corridor and potentially a major bus stop (if an intersecting corridor is present)
* If, however, Route A and Route B only shared one or two stops, for example starting together at a transit center but going off in different directions, we would not combine them for stop frequency calculations.
* See [technical_notes.md](technical_notes.md) for additional details.

### Why Not one "Route" at a Time?

Expand Down
16 changes: 10 additions & 6 deletions high_quality_transit_areas/create_aggregate_stop_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from update_vars import (analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, AM_PEAK, PM_PEAK, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, SHARED_STOP_THRESHOLD,
KEYS_TO_DROP)
ROUTE_COLLINEARITY_KEYS_TO_DROP)

am_peak_hrs = list(range(AM_PEAK[0].hour, AM_PEAK[1].hour))
pm_peak_hrs = list(range(PM_PEAK[0].hour, PM_PEAK[1].hour))
Expand Down Expand Up @@ -90,7 +90,7 @@ def get_explode_multiroute_only(
multi_only_explode = (multi_only[['schedule_gtfs_dataset_key', 'stop_id', 'route_dir']].explode('route_dir'))
multi_only_explode = multi_only_explode.merge(single_hourly, on = ['route_dir', 'schedule_gtfs_dataset_key', 'stop_id'])
multi_only_explode = multi_only_explode.sort_values(['schedule_gtfs_dataset_key','stop_id', 'route_dir']) # sorting crucial for next step
print(f'{multi_only_explode.stop_id.nunique()} stops may qualify with multi-route aggregation')
# print(f'{multi_only_explode.stop_id.nunique()} stops may qualify with multi-route aggregation')
return multi_only_explode

def accumulate_share_count(route_dir_exploded: pd.DataFrame) -> None:
Expand Down Expand Up @@ -138,17 +138,17 @@ def feed_level_filter(
st_prepped = st_prepped >> filter(_.schedule_gtfs_dataset_key == gtfs_dataset_key,
_.stop_id.isin(stops_to_eval.stop_id),
)
print(f'{st_prepped.shape}')
# print(f'{st_prepped.shape}')
st_to_eval = st_prepped >> filter(_.route_dir.isin(any_appearance))
print(f'{st_to_eval.shape}')
# print(f'{st_to_eval.shape}')
# cut down problem space by checking if stops still could qual after filtering for any appearance
min_rows = min(frequency_thresholds) * len(both_peaks_hrs)
st_could_qual = (st_to_eval >> group_by(_.stop_id)
>> mutate(could_qualify = _.shape[0] >= min_rows)
>> ungroup()
>> filter(_.could_qualify)
)
print(f'{st_could_qual.shape}')
# print(f'{st_could_qual.shape}')
return st_could_qual, qualify_pairs

def check_stop(this_stop_route_dirs, qualify_pairs):
Expand Down Expand Up @@ -221,6 +221,9 @@ def filter_all_prepare_export(
max_arrivals_by_stop_single: pd.DataFrame,
frequency_thresholds: tuple
):
'''
Apply collinearity
'''
# %%time 90 seconds (on default user) is not too bad!
all_collinear = pd.DataFrame()
for gtfs_dataset_key in feeds_to_filter:
Expand Down Expand Up @@ -318,7 +321,8 @@ def stop_times_aggregation_max_by_stop(
share_counts = {}
multi_only_explode.groupby(['schedule_gtfs_dataset_key', 'stop_id']).apply(accumulate_share_count)
qualify_dict = {key: share_counts[key] for key in share_counts.keys() if share_counts[key] >= SHARED_STOP_THRESHOLD}
for key in KEYS_TO_DROP: qualify_dict.pop(key) # will error if key not present, check if situation still present and update key if needed
# will error if key not present, check if situation still present and update key if needed
for key in ROUTE_COLLINEARITY_KEYS_TO_DROP: qualify_dict.pop(key)
feeds_to_filter = np.unique([key.split('__')[0] for key in qualify_dict.keys()])

combined_export = filter_all_prepare_export(feeds_to_filter, multi_only_explode, qualify_dict,
Expand Down
62 changes: 62 additions & 0 deletions high_quality_transit_areas/technical_notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# HQ Transit Corridors/Major Transit Stops Technical Notes

## Parameter Definitions

Mostly located in [update_vars.py](update_vars.py).

* `HQ_TRANSIT_THRESHOLD`, `MS_TRANSIT_THRESHOLD`: Statutory definition, see [README.md](README.md)
* `AM_PEAK`, `PM_PEAK`: Reflect most common MPO methodologies, not defined in statute
* `HQTA_SEGMENT_LENGTH`, `SEGMENT_BUFFER_METERS`: Our definitions for splitting corridors into analysis segments and matching them to stops
* `CORRIDOR_BUFFER_METERS`: Statutory definition
* `SHARED_STOP_THRESHOLD`: Our threshold for finding similar routes (routes sharing at least this many stops with another in the same direction). Reflects the 96th percentile.

## Script Sequence and Notes

Defined via [Makefile](Makefile)

## `create_aggregate_stop_frequencies.py`

This script finds similar (collinear) routes.

### Initial steps

Get frequencies at each stop for defined peak periods, with one version only looking at the single most frequent routes per stop and another version looking at all routes per stop.

Find stops in the multi-route version that qualify at a higher threshold than they do in the single-route version. These are the stops (and thus routes and feeds) that we want to check for collinearity.

### Detailed collinearity evaluation

1. Get a list of unique feeds where at least one route_directions pair qualifies to evaluate.
1. Get stop_times filtered to that feed, and filter that to stops that only qualify with multiple routes, and route directions that pair with at least one other route_direction. Do not consider pairs between the same route in one direction and the same route in the opposite direction.
1. After that filtering, check again if stop_times includes the minimum frequency to qualify at each stop. Exclude stops where it doesn't.
1. Then... evaluate which route_directions can be aggregated at each remaining stop. From the full list of route_directions (sorted by frequency) serving the stop, use `list(itertools.combinations(this_stop_route_dirs, 2))` to get each unique pair of route_directions. Check each of those unique pairs to see if it meets the `SHARED_STOP_THRESHOLD`. If they all do, keep all stop_times entries for that stop, different route_directions can be aggregated together at that stop. If any do not, remove the least frequent route_direction and try again, until a subset passes (only keep stop_times for that subset) or until all are eliminated. Currently implemented recursively as below:

```
attempting ['103_1', '101_1', '102_1', '104_1']... subsetting...
attempting ['103_1', '101_1', '102_1']... subsetting...
attempting ['103_1', '101_1']... matched!
attempting ['103_1', '101_0', '101_1', '103_0']... subsetting...
attempting ['103_1', '101_0', '101_1']... subsetting...
attempting ['103_1', '101_0']... subsetting...
exhausted!
```
1. With that filtered stop_times, recalculate stop-level frequencies as before. Only keep stops meeting the minimum frequency threshold for a major stop or HQ corridor.
1. Finally, once again apply the `SHARED_STOP_THRESHOLD` after aggregation (by ensuring at least one route_dir at each stop has >= `SHARED_STOP_THRESHOLD` frequent stops). Exclude stops that don't meet this criterion.
#### edge cases:
[AC Transit 45](https://www.actransit.org/sites/default/files/timetable_files/45-2023_12_03.pdf) _Opposite directions share a same-direction loop._ __Solved__ by preventing the same route from being compared with itself in the opposite direction.
[SDMTS 944/945](https://www.sdmts.com/sites/default/files/routes/pdf/944.pdf) _Shared frequent stops are few, and these routes are isolated._ __Solved__ by once again applying the `SHARED_STOP_THRESHOLD` after aggregation (by ensuring at least one route_dir at each stop has >= `SHARED_STOP_THRESHOLD` frequent stops). Complex topology including a loop route; each pair of [944, 945, 945A(946)] has >= threshold... but not actually in the same spots!
### Export
Export stop-level frequencies that are a composite of single-route results, and multi-route results passing the collinearity evaluation. These will be the stop-level frequencies used in subsequent steps.
## `sjoin_stops_to_segments.py`
* Formerly, this evaluated segments from all routes against all stops. Since it was a spatial join, this meant that infrequent routes running perpendicular to frequent routes often grabbed the cross street stop from the frequent route, creating an erroneous, isolated frequent segment and often an erroneous major transit stop.
* Now, we first filter segments to only segments from routes with at least one stop-level frequency meeting the standard. This screens out entirely infrequent routes and vastly reduces the risk of false positives.
* We have also reduced the segment to stop buffer (`SEGMENT_BUFFER_METERS`) to the extent possible.
8 changes: 3 additions & 5 deletions high_quality_transit_areas/update_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,17 @@
PROJECT_CRS = "EPSG:3310"
HQTA_SEGMENT_LENGTH = 1_250 # meters
SEGMENT_BUFFER_METERS = 35 # buffer around segment to sjoin to stops
INTERSECTION_BUFFER_METERS = 152 # ~ 500ft
HALF_MILE_BUFFER_METERS = 805 # half mile ~ 805 meters
INTERSECTION_BUFFER_METERS = 152.4 # ~ 500ft
HALF_MILE_BUFFER_METERS = 804.7 # half mile
CORRIDOR_BUFFER_METERS = HALF_MILE_BUFFER_METERS - SEGMENT_BUFFER_METERS # 755 meters
EXPORT_PATH = f"{GCS_FILE_PATH}export/{analysis_date}/"

AM_PEAK = (dt.time(hour=6), dt.time(hour=9)) # time range to calculate average peak frequencies
PM_PEAK = (dt.time(hour=15), dt.time(hour=19))
HQ_TRANSIT_THRESHOLD = 4 # trips per hour, from statute, see README.md
MS_TRANSIT_THRESHOLD = 3 # different statutory threshold as of October 2024
# CORR_SINGLE_ROUTE = False # only consider frequency from a single route at each stop (not recommended, see README)
# BUS_MS_SINGLE_ROUTE = False # only consider frequency from a single route at each stop (not recommended, see README)

SHARED_STOP_THRESHOLD = 8 # current rec
# Yolobus. Separate route_id, but same route in a CW and CCW loop, drop per rule to not compare same rt with itself
KEYS_TO_DROP = ['3c62ad6ee589d56eca915ce291a5df0a__42A_0__42B_0',
ROUTE_COLLINEARITY_KEYS_TO_DROP = ['3c62ad6ee589d56eca915ce291a5df0a__42A_0__42B_0',
'3c62ad6ee589d56eca915ce291a5df0a__42B_0__42A_0']

0 comments on commit e8b3c7f

Please sign in to comment.