Commit
Merge pull request #1029 from cal-itp/feb-open-data
Feb open data
tiffanychu90 authored Feb 22, 2024
2 parents 98f6b24 + 7da1739 commit 15ebea4
Showing 137 changed files with 965 additions and 672 deletions.
6 changes: 3 additions & 3 deletions Makefile
@@ -11,7 +11,7 @@ build_portfolio_site:
git add portfolio/$(site)/*.yml portfolio/$(site)/*.md
git add portfolio/$(site)/*.ipynb
git add portfolio/sites/$(site).yml
-#make production_portfolio
+make production_portfolio


build_competitive_corridors:
@@ -39,8 +39,8 @@ build_ntd_report:
make build_portfolio_site

build_route_speeds:
-$(eval override site = route_speeds)
-cd rt_segment_speeds / && make pip install -r requirements.txt && cd ..
+$(eval export site = route_speeds)
+cd rt_segment_speeds / && pip install -r requirements.txt && cd ..
cd rt_segment_speeds/ && python deploy_portfolio_yaml.py && cd ..
make build_portfolio_site

22 changes: 18 additions & 4 deletions _shared_utils/shared_utils/rt_dates.py
@@ -3,6 +3,8 @@
GCS: gs://calitp-analytics-data/data-analyses/rt_delay/cached_views/
"""
+from typing import Literal
+
# HQTAs and RT speedmaps
DATES = {
"feb2022": "2022-02-08",
@@ -48,13 +50,25 @@
"nov2023": "2023-11-15",
"dec2023": "2023-12-13",
"jan2024": "2024-01-17",
"feb2024": "2024-02-14",
}

-y2023_dates = [DATES[f"{m}2023"] for m in ["dec", "nov", "oct", "sep", "aug", "jul", "jun", "may", "apr", "mar"]]
-y2024_dates = [v for k, v in DATES.items() if "2024" in k]
+y2023_dates = [
+    v for k, v in DATES.items() if k.endswith("2023") and not any(substring in k for substring in ["jan", "feb"])
+]
+
+y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]
+
+
+def get_week(month: Literal["apr2023", "oct2023"], exclude_wed: bool) -> list:
+    if exclude_wed:
+        return [v for k, v in DATES.items() if month in k and not k.endswith(month)]
+    else:
+        return [v for k, v in DATES.items() if month in k]


-apr_week = [v for k, v in DATES.items() if "apr2023" in k]
-oct_week = [v for k, v in DATES.items() if "oct2023" in k]
+apr_week = get_week(month="apr2023", exclude_wed=False)
+oct_week = get_week(month="oct2023", exclude_wed=False)


# Planning and Modal Advisory Committee (PMAC) - quarterly
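The new get_week helper replaces the hand-rolled apr_week/oct_week comprehensions. A minimal runnable sketch of its behavior, using a trimmed and partly hypothetical DATES: it assumes the full dict pairs each month's base key (the Wednesday) with suffixed keys like "apr2023a" for the surrounding week, which is what the k.endswith(month) test relies on.

```python
# Illustrative sketch only -- the real DATES lives in shared_utils/rt_dates.py.
# The suffixed week keys below are assumptions for demonstration.
DATES = {
    "apr2023a": "2023-04-10",  # hypothetical Monday key
    "apr2023b": "2023-04-11",  # hypothetical Tuesday key
    "apr2023": "2023-04-12",   # Wednesday (base key)
}

def get_week(month: str, exclude_wed: bool) -> list:
    if exclude_wed:
        # every key containing the month except the base key (the Wednesday)
        return [v for k, v in DATES.items() if month in k and not k.endswith(month)]
    return [v for k, v in DATES.items() if month in k]

print(get_week("apr2023", exclude_wed=False))  # ['2023-04-10', '2023-04-11', '2023-04-12']
print(get_week("apr2023", exclude_wed=True))   # ['2023-04-10', '2023-04-11']
```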
2 changes: 2 additions & 0 deletions gtfs_funnel/download_vehicle_positions.py
@@ -105,6 +105,8 @@ def loop_through_batches_and_download_vp(
df.to_parquet(
f"{SEGMENT_GCS}vp_raw_{analysis_date}_batch{i}.parquet")

+del df

time1 = datetime.datetime.now()
logger.info(f"exported batch {i} to GCS: {time1 - time0}")

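The added del df drops each batch's DataFrame reference as soon as it is written, so its memory can be reclaimed before the next batch loads. A sketch of the pattern, with fetch_batch as a stand-in for the real vehicle-positions query:

```python
# Sketch of the batched-export pattern; fetch_batch and the batches dict
# are stand-ins, not the real download logic.
import datetime
import pandas as pd

def fetch_batch(batch_filters: dict) -> pd.DataFrame:
    # Stand-in for the real RT vehicle-positions query.
    return pd.DataFrame({"trip_id": ["t1", "t2"], "location_timestamp": [0, 1]})

def export_batches(batches: dict, analysis_date: str, gcs_path: str) -> None:
    for i, batch_filters in batches.items():
        time0 = datetime.datetime.now()
        df = fetch_batch(batch_filters)
        df.to_parquet(f"{gcs_path}vp_raw_{analysis_date}_batch{i}.parquet")
        # Release the reference so the batch's memory can be reclaimed
        # before the next (potentially large) batch is loaded.
        del df
        time1 = datetime.datetime.now()
        print(f"exported batch {i} to GCS: {time1 - time0}")
```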
16 changes: 16 additions & 0 deletions gtfs_funnel/logs/download_data.log
@@ -270,3 +270,19 @@
2024-01-18 09:14:47.261 | INFO | __main__:download_one_day:29 - # operators to run: 169
2024-01-18 09:14:47.261 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
2024-01-18 09:15:55.924 | INFO | __main__:download_one_day:56 - execution time: 0:01:10.033714
+2024-02-15 09:20:58.545 | INFO | __main__:download_one_day:46 - Analysis date: 2024-02-14
+2024-02-15 09:21:01.201 | INFO | __main__:download_one_day:53 - # operators to run: 202
+2024-02-15 09:21:01.202 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
+2024-02-15 09:21:23.365 | INFO | __main__:download_one_day:86 - execution time: 0:00:24.802016
+2024-02-15 09:21:37.056 | INFO | __main__:download_one_day:23 - Analysis date: 2024-02-14
+2024-02-15 09:21:39.062 | INFO | __main__:download_one_day:30 - # operators to run: 202
+2024-02-15 09:21:39.062 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
+2024-02-15 09:21:47.027 | INFO | __main__:download_one_day:64 - execution time: 0:00:09.969948
+2024-02-15 09:22:00.563 | INFO | __main__:download_one_day:22 - Analysis date: 2024-02-14
+2024-02-15 09:22:02.703 | INFO | __main__:download_one_day:29 - # operators to run: 202
+2024-02-15 09:22:02.704 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
+2024-02-15 09:23:28.505 | INFO | __main__:download_one_day:63 - execution time: 0:01:27.941276
+2024-02-15 09:23:45.261 | INFO | __main__:download_one_day:21 - Analysis date: 2024-02-14
+2024-02-15 09:23:46.825 | INFO | __main__:download_one_day:29 - # operators to run: 169
+2024-02-15 09:23:46.826 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
+2024-02-15 09:25:01.209 | INFO | __main__:download_one_day:56 - execution time: 0:01:15.946993
11 changes: 11 additions & 0 deletions gtfs_funnel/logs/download_vp_v2.log
@@ -174,3 +174,14 @@
2024-01-18 09:25:33.713 | INFO | __main__:<module>:110 - export concatenated vp: 0:02:07.041294
2024-01-18 09:28:08.669 | INFO | __main__:<module>:132 - remove batched parquets
2024-01-18 09:28:08.670 | INFO | __main__:<module>:135 - execution time: 0:04:48.117366
+2024-02-15 09:25:16.954 | INFO | __main__:<module>:146 - Analysis date: 2024-02-14
+2024-02-15 09:26:49.716 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 0 to GCS: 0:01:32.752392
+2024-02-15 09:27:37.016 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 1 to GCS: 0:00:47.299499
+2024-02-15 09:30:29.353 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 2 to GCS: 0:02:52.335775
+2024-02-15 09:31:56.048 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 3 to GCS: 0:01:26.673852
+2024-02-15 09:31:57.359 | INFO | __main__:<module>:153 - execution time: 0:06:40.395352
+2024-02-15 09:32:26.114 | INFO | __main__:<module>:95 - Analysis date: 2024-02-14
+2024-02-15 09:32:31.685 | INFO | __main__:<module>:103 - concat and filter batched data: 0:00:05.570464
+2024-02-15 09:34:43.337 | INFO | __main__:<module>:110 - export concatenated vp: 0:02:11.652166
+2024-02-15 09:37:09.512 | INFO | __main__:<module>:132 - remove batched parquets
+2024-02-15 09:37:09.513 | INFO | __main__:<module>:135 - execution time: 0:04:43.398413
10 changes: 9 additions & 1 deletion gtfs_funnel/logs/vp_preprocessing.log
@@ -3,4 +3,12 @@
2024-01-27 14:27:17.221 | INFO | __main__:<module>:136 - 2023-11-15: condense vp for trip-direction 0:07:23.319808
2024-01-27 14:39:49.578 | INFO | __main__:<module>:144 - 2023-11-15: prepare vp to use in nearest neighbor: 0:12:32.357147
2024-01-27 14:46:57.364 | INFO | __main__:<module>:136 - 2023-10-11: condense vp for trip-direction 0:07:07.785109
-2024-01-27 14:58:58.072 | INFO | __main__:<module>:144 - 2023-10-11: prepare vp to use in nearest neighbor: 0:12:00.707961
+2024-01-27 14:58:58.072 | INFO | __main__:<module>:144 - 2023-10-11: prepare vp to use in nearest neighbor: 0:12:00.707961
+2024-02-15 12:35:27.737 | INFO | __main__:<module>:169 - 2024-02-14: pare down vp: 0:01:27.967360
+2024-02-15 12:40:05.610 | INFO | __main__:attach_prior_vp_add_direction:89 - persist vp gddf: 0:02:46.966446
+2024-02-15 12:42:39.989 | INFO | __main__:attach_prior_vp_add_direction:121 - np vectorize arrays for direction: 0:02:34.378961
+2024-02-15 12:42:45.707 | INFO | __main__:<module>:193 - 2024-02-14: export vp direction: 0:05:27.063384
+2024-02-15 12:43:43.624 | INFO | __main__:<module>:199 - 2024-02-14: export usable vp with direction: 0:00:57.917219
+2024-02-15 12:43:43.624 | INFO | __main__:<module>:202 - 2024-02-14: vp_direction script execution time: 0:06:24.980603
+2024-02-15 12:50:35.377 | INFO | __main__:<module>:142 - 2024-02-14: condense vp for trip-direction 0:06:37.853370
+2024-02-15 13:02:43.454 | INFO | __main__:<module>:150 - 2024-02-14: prepare vp to use in nearest neighbor: 0:12:08.077021
2 changes: 1 addition & 1 deletion gtfs_funnel/update_vars.py
@@ -6,7 +6,7 @@
rt_dates.oct_week + rt_dates.apr_week)

analysis_date_list = [
rt_dates.DATES["jan2024"]
rt_dates.DATES["feb2024"]
]

CONFIG_PATH = Path("config.yml")
6 changes: 3 additions & 3 deletions gtfs_funnel/vp_condenser.py
@@ -15,7 +15,6 @@
from segment_speed_utils import vp_transform, wrangle_shapes
from segment_speed_utils.project_vars import SEGMENT_GCS


def condense_vp_to_linestring(
analysis_date: str,
dict_inputs: dict
@@ -103,6 +102,7 @@ def prepare_vp_for_all_directions(
]

results = [compute(i)[0] for i in dfs]

gdf = pd.concat(
results, axis=0, ignore_index=True
).sort_values(
@@ -118,7 +118,7 @@ def prepare_vp_for_all_directions(
)

del gdf

return


@@ -140,7 +140,7 @@ def prepare_vp_for_all_directions(

time1 = datetime.datetime.now()
logger.info(
f"{analysis_date}: condense vp for trip-direction "
f"{analysis_date}: condense vp for trip "
f"{time1 - start}"
)

@@ -23,7 +23,7 @@ def filter_trips_to_route_type(analysis_date: str,
trips = helpers.import_scheduled_trips(
analysis_date,
columns = ["feed_key", "name", "trip_id",
"route_id", "route_type"],
"route_id", "route_type", "route_desc"],
)

if isinstance(route_types, list):
@@ -33,6 +33,7 @@
trips_subset = filter_to_brt_trips(trips)

trips_subset = (trips_subset
+.drop(columns = "route_desc")
.drop_duplicates()
.reset_index(drop=True)
)
@@ -52,7 +53,7 @@ def filter_to_brt_trips(trips: pd.DataFrame) -> pd.DataFrame:
["METRO SILVER LINE", "METRO ORANGE LINE",
"METRO J LINE", "METRO G LINE"
]},
"Bay Area 511 Muni Schedule": {"route_short_name":
"Bay Area 511 Muni Schedule": {"route_id":
['49']},
# Omni BRT -- too infrequent!
#"OmniTrans Schedule": {"route_short_name": ["sbX"]}
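These hunks move the Muni BRT match from route_short_name to route_id and pull route_desc through the trip import (dropping it again after filtering). A sketch of how such a dict-driven filter can be applied; the column used for the LA Metro entry is cut off above the hunk, so route_desc there is an assumption, as is the helper itself:

```python
import pandas as pd

# BRT_FILTERS mirrors the operator/value pairs visible in the diff; the
# "route_desc" column for LA Metro is an assumption for illustration.
BRT_FILTERS = {
    "LA Metro Bus Schedule": {
        "route_desc": ["METRO SILVER LINE", "METRO ORANGE LINE",
                       "METRO J LINE", "METRO G LINE"],
    },
    "Bay Area 511 Muni Schedule": {"route_id": ["49"]},
}

def filter_to_brt_trips_sketch(trips: pd.DataFrame) -> pd.DataFrame:
    # Keep only trips whose operator + column value match one of the filters.
    subsets = []
    for operator_name, col_filters in BRT_FILTERS.items():
        for col, values in col_filters.items():
            subsets.append(
                trips[(trips["name"] == operator_name) & (trips[col].isin(values))]
            )
    return pd.concat(subsets, axis=0, ignore_index=True)

trips = pd.DataFrame({
    "name": ["Bay Area 511 Muni Schedule", "LA Metro Bus Schedule"],
    "route_id": ["49", "720"],
    "route_desc": ["", "METRO G LINE"],
})
print(filter_to_brt_trips_sketch(trips))  # both rows qualify
```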
9 changes: 3 additions & 6 deletions high_quality_transit_areas/A2_combine_stops.py
@@ -54,12 +54,12 @@

#client = Client("dask-scheduler.dask.svc.cluster.local:8786")

logger.add("./logs/A2_combine_stops.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"A2_combine_stops Analysis Date: {analysis_date}")
start = datetime.datetime.now()

# Rail
@@ -99,9 +99,6 @@
ferry_stops
], axis=0, ignore_index=True)

logger.info("concatenated datasets")


# Export to GCS
utils.geoparquet_gcs_export(
rail_brt_ferry,
@@ -110,6 +107,6 @@
)

end = datetime.datetime.now()
logger.info(f"execution time: {end-start}")
logger.info(f"A2_combine_stops execution time: {end-start}")

#client.close()
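The same two-line change repeats across every HQTA script below (B1 through D1): each stage now logs to the shared logs/hqta_processing.log instead of a per-script file, and prefixes its messages with the script name so a single log traces the whole pipeline. A minimal sketch of the recurring pattern, with an illustrative analysis_date:

```python
import datetime
import sys
from loguru import logger

# Shared sink for all HQTA stages, plus the usual stderr echo.
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
           format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
           level="INFO")

analysis_date = "2024-02-14"  # illustrative date
logger.info(f"A2_combine_stops Analysis Date: {analysis_date}")
start = datetime.datetime.now()
# ... stage-specific processing runs here ...
end = datetime.datetime.now()
logger.info(f"A2_combine_stops execution time: {end - start}")
```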
10 changes: 5 additions & 5 deletions high_quality_transit_areas/B1_create_hqta_segments.py
@@ -28,7 +28,7 @@

import operators_for_hqta
from calitp_data_analysis import geography_utils, utils
-from shared_utils import rt_utils, geog_utils_to_add
+from shared_utils import rt_utils
from segment_speed_utils import helpers, gtfs_schedule_wrangling
from utilities import GCS_FILE_PATH
from update_vars import analysis_date
@@ -188,7 +188,7 @@ def select_shapes_and_segment(
axis=1,
)

-segmented = geog_utils_to_add.explode_segments(
+segmented = geography_utils.explode_segments(
ready_for_segmenting,
group_cols = ["route_key"],
segment_col = "segment_geometry"
@@ -273,12 +273,12 @@ def find_primary_direction_across_hqta_segments(

if __name__=="__main__":

logger.add("./logs/B1_create_hqta_segments.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"B1_create_hqta_segments Analysis date: {analysis_date}")

start = dt.datetime.now()

@@ -319,4 +319,4 @@ def find_primary_direction_across_hqta_segments(
logger.info(f"cut segments: {time2 - time1}")

end = dt.datetime.now()
logger.info(f"total execution time: {end - start}")
logger.info(f"B1_create_hqta_segments execution time: {end - start}")
6 changes: 3 additions & 3 deletions high_quality_transit_areas/B2_sjoin_stops_to_segments.py
@@ -210,12 +210,12 @@ def sjoin_stops_and_stop_times_to_hqta_segments(
#from dask.distributed import Client
#client = Client("dask-scheduler.dask.svc.cluster.local:8786")

logger.add("./logs/B2_sjoin_stops_to_segments.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"B2_sjoin_stops_to_segments Analysis date: {analysis_date}")

start = dt.datetime.now()

@@ -255,6 +255,6 @@ def sjoin_stops_and_stop_times_to_hqta_segments(
)

end = dt.datetime.now()
logger.info(f"Execution time: {end-start}")
logger.info(f"B2_sjoin_stops_to_segments execution time: {end-start}")

#client.close()
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C1_prep_pairwise_intersections.py
@@ -82,12 +82,12 @@ def sjoin_against_other_operators(
#from dask.distributed import Client
#client = Client("dask-scheduler.dask.svc.cluster.local:8786")

logger.add("./logs/C1_prep_pairwise_intersections.log", retention = "3 months")
logger.add("./logs/hqta_processing.log", retention = "3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"C1_prep_pairwise_intersections Analysis date: {analysis_date}")
start = dt.datetime.now()

corridors = prep_bus_corridors()
@@ -145,6 +145,6 @@ def sjoin_against_other_operators(
)

end = dt.datetime.now()
logger.info(f"execution time: {end-start}")
logger.info(f"C1_prep_pairwise_intersections execution time: {end-start}")

#client.close()
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C2_get_intersections.py
@@ -101,12 +101,12 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
#from dask.distributed import Client
#client = Client("dask-scheduler.dask.svc.cluster.local:8786")

logger.add("./logs/C2_find_intersections.log", retention = "3 months")
logger.add("./logs/hqta_processing.log", retention = "3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"C2_find_intersections Analysis date: {analysis_date}")

start = dt.datetime.now()

@@ -130,6 +130,6 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
)

end = dt.datetime.now()
logger.info(f"execution time: {end-start}")
logger.info(f"C2_find_intersections execution time: {end-start}")

#client.close()
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C3_create_bus_hqta_types.py
@@ -119,12 +119,12 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram
#from dask.distributed import Client

#client = Client("dask-scheduler.dask.svc.cluster.local:8786")
logger.add("./logs/C3_create_bus_hqta_types.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"C3_create_bus_hqta_types Analysis date: {analysis_date}")
start = dt.datetime.now()

# Start with the gdf of all the hqta_segments
@@ -162,6 +162,6 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFram
)

end = dt.datetime.now()
logger.info(f"execution time: {end-start}")
logger.info(f"C3_create_bus_hqta_types execution time: {end-start}")

#client.close()
6 changes: 3 additions & 3 deletions high_quality_transit_areas/D1_assemble_hqta_points.py
@@ -201,12 +201,12 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:

if __name__=="__main__":

logger.add("./logs/D1_assemble_hqta_points.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"D1_assemble_hqta_points Analysis date: {analysis_date}")
start = dt.datetime.now()

rail_ferry_brt = rail_ferry_brt_extract.get_rail_ferry_brt_extract().to_crs(
@@ -267,4 +267,4 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
logger.info("export as geoparquet")

end = dt.datetime.now()
logger.info(f"execution time: {end-start}")
logger.info(f"D1_assemble_hqta_points execution time: {end-start}")