Feb open data #1029

Merged: 15 commits, Feb 22, 2024
Changes from all commits
6 changes: 3 additions & 3 deletions Makefile

```diff
@@ -11,7 +11,7 @@ build_portfolio_site:
 	git add portfolio/$(site)/*.yml portfolio/$(site)/*.md
 	git add portfolio/$(site)/*.ipynb
 	git add portfolio/sites/$(site).yml
-	#make production_portfolio
+	make production_portfolio
 
 
 build_competitive_corridors:

@@ -39,8 +39,8 @@ build_ntd_report:
 	make build_portfolio_site
 
 build_route_speeds:
-	$(eval override site = route_speeds)
-	cd rt_segment_speeds / && make pip install -r requirements.txt && cd ..
+	$(eval export site = route_speeds)
+	cd rt_segment_speeds / && pip install -r requirements.txt && cd ..
 	cd rt_segment_speeds/ && python deploy_portfolio_yaml.py && cd ..
 	make build_portfolio_site
```
22 changes: 18 additions & 4 deletions _shared_utils/shared_utils/rt_dates.py

```diff
@@ -3,6 +3,8 @@
 
 GCS: gs://calitp-analytics-data/data-analyses/rt_delay/cached_views/
 """
+from typing import Literal
+
 # HQTAs and RT speedmaps
 DATES = {
     "feb2022": "2022-02-08",

@@ -48,13 +50,25 @@
     "nov2023": "2023-11-15",
     "dec2023": "2023-12-13",
     "jan2024": "2024-01-17",
+    "feb2024": "2024-02-14",
 }
 
-y2023_dates = [DATES[f"{m}2023"] for m in ["dec", "nov", "oct", "sep", "aug", "jul", "jun", "may", "apr", "mar"]]
-y2024_dates = [v for k, v in DATES.items() if "2024" in k]
+y2023_dates = [
+    v for k, v in DATES.items() if k.endswith("2023") and not any(substring in k for substring in ["jan", "feb"])
+]
+
+y2024_dates = [v for k, v in DATES.items() if k.endswith("2024")]
+
+
+def get_week(month: Literal["apr2023", "oct2023"], exclude_wed: bool) -> list:
+    if exclude_wed:
+        return [v for k, v in DATES.items() if month in k and not k.endswith(month)]
+    else:
+        return [v for k, v in DATES.items() if month in k]
 
 
-apr_week = [v for k, v in DATES.items() if "apr2023" in k]
-oct_week = [v for k, v in DATES.items() if "oct2023" in k]
+apr_week = get_week(month="apr2023", exclude_wed=False)
+oct_week = get_week(month="oct2023", exclude_wed=False)
 
 
 # Planning and Modal Advisory Committee (PMAC) - quarterly
```
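A quick usage sketch of the new `get_week` helper for reviewers. It assumes the weekday entries in `DATES` use suffixed keys (hypothetically `"apr2023a"`, `"apr2023b"`, ...) alongside the bare `"apr2023"` key for the Wednesday date, which is what `exclude_wed=True` filters out via `k.endswith(month)`:

```python
from shared_utils import rt_dates

# Every April 2023 date, Wednesday included: matches any key containing "apr2023".
apr_week = rt_dates.get_week(month="apr2023", exclude_wed=False)

# Same week minus Wednesday: the bare "apr2023" key ends with the month
# string and is dropped; suffixed keys like "apr2023a" survive.
apr_no_wed = rt_dates.get_week(month="apr2023", exclude_wed=True)

print(apr_week)
print(apr_no_wed)
```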
2 changes: 2 additions & 0 deletions gtfs_funnel/download_vehicle_positions.py

```diff
@@ -105,6 +105,8 @@ def loop_through_batches_and_download_vp(
         df.to_parquet(
             f"{SEGMENT_GCS}vp_raw_{analysis_date}_batch{i}.parquet")
 
+        del df
+
         time1 = datetime.datetime.now()
         logger.info(f"exported batch {i} to GCS: {time1 - time0}")
```
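The added `del df` releases each batch's DataFrame before the next iteration, so the loop's peak memory stays near one batch. A minimal sketch of the pattern (hypothetical local files, not the project's GCS paths):

```python
import gc

import pandas as pd

for i in range(4):
    # Stand-in for one downloaded batch of vehicle positions.
    df = pd.DataFrame({"trip_id": range(100_000), "batch": i})
    df.to_parquet(f"vp_raw_batch{i}.parquet")

    # Drop the reference so the batch can be garbage-collected before
    # the next one is allocated; gc.collect() just makes that eager.
    del df
    gc.collect()
```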
16 changes: 16 additions & 0 deletions gtfs_funnel/logs/download_data.log

```diff
@@ -270,3 +270,19 @@
 2024-01-18 09:14:47.261 | INFO | __main__:download_one_day:29 - # operators to run: 169
 2024-01-18 09:14:47.261 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
 2024-01-18 09:15:55.924 | INFO | __main__:download_one_day:56 - execution time: 0:01:10.033714
+2024-02-15 09:20:58.545 | INFO | __main__:download_one_day:46 - Analysis date: 2024-02-14
+2024-02-15 09:21:01.201 | INFO | __main__:download_one_day:53 - # operators to run: 202
+2024-02-15 09:21:01.202 | INFO | __main__:download_one_day:56 - *********** Download trips data ***********
+2024-02-15 09:21:23.365 | INFO | __main__:download_one_day:86 - execution time: 0:00:24.802016
+2024-02-15 09:21:37.056 | INFO | __main__:download_one_day:23 - Analysis date: 2024-02-14
+2024-02-15 09:21:39.062 | INFO | __main__:download_one_day:30 - # operators to run: 202
+2024-02-15 09:21:39.062 | INFO | __main__:download_one_day:33 - *********** Download stops data ***********
+2024-02-15 09:21:47.027 | INFO | __main__:download_one_day:64 - execution time: 0:00:09.969948
+2024-02-15 09:22:00.563 | INFO | __main__:download_one_day:22 - Analysis date: 2024-02-14
+2024-02-15 09:22:02.703 | INFO | __main__:download_one_day:29 - # operators to run: 202
+2024-02-15 09:22:02.704 | INFO | __main__:download_one_day:33 - *********** Download routelines data ***********
+2024-02-15 09:23:28.505 | INFO | __main__:download_one_day:63 - execution time: 0:01:27.941276
+2024-02-15 09:23:45.261 | INFO | __main__:download_one_day:21 - Analysis date: 2024-02-14
+2024-02-15 09:23:46.825 | INFO | __main__:download_one_day:29 - # operators to run: 169
+2024-02-15 09:23:46.826 | INFO | __main__:download_one_day:33 - *********** Download st data ***********
+2024-02-15 09:25:01.209 | INFO | __main__:download_one_day:56 - execution time: 0:01:15.946993
```
11 changes: 11 additions & 0 deletions gtfs_funnel/logs/download_vp_v2.log

```diff
@@ -174,3 +174,14 @@
 2024-01-18 09:25:33.713 | INFO | __main__:<module>:110 - export concatenated vp: 0:02:07.041294
 2024-01-18 09:28:08.669 | INFO | __main__:<module>:132 - remove batched parquets
 2024-01-18 09:28:08.670 | INFO | __main__:<module>:135 - execution time: 0:04:48.117366
+2024-02-15 09:25:16.954 | INFO | __main__:<module>:146 - Analysis date: 2024-02-14
+2024-02-15 09:26:49.716 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 0 to GCS: 0:01:32.752392
+2024-02-15 09:27:37.016 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 1 to GCS: 0:00:47.299499
+2024-02-15 09:30:29.353 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 2 to GCS: 0:02:52.335775
+2024-02-15 09:31:56.048 | INFO | __main__:loop_through_batches_and_download_vp:109 - exported batch 3 to GCS: 0:01:26.673852
+2024-02-15 09:31:57.359 | INFO | __main__:<module>:153 - execution time: 0:06:40.395352
+2024-02-15 09:32:26.114 | INFO | __main__:<module>:95 - Analysis date: 2024-02-14
+2024-02-15 09:32:31.685 | INFO | __main__:<module>:103 - concat and filter batched data: 0:00:05.570464
+2024-02-15 09:34:43.337 | INFO | __main__:<module>:110 - export concatenated vp: 0:02:11.652166
+2024-02-15 09:37:09.512 | INFO | __main__:<module>:132 - remove batched parquets
+2024-02-15 09:37:09.513 | INFO | __main__:<module>:135 - execution time: 0:04:43.398413
```
10 changes: 9 additions & 1 deletion gtfs_funnel/logs/vp_preprocessing.log

```diff
@@ -3,4 +3,12 @@
 2024-01-27 14:27:17.221 | INFO | __main__:<module>:136 - 2023-11-15: condense vp for trip-direction 0:07:23.319808
 2024-01-27 14:39:49.578 | INFO | __main__:<module>:144 - 2023-11-15: prepare vp to use in nearest neighbor: 0:12:32.357147
 2024-01-27 14:46:57.364 | INFO | __main__:<module>:136 - 2023-10-11: condense vp for trip-direction 0:07:07.785109
-2024-01-27 14:58:58.072 | INFO | __main__:<module>:144 - 2023-10-11: prepare vp to use in nearest neighbor: 0:12:00.707961
+2024-01-27 14:58:58.072 | INFO | __main__:<module>:144 - 2023-10-11: prepare vp to use in nearest neighbor: 0:12:00.707961
+2024-02-15 12:35:27.737 | INFO | __main__:<module>:169 - 2024-02-14: pare down vp: 0:01:27.967360
+2024-02-15 12:40:05.610 | INFO | __main__:attach_prior_vp_add_direction:89 - persist vp gddf: 0:02:46.966446
+2024-02-15 12:42:39.989 | INFO | __main__:attach_prior_vp_add_direction:121 - np vectorize arrays for direction: 0:02:34.378961
+2024-02-15 12:42:45.707 | INFO | __main__:<module>:193 - 2024-02-14: export vp direction: 0:05:27.063384
+2024-02-15 12:43:43.624 | INFO | __main__:<module>:199 - 2024-02-14: export usable vp with direction: 0:00:57.917219
+2024-02-15 12:43:43.624 | INFO | __main__:<module>:202 - 2024-02-14: vp_direction script execution time: 0:06:24.980603
+2024-02-15 12:50:35.377 | INFO | __main__:<module>:142 - 2024-02-14: condense vp for trip-direction 0:06:37.853370
+2024-02-15 13:02:43.454 | INFO | __main__:<module>:150 - 2024-02-14: prepare vp to use in nearest neighbor: 0:12:08.077021
```
2 changes: 1 addition & 1 deletion gtfs_funnel/update_vars.py

```diff
@@ -6,7 +6,7 @@
     rt_dates.oct_week + rt_dates.apr_week)
 
 analysis_date_list = [
-    rt_dates.DATES["jan2024"]
+    rt_dates.DATES["feb2024"]
 ]
 
 CONFIG_PATH = Path("config.yml")
```
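With the `feb2024` key added to `rt_dates.DATES` above, this one-line config change points the whole `gtfs_funnel` pipeline at the new service date; a sketch of what downstream scripts see:

```python
from shared_utils import rt_dates

# Resolves to the date registered in the rt_dates diff above.
analysis_date_list = [rt_dates.DATES["feb2024"]]
print(analysis_date_list)  # ['2024-02-14']
```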
6 changes: 3 additions & 3 deletions gtfs_funnel/vp_condenser.py

```diff
@@ -15,7 +15,6 @@
 from segment_speed_utils import vp_transform, wrangle_shapes
 from segment_speed_utils.project_vars import SEGMENT_GCS
 
-
 def condense_vp_to_linestring(
     analysis_date: str,
     dict_inputs: dict

@@ -103,6 +102,7 @@ def prepare_vp_for_all_directions(
     ]
 
     results = [compute(i)[0] for i in dfs]
+
     gdf = pd.concat(
         results, axis=0, ignore_index=True
     ).sort_values(

@@ -118,7 +118,7 @@ def prepare_vp_for_all_directions(
     )
 
     del gdf
-    
+
     return

@@ -140,7 +140,7 @@
 
     time1 = datetime.datetime.now()
     logger.info(
-        f"{analysis_date}: condense vp for trip-direction "
+        f"{analysis_date}: condense vp for trip "
        	f"{time1 - start}"
     )
```
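For context on the `results = [compute(i)[0] for i in dfs]` line kept in this hunk: `dask.compute` always returns a tuple, so `[0]` unwraps the single computed frame before the concat. A toy sketch of that pattern (hypothetical delayed frames, not the project's vp data):

```python
import pandas as pd
from dask import compute, delayed

# Three lazily built frames standing in for per-direction vp chunks.
dfs = [delayed(pd.DataFrame)({"vp_idx": [i]}) for i in range(3)]

# compute() returns a tuple even for a single input, hence the [0].
results = [compute(d)[0] for d in dfs]

out = pd.concat(results, axis=0, ignore_index=True)
print(out)
```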
```diff
@@ -23,7 +23,7 @@ def filter_trips_to_route_type(analysis_date: str,
     trips = helpers.import_scheduled_trips(
         analysis_date,
         columns = ["feed_key", "name", "trip_id",
-                   "route_id", "route_type"],
+                   "route_id", "route_type", "route_desc"],
     )
 
     if isinstance(route_types, list):

@@ -33,6 +33,7 @@
     trips_subset = filter_to_brt_trips(trips)
 
     trips_subset = (trips_subset
+        .drop(columns = "route_desc")
         .drop_duplicates()
         .reset_index(drop=True)
     )

@@ -52,7 +53,7 @@ def filter_to_brt_trips(trips: pd.DataFrame) -> pd.DataFrame:
         ["METRO SILVER LINE", "METRO ORANGE LINE",
          "METRO J LINE", "METRO G LINE"
         ]},
-    "Bay Area 511 Muni Schedule": {"route_short_name":
+    "Bay Area 511 Muni Schedule": {"route_id":
         ['49']},
     # Omni BRT -- too infrequent!
     #"OmniTrans Schedule": {"route_short_name": ["sbX"]}
```
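The new `route_desc` column is pulled only so `filter_to_brt_trips` can identify BRT service, and it is dropped again before the subset is exported. A toy sketch of that pull-filter-drop pattern (hypothetical data, not the real trips table):

```python
import pandas as pd

trips = pd.DataFrame({
    "route_id": ["49", "12", "49"],
    "route_type": ["3", "3", "3"],
    "route_desc": ["Van Ness BRT", "local", "Van Ness BRT"],
})

# Filter using the descriptive columns, then drop route_desc so the
# exported subset keeps only stable identifiers and deduplicates cleanly.
brt_trips = (trips[trips.route_id.isin(["49"])]
             .drop(columns="route_desc")
             .drop_duplicates()
             .reset_index(drop=True))
print(brt_trips)
```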
9 changes: 3 additions & 6 deletions high_quality_transit_areas/A2_combine_stops.py

```diff
@@ -54,12 +54,12 @@
 
 #client = Client("dask-scheduler.dask.svc.cluster.local:8786")
 
-logger.add("./logs/A2_combine_stops.log", retention="3 months")
+logger.add("./logs/hqta_processing.log", retention="3 months")
 logger.add(sys.stderr,
            format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
            level="INFO")
 
-logger.info(f"Analysis date: {analysis_date}")
+logger.info(f"A2_combine_stops Analysis Date: {analysis_date}")
 start = datetime.datetime.now()
 
 # Rail

@@ -99,9 +99,6 @@
     ferry_stops
 ], axis=0, ignore_index=True)
 
-logger.info("concatenated datasets")
-
-
 # Export to GCS
 utils.geoparquet_gcs_export(
     rail_brt_ferry,

@@ -110,6 +107,6 @@
 )
 
 end = datetime.datetime.now()
-logger.info(f"execution time: {end-start}")
+logger.info(f"A2_combine_stops execution time: {end-start}")
 
 #client.close()
```
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

import operators_for_hqta
from calitp_data_analysis import geography_utils, utils
from shared_utils import rt_utils, geog_utils_to_add
from shared_utils import rt_utils
from segment_speed_utils import helpers, gtfs_schedule_wrangling
from utilities import GCS_FILE_PATH
from update_vars import analysis_date
Expand Down Expand Up @@ -188,7 +188,7 @@ def select_shapes_and_segment(
axis=1,
)

segmented = geog_utils_to_add.explode_segments(
segmented = geography_utils.explode_segments(
ready_for_segmenting,
group_cols = ["route_key"],
segment_col = "segment_geometry"
Expand Down Expand Up @@ -273,12 +273,12 @@ def find_primary_direction_across_hqta_segments(

if __name__=="__main__":

logger.add("./logs/B1_create_hqta_segments.log", retention="3 months")
logger.add("./logs/hqta_processing.log", retention="3 months")
logger.add(sys.stderr,
format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
level="INFO")

logger.info(f"Analysis date: {analysis_date}")
logger.info(f"B1_create_hqta_segments Analysis date: {analysis_date}")

start = dt.datetime.now()

Expand Down Expand Up @@ -319,4 +319,4 @@ def find_primary_direction_across_hqta_segments(
logger.info(f"cut segments: {time2 - time1}")

end = dt.datetime.now()
logger.info(f"total execution time: {end - start}")
logger.info(f"B1_create_hqta_segments execution time: {end - start}")
6 changes: 3 additions & 3 deletions high_quality_transit_areas/B2_sjoin_stops_to_segments.py

```diff
@@ -210,12 +210,12 @@ def sjoin_stops_and_stop_times_to_hqta_segments(
     #from dask.distributed import Client
     #client = Client("dask-scheduler.dask.svc.cluster.local:8786")
 
-    logger.add("./logs/B2_sjoin_stops_to_segments.log", retention="3 months")
+    logger.add("./logs/hqta_processing.log", retention="3 months")
     logger.add(sys.stderr,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                level="INFO")
 
-    logger.info(f"Analysis date: {analysis_date}")
+    logger.info(f"B2_sjoin_stops_to_segments Analysis date: {analysis_date}")
 
     start = dt.datetime.now()

@@ -255,6 +255,6 @@ def sjoin_stops_and_stop_times_to_hqta_segments(
     )
 
     end = dt.datetime.now()
-    logger.info(f"Execution time: {end-start}")
+    logger.info(f"B2_sjoin_stops_to_segments execution time: {end-start}")
 
     #client.close()
```
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C1_prep_pairwise_intersections.py

```diff
@@ -82,12 +82,12 @@ def sjoin_against_other_operators(
     #from dask.distributed import Client
     #client = Client("dask-scheduler.dask.svc.cluster.local:8786")
 
-    logger.add("./logs/C1_prep_pairwise_intersections.log", retention = "3 months")
+    logger.add("./logs/hqta_processing.log", retention = "3 months")
     logger.add(sys.stderr,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                level="INFO")
 
-    logger.info(f"Analysis date: {analysis_date}")
+    logger.info(f"C1_prep_pairwise_intersections Analysis date: {analysis_date}")
     start = dt.datetime.now()
 
     corridors = prep_bus_corridors()

@@ -145,6 +145,6 @@ def sjoin_against_other_operators(
     )
 
     end = dt.datetime.now()
-    logger.info(f"execution time: {end-start}")
+    logger.info(f"C1_prep_pairwise_intersections execution time: {end-start}")
 
     #client.close()
```
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C2_get_intersections.py

```diff
@@ -101,12 +101,12 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     #from dask.distributed import Client
     #client = Client("dask-scheduler.dask.svc.cluster.local:8786")
 
-    logger.add("./logs/C2_find_intersections.log", retention = "3 months")
+    logger.add("./logs/hqta_processing.log", retention = "3 months")
     logger.add(sys.stderr,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                level="INFO")
 
-    logger.info(f"Analysis date: {analysis_date}")
+    logger.info(f"C2_find_intersections Analysis date: {analysis_date}")
 
     start = dt.datetime.now()

@@ -130,6 +130,6 @@ def find_intersections(pairs_table: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     )
 
     end = dt.datetime.now()
-    logger.info(f"execution time: {end-start}")
+    logger.info(f"C2_find_intersections execution time: {end-start}")
 
     #client.close()
```
6 changes: 3 additions & 3 deletions high_quality_transit_areas/C3_create_bus_hqta_types.py

```diff
@@ -119,12 +119,12 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     #from dask.distributed import Client
 
     #client = Client("dask-scheduler.dask.svc.cluster.local:8786")
-    logger.add("./logs/C3_create_bus_hqta_types.log", retention="3 months")
+    logger.add("./logs/hqta_processing.log", retention="3 months")
     logger.add(sys.stderr,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                level="INFO")
 
-    logger.info(f"Analysis date: {analysis_date}")
+    logger.info(f"C3_create_bus_hqta_types Analysis date: {analysis_date}")
     start = dt.datetime.now()
 
     # Start with the gdf of all the hqta_segments

@@ -162,6 +162,6 @@ def create_stops_along_corridors(all_stops: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     )
 
     end = dt.datetime.now()
-    logger.info(f"execution time: {end-start}")
+    logger.info(f"C3_create_bus_hqta_types execution time: {end-start}")
 
     #client.close()
```
6 changes: 3 additions & 3 deletions high_quality_transit_areas/D1_assemble_hqta_points.py

```diff
@@ -201,12 +201,12 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
 
 if __name__=="__main__":
 
-    logger.add("./logs/D1_assemble_hqta_points.log", retention="3 months")
+    logger.add("./logs/hqta_processing.log", retention="3 months")
     logger.add(sys.stderr,
                format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}",
                level="INFO")
 
-    logger.info(f"Analysis date: {analysis_date}")
+    logger.info(f"D1_assemble_hqta_points Analysis date: {analysis_date}")
     start = dt.datetime.now()
 
     rail_ferry_brt = rail_ferry_brt_extract.get_rail_ferry_brt_extract().to_crs(

@@ -267,4 +267,4 @@ def final_processing(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
     logger.info("export as geoparquet")
 
     end = dt.datetime.now()
-    logger.info(f"execution time: {end-start}")
+    logger.info(f"D1_assemble_hqta_points execution time: {end-start}")
```