Dec open data #580

Merged
10 commits merged on Dec 15, 2022
2 changes: 2 additions & 0 deletions _shared_utils/shared_utils/rt_dates.py
@@ -15,6 +15,7 @@
"sep2022": "2022-09-14",
"oct2022": "2022-10-12",
"nov2022": "2022-11-16",
"dec2022": "2022-12-14",
}


@@ -27,6 +28,7 @@
"sep2022": 5,
"oct2022": 6,
"nov2022": 7,
"dec2022": 8,
}

# Planning and Modal Advisory Committee (PMAC) - quarterly
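For reference, downstream scripts consume these lookups by key rather than hard-coding dates. A minimal sketch (`DATES` is an assumed name for the first dict, since the hunk header truncates it):

```python
# Hypothetical usage; DATES is an assumed name for the date lookup above.
from shared_utils import rt_dates

analysis_date = rt_dates.DATES["dec2022"]  # "2022-12-14"
```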
8 changes: 4 additions & 4 deletions bus_service_increase/bus_service_utils/better_bus_utils.py
@@ -22,7 +22,7 @@

from calitp.tables import tbls
from siuba import *
-from typing import Literal
+from typing import Literal, Union

from shared_utils import geography_utils
from bus_service_utils import calenviroscreen_lehd_utils
@@ -145,8 +145,8 @@ def get_sorted_transit_routes(


def add_district_description(
-    df: pd.DataFrame | gpd.GeoDataFrame
-) -> pd.DataFrame | gpd.GeoDataFrame:
+    df: Union[pd.DataFrame, gpd.GeoDataFrame]
+) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
"""
Add in caltrans_district column.
Go from numeric district column (District = 1, 2, 3) to
@@ -237,7 +237,7 @@ def get_quartiles_by_district(
"""

def subset_by_district(gdf: gpd.GeoDataFrame,
-        district: str | int,
+        district: Union[str, int],
stat_col: str) -> gpd.GeoDataFrame:
# extra filtering to only keep if trips > 0
gdf2 = gdf[(gdf[district_col] == district) &
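A note on why `Union` replaces the `|` unions here: PEP 604 syntax like `pd.DataFrame | gpd.GeoDataFrame` is evaluated when the function is defined, so it raises `TypeError` on Python 3.9 and earlier. A minimal sketch of the failing and working spellings (toy function, not from the repo):

```python
from typing import Union

import pandas as pd

# On Python < 3.10 this raises TypeError at import time, because the
# annotation is evaluated when the def statement runs:
# def f(df: pd.DataFrame | None) -> pd.DataFrame | None: ...

# Union works on 3.7+:
def f(df: Union[pd.DataFrame, None]) -> Union[pd.DataFrame, None]:
    return df
```

`from __future__ import annotations` would also defer evaluation and make `|` safe on older interpreters, but swapping in `Union`, as this PR does, keeps the annotations usable at runtime.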
10 changes: 5 additions & 5 deletions bus_service_increase/bus_service_utils/gtfs_build.py
@@ -8,13 +8,13 @@
import pandas as pd

from calitp.sql import to_snakecase
-from typing import Literal
+from typing import Literal, Union

from shared_utils import geography_utils

def merge_routes_trips(
-    routelines: gpd.GeoDataFrame | dg.GeoDataFrame,
-    trips: pd.DataFrame | dd.DataFrame,
+    routelines: Union[gpd.GeoDataFrame, dg.GeoDataFrame],
+    trips: Union[pd.DataFrame, dd.DataFrame],
merge_cols: list = ["calitp_itp_id", "calitp_url_number", "shape_id"],
crs: str = geography_utils.WGS84,
join: Literal["left", "inner", "outer", "right", "cross"] = "left",
@@ -77,8 +77,8 @@ def group_and_aggregate(df: dd.DataFrame,
.reset_index()
)

-    # return pd.DataFrame for now, since it's not clear what the metadata should be
-    # if we are inputting different things in stats_col
+    # return pd.DataFrame for now, since it's not clear what the
+    # metadata should be if we are inputting different things in stats_col
return agg_df.compute()

final = pd.DataFrame()
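The re-wrapped comment points at a real dask constraint: lazy results need their output schema (`meta`) declared up front, and when `stats_col` can request arbitrary aggregations that schema isn't knowable in advance, so computing eagerly and returning a concrete `pd.DataFrame` is simpler. A toy sketch of the pattern (illustrative columns, not the repo's schema):

```python
import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"route_id": ["a", "a", "b"], "trips": [1, 2, 3]})
ddf = dd.from_pandas(pdf, npartitions=1)

agg_df = (ddf.groupby("route_id")
          .agg({"trips": "sum"})   # the aggregation spec varies at runtime
          .reset_index()
          )

# Materialize so callers get a plain pd.DataFrame with known dtypes.
result = agg_df.compute()
```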
105 changes: 3 additions & 102 deletions bus_service_increase/bus_service_utils/report_utils.py
@@ -11,115 +11,16 @@
Working example:
https://github.com/CityOfLosAngeles/planning-entitlements/blob/master/notebooks/D1-entitlement-demographics.ipynb

-integer_cols = ["E", "F"]
-one_decimal_cols = ["A", "D", ]
-two_decimal_cols = ["C"]
-
-(df.astype(
-    {c: "Int64" for c in integer_cols})
-    .style.format(subset=one_decimal_cols,
-        formatter = {c: '{:,.1f}' for c in one_decimal_cols})
-    .format(subset=two_decimal_cols,
-        formatter = {c: '{:,.2f}' for c in two_decimal_cols}
-    )
-)

# Currency: https://stackoverflow.com/questions/35019156/pandas-format-column-as-currency

'''


-def style_table(
-    df: pd.DataFrame,
-    rename_cols: dict = {},
-    drop_cols: list = [],
-    integer_cols: list = [],
-    one_decimal_cols: list = [],
-    two_decimal_cols: list = [],
-    three_decimal_cols: list = [],
-    currency_cols: list = [],
-    percent_cols: list = [],
-    left_align_cols: list = "first", # by default, left align first col
-    center_align_cols: list = "all", # by default, center align all other cols
-    right_align_cols: list = [],
-    custom_format_cols: dict = {},
-    display_table: bool = True
-) -> pd.io.formats.style.Styler | str:
-    """
-    Returns a pandas Styler object with some basic formatting.
-    Any other tweaks for currency, percentages, etc should be done before / after.
-    """
-    df = (df.drop(columns = drop_cols)
-          .rename(columns = rename_cols)
-         )
-
-    if len(integer_cols) > 0:
-        df = df.astype({c: "Int64" for c in integer_cols})
-
-    if left_align_cols == "first":
-        left_align_cols = list(df.columns)[0]
-    if center_align_cols == "all":
-        center_align_cols = list(df.columns)
-        # all other columns except first one is center aligned
-        center_align_cols = [c for c in center_align_cols if c not in left_align_cols]
-
-    df_style = (df.style
-                .format(#subset = integer_cols,
-                    formatter = {c: '{:,g}' for c in integer_cols})
-                .format(#subset = one_decimal_cols,
-                    formatter = {c: '{:,.1f}' for c in one_decimal_cols})
-                .format(#subset = two_decimal_cols,
-                    formatter = {c: '{:,.2f}' for c in two_decimal_cols})
-                .format(#subset = three_decimal_cols,
-                    formatter = {c: '{:,.3f}' for c in three_decimal_cols})
-                .format(#subset = percent_cols,
-                    formatter = {c: '{:,.2%}' for c in percent_cols})
-                .format(#subset = currency_cols,
-                    formatter = {c: '$ {:,.2f}' for c in currency_cols})
-                .set_properties(subset=left_align_cols,
-                                **{'text-align': 'left'})
-                .set_properties(subset=center_align_cols,
-                                **{'text-align': 'center'})
-                .set_properties(subset=right_align_cols,
-                                **{'text-align': 'right'})
-                .set_table_styles([dict(selector='th',
-                                        props=[('text-align', 'center')])
-                                  ])
-                .hide(axis="index")
-               )
-
-
-    def add_custom_format(
-        df_style: pd.io.formats.style.Styler,
-        format_str: str, cols_to_format: list,) -> pd.io.formats.style.Styler:
-        """
-        Appends any additional formatting needs.
-        key: format string, such as '{:.1%}'
-        value: list of columns to apply that formatter to.
-        """
-        new_styler = (df_style
-                      .format(formatter = {c: format_str for c in cols_to_format}))
-
-        return new_styler
-
-
-    if len(list(custom_format_cols.keys())) > 0:
-        for format_str, cols_to_format in custom_format_cols.items():
-            df_style = add_custom_format(df_style, format_str, cols_to_format)
-
-    if display_table is True:
-        display(HTML(df_style.to_html()))
-
-    return df_style


# Display a table of route-level stats for each route_group
# Displaying route_name makes chart too crowded
def style_route_stats(df):
df = df.assign(
-        route_short_name = df.apply(lambda x:
-            x.route_long_name if x.route_short_name is None
-            else x.route_short_name, axis=1)
+        route_short_name = df.apply(
+            lambda x: x.route_long_name if x.route_short_name is None
+            else x.route_short_name, axis=1)
)

# Rename columns for display
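Both deleted blocks (the docstring example and `style_table` itself) wrapped the same pandas Styler idiom; for reference, a self-contained version of it (toy columns, not the repo's):

```python
import pandas as pd

df = pd.DataFrame({"A": [1.25, 3.5], "C": [0.123, 0.456], "E": [10.0, 20.0]})

integer_cols = ["E"]
one_decimal_cols = ["A"]
two_decimal_cols = ["C"]

styled = (df.astype({c: "Int64" for c in integer_cols})
          .style.format(subset=one_decimal_cols,
                        formatter={c: "{:,.1f}" for c in one_decimal_cols})
          .format(subset=two_decimal_cols,
                  formatter={c: "{:,.2f}" for c in two_decimal_cols})
          )
# In a notebook: display(styled); elsewhere: styled.to_html()
```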
34 changes: 24 additions & 10 deletions high_quality_transit_areas/A1b_muni_weekend_rail.py
@@ -14,13 +14,17 @@
import pandas as pd
import pendulum

-from shared_utils import gtfs_utils, geography_utils
+from shared_utils import utils, gtfs_utils, geography_utils, rt_utils
from update_vars import analysis_date, TEMP_GCS

-previous_sat = (pendulum.from_format(analysis_date, fmt="YYYY-MM-DD")
+date_str = analysis_date.strftime(rt_utils.FULL_DATE_FMT)
+
+previous_sat = (pendulum.from_format(
+    date_str,
+    fmt = "YYYY-MM-DD")
.date()
.previous(pendulum.SATURDAY)
-    .strftime('%Y-%m-%d')
+    .strftime(rt_utils.FULL_DATE_FMT)
)
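What the new lines compute: the analysis date is rendered to a string first (which implies `rt_utils.FULL_DATE_FMT` is `"%Y-%m-%d"`, since pendulum then parses it as `YYYY-MM-DD`), and pendulum walks back to the Saturday before it. In isolation, with the December date:

```python
import pendulum

d = pendulum.from_format("2022-12-14", fmt="YYYY-MM-DD").date()
prev_sat = d.previous(pendulum.SATURDAY).strftime("%Y-%m-%d")
# prev_sat == "2022-12-10", the Saturday before the Wednesday analysis date
```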

# Muni weekend trips, instead of going from primary_trip_query,
@@ -55,6 +59,8 @@ def download_muni_weekend_rail_trips(
custom_filtering = additional_filters
)

+    routes.to_parquet(f"{TEMP_GCS}muni_weekend_rail_route_info.parquet")

# Just grab the route_ids in the trip table
subset_routes = routes.route_id.unique().tolist()

@@ -91,6 +97,9 @@ def download_muni_stops(
additional_filters = {"route_type": ['0', '1', '2']}
)

+    muni_weekend_rail_trips.to_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_trips.parquet")

muni_stop_times = gtfs_utils.get_stop_times(
selected_date = analysis_date,
itp_id_list = [itp_id],
@@ -100,7 +109,9 @@
trip_df = muni_weekend_rail_trips
)

-    unique_muni_weekend_rail_stops = muni_stop_times.stop_key.unique().tolist()
+    muni_stop_times.to_parquet(f"{TEMP_GCS}muni_weekend_rail_stop_times.parquet")
+
+    unique_muni_weekend_rail_stops = muni_stop_times.stop_id.unique().tolist()

keep_stop_cols = [
"calitp_itp_id", "stop_id",
@@ -114,12 +125,15 @@
stop_cols = keep_stop_cols,
get_df = True,
crs = geography_utils.CA_NAD83Albers,
-        custom_filtering = {"stop_key": unique_muni_weekend_rail_stops}
-        )# should be ok to drop duplicates, but must use stop_id for future joins...
-        .drop_duplicates(subset=["calitp_itp_id", "stop_id"])
+        custom_filtering = {"stop_id": unique_muni_weekend_rail_stops}
+        ).drop_duplicates(subset=["calitp_itp_id", "stop_id"])
.reset_index(drop=True)
)

-    muni_stops.to_parquet(f"{TEMP_GCS}muni_rail_stops.parquet")
+    utils.geoparquet_gcs_export(
+        muni_stops,
+        TEMP_GCS,
+        "muni_weekend_rail_stops"
+    )

return muni_stops
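The switch from a bare `to_parquet` to `utils.geoparquet_gcs_export` matters because `muni_stops` is a GeoDataFrame, and the new filename is what A2_combine_stops reads back. A rough sketch of what such a helper can look like (hypothetical body; the real `shared_utils.utils` implementation may differ):

```python
import geopandas as gpd

def geoparquet_gcs_export(gdf: gpd.GeoDataFrame, gcs_dir: str, name: str) -> None:
    # geopandas writes GeoParquet natively; with gcsfs installed,
    # a gs:// path behaves like a local one.
    gdf.to_parquet(f"{gcs_dir}{name}.parquet")
```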
13 changes: 11 additions & 2 deletions high_quality_transit_areas/A2_combine_stops.py
@@ -36,6 +36,13 @@
'18105', '18088'
]

+new_muni_stops = [
+    '17876', '17875', # Chinatown-Rose Pak
+    '17877', '17874', # Union Square/Market Street
+    '17873', '17878', # Yerba Buena/Moscone
+    '13156', '3156', # 4th & Brannan
+]

BRT_STOPS_FILTER = {
182: metro_street_running,
282: van_ness_ids
@@ -62,7 +69,9 @@

# Handle Muni separately - temp, can remove in 2023
muni_weekend_rail.download_muni_stops(282)
-muni_rail_stops = gpd.read_parquet(f"{TEMP_GCS}muni_rail_stops.parquet")
+muni_rail_stops = gpd.read_parquet(f"{TEMP_GCS}muni_weekend_rail_stops.parquet")
+new_muni_rail_stops = muni_rail_stops[
+    muni_rail_stops.stop_id.isin(new_muni_stops)].reset_index(drop=True)

time1 = datetime.datetime.now()
logger.info(f"grabbed rail: {time1-start}")
@@ -86,7 +95,7 @@
# Concatenate datasets that need to be clipped to CA
rail_brt = pd.concat([
rail_stops,
-    muni_rail_stops,
+    new_muni_rail_stops,
brt_stops
], axis=0, ignore_index= True)

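The subset above keeps only the newly opened Central Subway stops before concatenating with the other layers. A toy sketch of the filter-then-concat pattern (fabricated geometries):

```python
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

new_muni_stops = ["17876", "17875"]  # subset of the list added above

muni_rail_stops = gpd.GeoDataFrame(
    {"stop_id": ["17876", "99999"]},
    geometry=[Point(0, 0), Point(1, 1)],
)
brt_stops = gpd.GeoDataFrame({"stop_id": ["b1"]}, geometry=[Point(2, 2)])

new_muni_rail_stops = muni_rail_stops[
    muni_rail_stops.stop_id.isin(new_muni_stops)
].reset_index(drop=True)

# pd.concat keeps the GeoDataFrame type when all inputs are geo frames
rail_brt = pd.concat([new_muni_rail_stops, brt_stops], axis=0, ignore_index=True)
```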
12 changes: 11 additions & 1 deletion high_quality_transit_areas/A3_rail_ferry_brt_extract.py
@@ -6,6 +6,7 @@
"""
import dask_geopandas as dg
from utilities import catalog_filepath
+from A2_combine_stops import new_muni_stops

COMPILED_RAIL_BRT_FERRY = catalog_filepath("rail_brt_ferry_initial")

@@ -15,7 +16,16 @@ def get_rail_ferry_brt_extract():

keep_cols = ["calitp_itp_id", "stop_id",
"route_type", "geometry"]


+    # Temporarily assign route_type for new Muni stops to be rail
+    # since route_info wasn't attached
+    df = df.assign(
+        route_type = df.apply(
+            lambda x: "1" if x.stop_id in new_muni_stops and
+            x.calitp_itp_id==282
+            else x.route_type, axis=1, meta=('route_type', 'str'))
+    )

df2 = (df[keep_cols].assign(
hqta_type = df.route_type.map(
lambda x: "major_stop_rail" if x in ["0", "1", "2"]
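A note on `meta=('route_type', 'str')` above: `df` here is a dask frame, and a row-wise `apply` can't cheaply infer its output schema, so dask requires it to be declared as a `(name, dtype)` pair. A toy sketch of the same move with plain dask (column names mirror the real ones, data is fabricated):

```python
import dask.dataframe as dd
import pandas as pd

new_muni_stops = ["17876"]  # stands in for the list imported from A2_combine_stops

pdf = pd.DataFrame({"calitp_itp_id": [282, 300],
                    "stop_id": ["17876", "123"],
                    "route_type": ["3", "3"]})
ddf = dd.from_pandas(pdf, npartitions=1)

ddf = ddf.assign(
    route_type=ddf.apply(
        lambda x: "1" if (x.stop_id in new_muni_stops) and (x.calitp_itp_id == 282)
        else x.route_type,
        axis=1,
        meta=("route_type", "str"),  # declare the (name, dtype) dask gets back
    )
)
# ddf.compute(): route_type flips to "1" only for the new Muni stop
```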
2 changes: 1 addition & 1 deletion high_quality_transit_areas/B1_create_hqta_segments.py
@@ -175,7 +175,7 @@ def select_shapes_and_segment(
it's ready to cut into segments.

For routes where 2 shape_ids were chosen...1 in each direction,
-    find thedifference.
+    find the difference.

Concatenate these 2 portions and then cut HQTA segments.
Returns the hqta_segments for all the routes across all operators.
29 changes: 27 additions & 2 deletions high_quality_transit_areas/D1_assemble_hqta_points.py
@@ -24,7 +24,7 @@
import A3_rail_ferry_brt_extract as rail_ferry_brt_extract
import utilities
from shared_utils import utils, geography_utils, portfolio_utils
-from update_vars import analysis_date, COMPILED_CACHED_VIEWS
+from update_vars import analysis_date, TEMP_GCS, COMPILED_CACHED_VIEWS

#fs = get_fs()

Expand All @@ -35,16 +35,42 @@
MAJOR_STOP_BUS_FILE = utilities.catalog_filepath("major_stop_bus")
STOPS_IN_CORRIDOR_FILE = utilities.catalog_filepath("stops_in_hq_corr")

+def merge_muni_trips_route_info():
+    """
+    Temporarily attach Muni with its route_info back in.
+    Remove in Jan 2023.
+    """
+    muni_trips = pd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_trips.parquet")
+    muni_route_info = pd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_route_info.parquet")
+
+    trips_with_route_info = pd.merge(
+        muni_trips,
+        muni_route_info.drop_duplicates(subset="route_id"),
+        on = ["calitp_itp_id", "route_id"],
+        how = "inner",
+        validate = "m:1"
+    )
+
+    return trips_with_route_info


def add_route_info(hqta_points: dg.GeoDataFrame) -> dg.GeoDataFrame:
"""
Use calitp_itp_id-stop_id to add route_id back in,
using the trips and stop_times table.
"""
stop_times = dd.read_parquet(
f"{COMPILED_CACHED_VIEWS}st_{analysis_date}.parquet")
+    muni_stop_times = dd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_stop_times.parquet")
trips = dd.read_parquet(
f"{COMPILED_CACHED_VIEWS}trips_{analysis_date}.parquet")
+    muni_trips = merge_muni_trips_route_info()

+    stop_times = dd.multi.concat([stop_times, muni_stop_times], axis=0)
+    trips = dd.multi.concat([trips, muni_trips], axis=0)

stop_cols = ["calitp_itp_id", "stop_id"]
trip_cols = ["calitp_itp_id", "trip_id"]
@@ -74,7 +100,6 @@ def add_route_info(hqta_points: dg.GeoDataFrame) -> dg.GeoDataFrame:
return ca_hqta_points



def get_agency_names() -> pd.DataFrame:
names = portfolio_utils.add_agency_name(analysis_date)

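The `dd.multi.concat` calls above rely on dask accepting a mix of dask and pandas frames in one concat: the cached views are dask DataFrames, while `merge_muni_trips_route_info` returns plain pandas (and its `validate="m:1"` guards against route_info fanning out the trips). A toy sketch of the mixed concat:

```python
import dask.dataframe as dd
import pandas as pd

big = dd.from_pandas(
    pd.DataFrame({"trip_id": ["t1"], "route_id": ["r1"]}),
    npartitions=1,
)
patch = pd.DataFrame({"trip_id": ["t2"], "route_id": ["r2"]})

# pandas inputs are wrapped into single-partition dask frames automatically
trips = dd.multi.concat([big, patch], axis=0)
print(trips.compute())  # two rows, one from each source
```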