Dec open data #580

Merged
10 commits merged on Dec 15, 2022
2 changes: 2 additions & 0 deletions _shared_utils/shared_utils/rt_dates.py
@@ -15,6 +15,7 @@
"sep2022": "2022-09-14",
"oct2022": "2022-10-12",
"nov2022": "2022-11-16",
"dec2022": "2022-12-14",
}


@@ -27,6 +28,7 @@
"sep2022": 5,
"oct2022": 6,
"nov2022": 7,
"dec2022": 8,
}

# Planning and Modal Advisory Committee (PMAC) - quarterly
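For reference, downstream scripts consume these lookups by key rather than hard-coding dates. A minimal sketch (`DATES` is an assumed name for the first dict, since the hunk header truncates it):

```python
# Hypothetical usage; DATES is an assumed name for the date lookup above.
from shared_utils import rt_dates

analysis_date = rt_dates.DATES["dec2022"]  # "2022-12-14"
```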
8 changes: 4 additions & 4 deletions bus_service_increase/bus_service_utils/better_bus_utils.py
@@ -22,7 +22,7 @@

from calitp.tables import tbls
from siuba import *
-from typing import Literal
+from typing import Literal, Union

from shared_utils import geography_utils
from bus_service_utils import calenviroscreen_lehd_utils
@@ -145,8 +145,8 @@ def get_sorted_transit_routes(


def add_district_description(
-    df: pd.DataFrame | gpd.GeoDataFrame
-) -> pd.DataFrame | gpd.GeoDataFrame:
+    df: Union[pd.DataFrame, gpd.GeoDataFrame]
+) -> Union[pd.DataFrame, gpd.GeoDataFrame]:
"""
Add in caltrans_district column.
Go from numeric district column (District = 1, 2, 3) to
@@ -237,7 +237,7 @@ def get_quartiles_by_district(
"""

def subset_by_district(gdf: gpd.GeoDataFrame,
-        district: str | int,
+        district: Union[str, int],
stat_col: str) -> gpd.GeoDataFrame:
# extra filtering to only keep if trips > 0
gdf2 = gdf[(gdf[district_col] == district) &
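A note on why `Union` replaces the `|` unions here: PEP 604 syntax like `pd.DataFrame | gpd.GeoDataFrame` is evaluated when the function is defined, so it raises `TypeError` on Python 3.9 and earlier. A minimal sketch of the failing and working spellings (toy function, not from the repo):

```python
from typing import Union

import pandas as pd

# On Python < 3.10 this raises TypeError at import time, because the
# annotation is evaluated when the def statement runs:
# def f(df: pd.DataFrame | None) -> pd.DataFrame | None: ...

# Union works on 3.7+:
def f(df: Union[pd.DataFrame, None]) -> Union[pd.DataFrame, None]:
    return df
```

`from __future__ import annotations` would also defer evaluation and make `|` safe on older interpreters, but swapping in `Union`, as this PR does, keeps the annotations usable at runtime.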
10 changes: 5 additions & 5 deletions bus_service_increase/bus_service_utils/gtfs_build.py
@@ -8,13 +8,13 @@
import pandas as pd

from calitp.sql import to_snakecase
-from typing import Literal
+from typing import Literal, Union

from shared_utils import geography_utils

def merge_routes_trips(
-    routelines: gpd.GeoDataFrame | dg.GeoDataFrame,
-    trips: pd.DataFrame | dd.DataFrame,
+    routelines: Union[gpd.GeoDataFrame, dg.GeoDataFrame],
+    trips: Union[pd.DataFrame, dd.DataFrame],
merge_cols: list = ["calitp_itp_id", "calitp_url_number", "shape_id"],
crs: str = geography_utils.WGS84,
join: Literal["left", "inner", "outer", "right", "cross"] = "left",
@@ -77,8 +77,8 @@ def group_and_aggregate(df: dd.DataFrame,
.reset_index()
)

-    # return pd.DataFrame for now, since it's not clear what the metadata should be
-    # if we are inputting different things in stats_col
+    # return pd.DataFrame for now, since it's not clear what the
+    # metadata should be if we are inputting different things in stats_col
return agg_df.compute()

final = pd.DataFrame()
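The re-wrapped comment points at a real dask constraint: lazy results need their output schema (`meta`) declared up front, and when `stats_col` can request arbitrary aggregations that schema isn't knowable in advance, so computing eagerly and returning a concrete `pd.DataFrame` is simpler. A toy sketch of the pattern (illustrative columns, not the repo's schema):

```python
import dask.dataframe as dd
import pandas as pd

pdf = pd.DataFrame({"route_id": ["a", "a", "b"], "trips": [1, 2, 3]})
ddf = dd.from_pandas(pdf, npartitions=1)

agg_df = (ddf.groupby("route_id")
          .agg({"trips": "sum"})   # the aggregation spec varies at runtime
          .reset_index()
          )

# Materialize so callers get a plain pd.DataFrame with known dtypes.
result = agg_df.compute()
```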
105 changes: 3 additions & 102 deletions bus_service_increase/bus_service_utils/report_utils.py
@@ -11,115 +11,16 @@
Working example:
https://github.com/CityOfLosAngeles/planning-entitlements/blob/master/notebooks/D1-entitlement-demographics.ipynb

-integer_cols = ["E", "F"]
-one_decimal_cols = ["A", "D", ]
-two_decimal_cols = ["C"]
-
-(df.astype(
-    {c: "Int64" for c in integer_cols})
-    .style.format(subset=one_decimal_cols,
-        formatter = {c: '{:,.1f}' for c in one_decimal_cols})
-    .format(subset=two_decimal_cols,
-        formatter = {c: '{:,.2f}' for c in two_decimal_cols}
-    )
-)

# Currency: https://stackoverflow.com/questions/35019156/pandas-format-column-as-currency

'''


-def style_table(
-    df: pd.DataFrame,
-    rename_cols: dict = {},
-    drop_cols: list = [],
-    integer_cols: list = [],
-    one_decimal_cols: list = [],
-    two_decimal_cols: list = [],
-    three_decimal_cols: list = [],
-    currency_cols: list = [],
-    percent_cols: list = [],
-    left_align_cols: list = "first", # by default, left align first col
-    center_align_cols: list = "all", # by default, center align all other cols
-    right_align_cols: list = [],
-    custom_format_cols: dict = {},
-    display_table: bool = True
-) -> pd.io.formats.style.Styler | str:
-    """
-    Returns a pandas Styler object with some basic formatting.
-    Any other tweaks for currency, percentages, etc should be done before / after.
-    """
-    df = (df.drop(columns = drop_cols)
-          .rename(columns = rename_cols)
-         )
-
-    if len(integer_cols) > 0:
-        df = df.astype({c: "Int64" for c in integer_cols})
-
-    if left_align_cols == "first":
-        left_align_cols = list(df.columns)[0]
-    if center_align_cols == "all":
-        center_align_cols = list(df.columns)
-        # all other columns except first one is center aligned
-        center_align_cols = [c for c in center_align_cols if c not in left_align_cols]
-
-    df_style = (df.style
-                .format(#subset = integer_cols,
-                    formatter = {c: '{:,g}' for c in integer_cols})
-                .format(#subset = one_decimal_cols,
-                    formatter = {c: '{:,.1f}' for c in one_decimal_cols})
-                .format(#subset = two_decimal_cols,
-                    formatter = {c: '{:,.2f}' for c in two_decimal_cols})
-                .format(#subset = three_decimal_cols,
-                    formatter = {c: '{:,.3f}' for c in three_decimal_cols})
-                .format(#subset = percent_cols,
-                    formatter = {c: '{:,.2%}' for c in percent_cols})
-                .format(#subset = currency_cols,
-                    formatter = {c: '$ {:,.2f}' for c in currency_cols})
-                .set_properties(subset=left_align_cols,
-                                **{'text-align': 'left'})
-                .set_properties(subset=center_align_cols,
-                                **{'text-align': 'center'})
-                .set_properties(subset=right_align_cols,
-                                **{'text-align': 'right'})
-                .set_table_styles([dict(selector='th',
-                                        props=[('text-align', 'center')])
-                                  ])
-                .hide(axis="index")
-               )
-
-
-    def add_custom_format(
-        df_style: pd.io.formats.style.Styler,
-        format_str: str, cols_to_format: list,) -> pd.io.formats.style.Styler:
-        """
-        Appends any additional formatting needs.
-        key: format string, such as '{:.1%}'
-        value: list of columns to apply that formatter to.
-        """
-        new_styler = (df_style
-                      .format(formatter = {c: format_str for c in cols_to_format}))
-
-        return new_styler
-
-
-    if len(list(custom_format_cols.keys())) > 0:
-        for format_str, cols_to_format in custom_format_cols.items():
-            df_style = add_custom_format(df_style, format_str, cols_to_format)
-
-    if display_table is True:
-        display(HTML(df_style.to_html()))
-
-    return df_style


# Display a table of route-level stats for each route_group
# Displaying route_name makes chart too crowded
def style_route_stats(df):
df = df.assign(
-        route_short_name = df.apply(lambda x:
-            x.route_long_name if x.route_short_name is None
-            else x.route_short_name, axis=1)
+        route_short_name = df.apply(
+            lambda x: x.route_long_name if x.route_short_name is None
+            else x.route_short_name, axis=1)
)

# Rename columns for display
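Both deleted blocks (the docstring example and `style_table` itself) wrapped the same pandas Styler idiom; for reference, a self-contained version of it (toy columns, not the repo's):

```python
import pandas as pd

df = pd.DataFrame({"A": [1.25, 3.5], "C": [0.123, 0.456], "E": [10.0, 20.0]})

integer_cols = ["E"]
one_decimal_cols = ["A"]
two_decimal_cols = ["C"]

styled = (df.astype({c: "Int64" for c in integer_cols})
          .style.format(subset=one_decimal_cols,
                        formatter={c: "{:,.1f}" for c in one_decimal_cols})
          .format(subset=two_decimal_cols,
                  formatter={c: "{:,.2f}" for c in two_decimal_cols})
          )
# In a notebook: display(styled); elsewhere: styled.to_html()
```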
34 changes: 24 additions & 10 deletions high_quality_transit_areas/A1b_muni_weekend_rail.py
@@ -14,13 +14,17 @@
import pandas as pd
import pendulum

-from shared_utils import gtfs_utils, geography_utils
+from shared_utils import utils, gtfs_utils, geography_utils, rt_utils
from update_vars import analysis_date, TEMP_GCS

-previous_sat = (pendulum.from_format(analysis_date, fmt="YYYY-MM-DD")
+date_str = analysis_date.strftime(rt_utils.FULL_DATE_FMT)
+
+previous_sat = (pendulum.from_format(
+    date_str,
+    fmt = "YYYY-MM-DD")
.date()
.previous(pendulum.SATURDAY)
-    .strftime('%Y-%m-%d')
+    .strftime(rt_utils.FULL_DATE_FMT)
)
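What the new lines compute: the analysis date is rendered to a string first (which implies `rt_utils.FULL_DATE_FMT` is `"%Y-%m-%d"`, since pendulum then parses it as `YYYY-MM-DD`), and pendulum walks back to the Saturday before it. In isolation, with the December date:

```python
import pendulum

d = pendulum.from_format("2022-12-14", fmt="YYYY-MM-DD").date()
prev_sat = d.previous(pendulum.SATURDAY).strftime("%Y-%m-%d")
# prev_sat == "2022-12-10", the Saturday before the Wednesday analysis date
```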

# Muni weekend trips, instead of going from primary_trip_query,
@@ -55,6 +59,8 @@ def download_muni_weekend_rail_trips(
custom_filtering = additional_filters
)

+    routes.to_parquet(f"{TEMP_GCS}muni_weekend_rail_route_info.parquet")

# Just grab the route_ids in the trip table
subset_routes = routes.route_id.unique().tolist()

@@ -91,6 +97,9 @@ def download_muni_stops(
additional_filters = {"route_type": ['0', '1', '2']}
)

+    muni_weekend_rail_trips.to_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_trips.parquet")

muni_stop_times = gtfs_utils.get_stop_times(
selected_date = analysis_date,
itp_id_list = [itp_id],
@@ -100,7 +109,9 @@
trip_df = muni_weekend_rail_trips
)

-    unique_muni_weekend_rail_stops = muni_stop_times.stop_key.unique().tolist()
+    muni_stop_times.to_parquet(f"{TEMP_GCS}muni_weekend_rail_stop_times.parquet")
+
+    unique_muni_weekend_rail_stops = muni_stop_times.stop_id.unique().tolist()

keep_stop_cols = [
"calitp_itp_id", "stop_id",
@@ -114,12 +125,15 @@
stop_cols = keep_stop_cols,
get_df = True,
crs = geography_utils.CA_NAD83Albers,
-        custom_filtering = {"stop_key": unique_muni_weekend_rail_stops}
-        )# should be ok to drop duplicates, but must use stop_id for future joins...
-        .drop_duplicates(subset=["calitp_itp_id", "stop_id"])
+        custom_filtering = {"stop_id": unique_muni_weekend_rail_stops}
+        ).drop_duplicates(subset=["calitp_itp_id", "stop_id"])
.reset_index(drop=True)
)

-    muni_stops.to_parquet(f"{TEMP_GCS}muni_rail_stops.parquet")
+    utils.geoparquet_gcs_export(
+        muni_stops,
+        TEMP_GCS,
+        "muni_weekend_rail_stops"
+    )

return muni_stops
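The switch from a bare `to_parquet` to `utils.geoparquet_gcs_export` matters because `muni_stops` is a GeoDataFrame, and the new filename is what A2_combine_stops reads back. A rough sketch of what such a helper can look like (hypothetical body; the real `shared_utils.utils` implementation may differ):

```python
import geopandas as gpd

def geoparquet_gcs_export(gdf: gpd.GeoDataFrame, gcs_dir: str, name: str) -> None:
    # geopandas writes GeoParquet natively; with gcsfs installed,
    # a gs:// path behaves like a local one.
    gdf.to_parquet(f"{gcs_dir}{name}.parquet")
```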
13 changes: 11 additions & 2 deletions high_quality_transit_areas/A2_combine_stops.py
@@ -36,6 +36,13 @@
'18105', '18088'
]

+new_muni_stops = [
+    '17876', '17875', # Chinatown-Rose Pak
+    '17877', '17874', # Union Square/Market Street
+    '17873', '17878', # Yerba Buena/Moscone
+    '13156', '3156', # 4th & Brannan
+]

BRT_STOPS_FILTER = {
182: metro_street_running,
282: van_ness_ids
@@ -62,7 +69,9 @@

# Handle Muni separately - temp, can remove in 2023
muni_weekend_rail.download_muni_stops(282)
-muni_rail_stops = gpd.read_parquet(f"{TEMP_GCS}muni_rail_stops.parquet")
+muni_rail_stops = gpd.read_parquet(f"{TEMP_GCS}muni_weekend_rail_stops.parquet")
+new_muni_rail_stops = muni_rail_stops[
+    muni_rail_stops.stop_id.isin(new_muni_stops)].reset_index(drop=True)

time1 = datetime.datetime.now()
logger.info(f"grabbed rail: {time1-start}")
@@ -86,7 +95,7 @@
# Concatenate datasets that need to be clipped to CA
rail_brt = pd.concat([
rail_stops,
-    muni_rail_stops,
+    new_muni_rail_stops,
brt_stops
], axis=0, ignore_index= True)

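The subset above keeps only the newly opened Central Subway stops before concatenating with the other layers. A toy sketch of the filter-then-concat pattern (fabricated geometries):

```python
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

new_muni_stops = ["17876", "17875"]  # subset of the list added above

muni_rail_stops = gpd.GeoDataFrame(
    {"stop_id": ["17876", "99999"]},
    geometry=[Point(0, 0), Point(1, 1)],
)
brt_stops = gpd.GeoDataFrame({"stop_id": ["b1"]}, geometry=[Point(2, 2)])

new_muni_rail_stops = muni_rail_stops[
    muni_rail_stops.stop_id.isin(new_muni_stops)
].reset_index(drop=True)

# pd.concat keeps the GeoDataFrame type when all inputs are geo frames
rail_brt = pd.concat([new_muni_rail_stops, brt_stops], axis=0, ignore_index=True)
```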
12 changes: 11 additions & 1 deletion high_quality_transit_areas/A3_rail_ferry_brt_extract.py
@@ -6,6 +6,7 @@
"""
import dask_geopandas as dg
from utilities import catalog_filepath
+from A2_combine_stops import new_muni_stops

COMPILED_RAIL_BRT_FERRY = catalog_filepath("rail_brt_ferry_initial")

@@ -15,7 +16,16 @@ def get_rail_ferry_brt_extract():

keep_cols = ["calitp_itp_id", "stop_id",
"route_type", "geometry"]


+    # Temporarily assign route_type for new Muni stops to be rail
+    # since route_info wasn't attached
+    df = df.assign(
+        route_type = df.apply(
+            lambda x: "1" if x.stop_id in new_muni_stops and
+            x.calitp_itp_id==282
+            else x.route_type, axis=1, meta=('route_type', 'str'))
+    )

df2 = (df[keep_cols].assign(
hqta_type = df.route_type.map(
lambda x: "major_stop_rail" if x in ["0", "1", "2"]
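A note on `meta=('route_type', 'str')` above: `df` here is a dask frame, and a row-wise `apply` can't cheaply infer its output schema, so dask requires it to be declared as a `(name, dtype)` pair. A toy sketch of the same move with plain dask (column names mirror the real ones, data is fabricated):

```python
import dask.dataframe as dd
import pandas as pd

new_muni_stops = ["17876"]  # stands in for the list imported from A2_combine_stops

pdf = pd.DataFrame({"calitp_itp_id": [282, 300],
                    "stop_id": ["17876", "123"],
                    "route_type": ["3", "3"]})
ddf = dd.from_pandas(pdf, npartitions=1)

ddf = ddf.assign(
    route_type=ddf.apply(
        lambda x: "1" if (x.stop_id in new_muni_stops) and (x.calitp_itp_id == 282)
        else x.route_type,
        axis=1,
        meta=("route_type", "str"),  # declare the (name, dtype) dask gets back
    )
)
# ddf.compute(): route_type flips to "1" only for the new Muni stop
```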
2 changes: 1 addition & 1 deletion high_quality_transit_areas/B1_create_hqta_segments.py
@@ -175,7 +175,7 @@ def select_shapes_and_segment(
it's ready to cut into segments.

For routes where 2 shape_ids were chosen...1 in each direction,
-    find thedifference.
+    find the difference.

Concatenate these 2 portions and then cut HQTA segments.
Returns the hqta_segments for all the routes across all operators.
29 changes: 27 additions & 2 deletions high_quality_transit_areas/D1_assemble_hqta_points.py
@@ -24,7 +24,7 @@
import A3_rail_ferry_brt_extract as rail_ferry_brt_extract
import utilities
from shared_utils import utils, geography_utils, portfolio_utils
-from update_vars import analysis_date, COMPILED_CACHED_VIEWS
+from update_vars import analysis_date, TEMP_GCS, COMPILED_CACHED_VIEWS

#fs = get_fs()

Expand All @@ -35,16 +35,42 @@
MAJOR_STOP_BUS_FILE = utilities.catalog_filepath("major_stop_bus")
STOPS_IN_CORRIDOR_FILE = utilities.catalog_filepath("stops_in_hq_corr")

+def merge_muni_trips_route_info():
+    """
+    Temporarily attach Muni with its route_info back in.
+    Remove in Jan 2023.
+    """
+    muni_trips = pd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_trips.parquet")
+    muni_route_info = pd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_route_info.parquet")
+
+    trips_with_route_info = pd.merge(
+        muni_trips,
+        muni_route_info.drop_duplicates(subset="route_id"),
+        on = ["calitp_itp_id", "route_id"],
+        how = "inner",
+        validate = "m:1"
+    )
+
+    return trips_with_route_info


def add_route_info(hqta_points: dg.GeoDataFrame) -> dg.GeoDataFrame:
"""
Use calitp_itp_id-stop_id to add route_id back in,
using the trips and stop_times table.
"""
stop_times = dd.read_parquet(
f"{COMPILED_CACHED_VIEWS}st_{analysis_date}.parquet")
+    muni_stop_times = dd.read_parquet(
+        f"{TEMP_GCS}muni_weekend_rail_stop_times.parquet")
trips = dd.read_parquet(
f"{COMPILED_CACHED_VIEWS}trips_{analysis_date}.parquet")
+    muni_trips = merge_muni_trips_route_info()

+    stop_times = dd.multi.concat([stop_times, muni_stop_times], axis=0)
+    trips = dd.multi.concat([trips, muni_trips], axis=0)

stop_cols = ["calitp_itp_id", "stop_id"]
trip_cols = ["calitp_itp_id", "trip_id"]
@@ -74,7 +100,6 @@ def add_route_info(hqta_points: dg.GeoDataFrame) -> dg.GeoDataFrame:
return ca_hqta_points



def get_agency_names() -> pd.DataFrame:
names = portfolio_utils.add_agency_name(analysis_date)

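The `dd.multi.concat` calls above rely on dask accepting a mix of dask and pandas frames in one concat: the cached views are dask DataFrames, while `merge_muni_trips_route_info` returns plain pandas (and its `validate="m:1"` guards against route_info fanning out the trips). A toy sketch of the mixed concat:

```python
import dask.dataframe as dd
import pandas as pd

big = dd.from_pandas(
    pd.DataFrame({"trip_id": ["t1"], "route_id": ["r1"]}),
    npartitions=1,
)
patch = pd.DataFrame({"trip_id": ["t2"], "route_id": ["r2"]})

# pandas inputs are wrapped into single-partition dask frames automatically
trips = dd.multi.concat([big, patch], axis=0)
print(trips.compute())  # two rows, one from each source
```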