ActivitySim · jpn-- · Feb 13, 2024 · Jul 12, 2023 · Jul 17, 2023 · Jul 25, 2023
diff --git a/activitysim/abm/models/atwork_subtour_destination.py b/activitysim/abm/models/atwork_subtour_destination.py
@@ -89,11 +89,21 @@ def atwork_subtour_destination(
  estimator.end_estimation()
 
  subtours[destination_column_name] = choices_df["choice"]
- assign_in_place(tours, subtours[[destination_column_name]])
+ assign_in_place(
+ tours,
+ subtours[[destination_column_name]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  if want_logsums:
  subtours[logsum_column_name] = choices_df["logsum"]
- assign_in_place(tours, subtours[[logsum_column_name]])
+ assign_in_place(
+ tours,
+ subtours[[logsum_column_name]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  state.add_table("tours", tours)
 

diff --git a/activitysim/abm/models/atwork_subtour_frequency.py b/activitysim/abm/models/atwork_subtour_frequency.py
@@ -24,7 +24,15 @@
 
 def add_null_results(state, trace_label, tours):
  logger.info("Skipping %s: add_null_results", trace_label)
- tours["atwork_subtour_frequency"] = np.nan
+ cat_type = pd.api.types.CategoricalDtype(
+ [""],
+ ordered=False,
+ )
+ # model skipped: there are no choices to convert, so fill the column directly
+ tours["atwork_subtour_frequency"] = ""
+ tours["atwork_subtour_frequency"] = tours["atwork_subtour_frequency"].astype(
+ cat_type
+ )
  state.add_table("tours", tours)
 
 
@@ -117,6 +125,11 @@ def atwork_subtour_frequency(
 
  # convert indexes to alternative names
  choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+ cat_type = pd.api.types.CategoricalDtype(
+ alternatives.index.tolist() + [""],
+ ordered=False,
+ )
+ choices = choices.astype(cat_type)
 
  if estimator:
  estimator.write_choices(choices)
@@ -137,6 +150,12 @@ def atwork_subtour_frequency(
 
  subtours = process_atwork_subtours(state, work_tours, alternatives)
 
+ # convert purpose to pandas categoricals
+ purpose_type = pd.api.types.CategoricalDtype(
+ alternatives.columns.tolist() + ["atwork"], ordered=False
+ )
+ subtours["tour_type"] = subtours["tour_type"].astype(purpose_type)
+
  tours = state.extend_table("tours", subtours)
 
  state.tracing.register_traceable_table("tours", subtours)

diff --git a/activitysim/abm/models/atwork_subtour_mode_choice.py b/activitysim/abm/models/atwork_subtour_mode_choice.py
@@ -190,7 +190,9 @@ def atwork_subtour_mode_choice(
  "%s choices" % trace_label, choices_df[mode_column_name], value_counts=True
  )
 
- assign_in_place(tours, choices_df)
+ assign_in_place(
+ tours, choices_df, state.settings.downcast_int, state.settings.downcast_float
+ )
  state.add_table("tours", tours)
 
  # - annotate tours table

diff --git a/activitysim/abm/models/atwork_subtour_scheduling.py b/activitysim/abm/models/atwork_subtour_scheduling.py
@@ -111,7 +111,9 @@ def atwork_subtour_scheduling(
  choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
  )
 
- assign_in_place(tours, tdd_choices)
+ assign_in_place(
+ tours, tdd_choices, state.settings.downcast_int, state.settings.downcast_float
+ )
  state.add_table("tours", tours)
 
  if trace_hh_id:

diff --git a/activitysim/abm/models/cdap.py b/activitysim/abm/models/cdap.py
@@ -219,6 +219,8 @@ def cdap_simulate(
  estimator.end_estimation()
 
  choices = choices.reindex(persons.index)
+ cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False)
+ choices = choices.astype(cap_cat_type)
  persons["cdap_activity"] = choices
 
  expressions.assign_columns(

diff --git a/activitysim/abm/models/joint_tour_composition.py b/activitysim/abm/models/joint_tour_composition.py
@@ -24,6 +24,10 @@
 def add_null_results(state, trace_label, tours):
  logger.info("Skipping %s: add_null_results" % trace_label)
  tours["composition"] = ""
+ composition_dtype = pd.api.types.CategoricalDtype(
+ categories=["", "adults", "children", "mixed"], ordered=False
+ )
+ tours["composition"] = tours["composition"].astype(composition_dtype)
  state.add_table("tours", tours)
 
 
@@ -123,6 +127,10 @@ def joint_tour_composition(
 
  # convert indexes to alternative names
  choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+ cat_type = pd.api.types.CategoricalDtype(
+ model_spec.columns.tolist() + [""], ordered=False
+ )
+ choices = choices.astype(cat_type)
 
  if estimator:
  estimator.write_choices(choices)
@@ -134,7 +142,7 @@ def joint_tour_composition(
  joint_tours["composition"] = choices
 
  # reindex since we ran model on a subset of households
- tours["composition"] = choices.reindex(tours.index).fillna("").astype(str)
+ tours["composition"] = choices.reindex(tours.index).fillna("")
  state.add_table("tours", tours)
 
  tracing.print_summary(

diff --git a/activitysim/abm/models/joint_tour_destination.py b/activitysim/abm/models/joint_tour_destination.py
@@ -87,12 +87,22 @@ def joint_tour_destination(
 
  # add column as we want joint_tours table for tracing.
  joint_tours["destination"] = choices_df.choice
- assign_in_place(tours, joint_tours[["destination"]])
+ assign_in_place(
+ tours,
+ joint_tours[["destination"]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
  state.add_table("tours", tours)
 
  if want_logsums:
  joint_tours[logsum_column_name] = choices_df["logsum"]
- assign_in_place(tours, joint_tours[[logsum_column_name]])
+ assign_in_place(
+ tours,
+ joint_tours[[logsum_column_name]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  tracing.print_summary("destination", joint_tours.destination, describe=True)
 

diff --git a/activitysim/abm/models/joint_tour_frequency.py b/activitysim/abm/models/joint_tour_frequency.py
@@ -116,6 +116,11 @@ def joint_tour_frequency(
 
  # convert indexes to alternative names
  choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+ cat_type = pd.api.types.CategoricalDtype(
+ model_spec.columns.tolist(),
+ ordered=False,
+ )
+ choices = choices.astype(cat_type)
 
  if estimator:
  estimator.write_choices(choices)
@@ -138,6 +143,12 @@ def joint_tour_frequency(
 
  joint_tours = process_joint_tours(state, choices, alternatives, temp_point_persons)
 
+ # convert purpose to pandas categoricals
+ purpose_type = pd.api.types.CategoricalDtype(
+ alternatives.columns.tolist(), ordered=False
+ )
+ joint_tours["tour_type"] = joint_tours["tour_type"].astype(purpose_type)
+
  tours = state.extend_table("tours", joint_tours)
 
  state.tracing.register_traceable_table("tours", joint_tours)
@@ -147,8 +158,8 @@ def joint_tour_frequency(
 
  # we expect there to be an alt with no tours - which we can use to backfill non-travelers
  no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
- households["joint_tour_frequency"] = (
- choices.reindex(households.index).fillna(no_tours_alt).astype(str)
+ households["joint_tour_frequency"] = choices.reindex(households.index).fillna(
+ no_tours_alt
  )
 
  households["num_hh_joint_tours"] = (

diff --git a/activitysim/abm/models/joint_tour_participation.py b/activitysim/abm/models/joint_tour_participation.py
@@ -92,7 +92,7 @@ def get_tour_satisfaction(candidates, participate):
 
  x = (
  candidates[cols]
- .groupby(["tour_id", "composition"])
+ .groupby(["tour_id", "composition"], observed=True)
  .agg(
  participants=("adult", "size"),
  adults=("adult", "sum"),
@@ -475,7 +475,12 @@ def joint_tour_participation(
  # update number_of_participants which was initialized to 1
  joint_tours["number_of_participants"] = participants.groupby("tour_id").size()
 
- assign_in_place(tours, joint_tours[["person_id", "number_of_participants"]])
+ assign_in_place(
+ tours,
+ joint_tours[["person_id", "number_of_participants"]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  state.add_table("tours", tours)
 

diff --git a/activitysim/abm/models/joint_tour_scheduling.py b/activitysim/abm/models/joint_tour_scheduling.py
@@ -161,7 +161,9 @@ def joint_tour_scheduling(
  choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
  )
 
- assign_in_place(tours, choices)
+ assign_in_place(
+ tours, choices, state.settings.downcast_int, state.settings.downcast_float
+ )
  state.add_table("tours", tours)
 
  # updated df for tracing

diff --git a/activitysim/abm/models/mandatory_scheduling.py b/activitysim/abm/models/mandatory_scheduling.py
@@ -62,7 +62,9 @@ def mandatory_tour_scheduling(
  tour_segment_col,
  )
 
- assign_in_place(tours, choices)
+ assign_in_place(
+ tours, choices, state.settings.downcast_int, state.settings.downcast_float
+ )
  state.add_table("tours", tours)
 
  # updated df for tracing

diff --git a/activitysim/abm/models/mandatory_tour_frequency.py b/activitysim/abm/models/mandatory_tour_frequency.py
@@ -30,7 +30,11 @@ def add_null_results(state, trace_label, mandatory_tour_frequency_settings):
  logger.info("Skipping %s: add_null_results", trace_label)
 
  persons = state.get_dataframe("persons")
- persons["mandatory_tour_frequency"] = ""
+ persons["mandatory_tour_frequency"] = pd.Categorical(
+ [""] * len(persons),  # Categorical needs list-like input, one value per row
+ categories=["", "work1", "work2", "school1", "school2", "work_and_school"],
+ ordered=False,
+ )
 
  tours = pd.DataFrame()
  tours["tour_category"] = None
@@ -134,6 +138,10 @@ def mandatory_tour_frequency(
 
  # convert indexes to alternative names
  choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
+ cat_type = pd.api.types.CategoricalDtype(
+ model_spec.columns.tolist() + [""], ordered=False
+ )
+ choices = choices.astype(cat_type)
 
  if estimator:
  estimator.write_choices(choices)
@@ -158,6 +166,12 @@ def mandatory_tour_frequency(
  state, persons=choosers, mandatory_tour_frequency_alts=alternatives
  )
 
+ # convert purpose to pandas categoricals
+ purpose_type = pd.api.types.CategoricalDtype(
+ alternatives.columns.tolist() + ["univ", "home", "escort"], ordered=False
+ )
+ mandatory_tours["tour_type"] = mandatory_tours["tour_type"].astype(purpose_type)
+
  tours = state.extend_table("tours", mandatory_tours)
  state.tracing.register_traceable_table("tours", mandatory_tours)
  state.get_rn_generator().add_channel("tours", mandatory_tours)
@@ -166,9 +180,7 @@ def mandatory_tour_frequency(
  persons = state.get_dataframe("persons")
 
  # need to reindex as we only handled persons with cdap_activity == 'M'
- persons["mandatory_tour_frequency"] = (
- choices.reindex(persons.index).fillna("").astype(str)
- )
+ persons["mandatory_tour_frequency"] = choices.reindex(persons.index).fillna("")
 
  expressions.assign_columns(
  state,

diff --git a/activitysim/abm/models/non_mandatory_destination.py b/activitysim/abm/models/non_mandatory_destination.py
@@ -107,11 +107,21 @@ def non_mandatory_tour_destination(
  [pure_school_escort_tours, non_mandatory_tours]
  ).set_index(nm_tour_index)
 
- assign_in_place(tours, non_mandatory_tours[["destination"]])
+ assign_in_place(
+ tours,
+ non_mandatory_tours[["destination"]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  if want_logsums:
  non_mandatory_tours[logsum_column_name] = choices_df["logsum"]
- assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])
+ assign_in_place(
+ tours,
+ non_mandatory_tours[[logsum_column_name]],
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  assert all(
  ~tours["destination"].isna()

diff --git a/activitysim/abm/models/non_mandatory_scheduling.py b/activitysim/abm/models/non_mandatory_scheduling.py
@@ -47,7 +47,9 @@ def non_mandatory_tour_scheduling(
  tour_segment_col,
  )
 
- assign_in_place(tours, choices)
+ assign_in_place(
+ tours, choices, state.settings.downcast_int, state.settings.downcast_float
+ )
  state.add_table("tours", tours)
 
  # updated df for tracing

diff --git a/activitysim/abm/models/non_mandatory_tour_frequency.py b/activitysim/abm/models/non_mandatory_tour_frequency.py
@@ -391,6 +391,14 @@ def non_mandatory_tour_frequency(
  )
  assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()
 
+ # convert purpose to pandas categoricals
+ purpose_type = pd.api.types.CategoricalDtype(
+ alternatives.columns.tolist(), ordered=False
+ )
+ non_mandatory_tours["tour_type"] = non_mandatory_tours["tour_type"].astype(
+ purpose_type
+ )
+
  if estimator:
  # make sure they created the right tours
  survey_tours = estimation.manager.get_survey_table("tours").sort_index()

diff --git a/activitysim/abm/models/parking_location_choice.py b/activitysim/abm/models/parking_location_choice.py
@@ -276,7 +276,12 @@ def run_parking_destination(
  if fail_some_trips_for_testing:
  parking_df = parking_df.drop(parking_df.index[0])
 
- assign_in_place(trips, parking_df.to_frame(parking_location_column_name))
+ assign_in_place(
+ trips,
+ parking_df.to_frame(parking_location_column_name),
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
  trips[parking_location_column_name] = trips[
  parking_location_column_name
  ].fillna(-1)
@@ -398,7 +403,12 @@ def parking_location(
  trace_label=trace_label,
  )
 
- assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name))
+ assign_in_place(
+ trips_df,
+ parking_locations.to_frame(alt_destination_col_name),
+ state.settings.downcast_int,
+ state.settings.downcast_float,
+ )
 
  state.add_table("trips", trips_df)