Skip to content

Commit

Permalink
Merge pull request #782 from wsp-sag/data-type-op-pd-cat
Browse files Browse the repository at this point in the history
Data Type Optimization
  • Loading branch information
jpn-- authored Feb 13, 2024
2 parents a2ad2a1 + db03dae commit 1f42cd3
Show file tree
Hide file tree
Showing 63 changed files with 797 additions and 145 deletions.
14 changes: 12 additions & 2 deletions activitysim/abm/models/atwork_subtour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,21 @@ def atwork_subtour_destination(
estimator.end_estimation()

subtours[destination_column_name] = choices_df["choice"]
assign_in_place(tours, subtours[[destination_column_name]])
assign_in_place(
tours,
subtours[[destination_column_name]],
state.settings.downcast_int,
state.settings.downcast_float,
)

if want_logsums:
subtours[logsum_column_name] = choices_df["logsum"]
assign_in_place(tours, subtours[[logsum_column_name]])
assign_in_place(
tours,
subtours[[logsum_column_name]],
state.settings.downcast_int,
state.settings.downcast_float,
)

state.add_table("tours", tours)

Expand Down
21 changes: 20 additions & 1 deletion activitysim/abm/models/atwork_subtour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,15 @@

def add_null_results(state, trace_label, tours):
    """
    Store a placeholder ``atwork_subtour_frequency`` column on ``tours``.

    Logs that the model step is being skipped, fills the column with the
    empty string as a single-category pandas categorical, and writes the
    table back through ``state.add_table``.

    Parameters
    ----------
    state
        Object exposing ``add_table(name, df)``; receives the updated table
        under the name ``"tours"``.
    trace_label : str
        Label interpolated into the "Skipping ..." log message.
    tours : pandas.DataFrame
        Tours table; modified in place by assigning the new column.
    """
    logger.info("Skipping %s: add_null_results", trace_label)

    # The only value written here is "", so it is the only category.
    cat_type = pd.api.types.CategoricalDtype(
        [""],
        ordered=False,
    )

    # NOTE: there are no ``choices`` in this null-result path; the previous
    # ``choices = choices.astype(cat_type)`` statement referenced an
    # undefined local and raised UnboundLocalError, so it is removed.
    tours["atwork_subtour_frequency"] = ""
    tours["atwork_subtour_frequency"] = tours["atwork_subtour_frequency"].astype(
        cat_type
    )
    state.add_table("tours", tours)


Expand Down Expand Up @@ -117,6 +125,11 @@ def atwork_subtour_frequency(

# convert indexes to alternative names
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
cat_type = pd.api.types.CategoricalDtype(
alternatives.index.tolist() + [""],
ordered=False,
)
choices = choices.astype(cat_type)

if estimator:
estimator.write_choices(choices)
Expand All @@ -137,6 +150,12 @@ def atwork_subtour_frequency(

subtours = process_atwork_subtours(state, work_tours, alternatives)

# convert purpose to pandas categoricals
purpose_type = pd.api.types.CategoricalDtype(
alternatives.columns.tolist() + ["atwork"], ordered=False
)
subtours["tour_type"] = subtours["tour_type"].astype(purpose_type)

tours = state.extend_table("tours", subtours)

state.tracing.register_traceable_table("tours", subtours)
Expand Down
4 changes: 3 additions & 1 deletion activitysim/abm/models/atwork_subtour_mode_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ def atwork_subtour_mode_choice(
"%s choices" % trace_label, choices_df[mode_column_name], value_counts=True
)

assign_in_place(tours, choices_df)
assign_in_place(
tours, choices_df, state.settings.downcast_int, state.settings.downcast_float
)
state.add_table("tours", tours)

# - annotate tours table
Expand Down
4 changes: 3 additions & 1 deletion activitysim/abm/models/atwork_subtour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,9 @@ def atwork_subtour_scheduling(
choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
)

assign_in_place(tours, tdd_choices)
assign_in_place(
tours, tdd_choices, state.settings.downcast_int, state.settings.downcast_float
)
state.add_table("tours", tours)

if trace_hh_id:
Expand Down
2 changes: 2 additions & 0 deletions activitysim/abm/models/cdap.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,8 @@ def cdap_simulate(
estimator.end_estimation()

choices = choices.reindex(persons.index)
cap_cat_type = pd.api.types.CategoricalDtype(["", "M", "N", "H"], ordered=False)
choices = choices.astype(cap_cat_type)
persons["cdap_activity"] = choices

expressions.assign_columns(
Expand Down
10 changes: 9 additions & 1 deletion activitysim/abm/models/joint_tour_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
def add_null_results(state, trace_label, tours):
    """
    Store a placeholder ``composition`` column on ``tours``.

    Logs that the model step is being skipped, fills ``composition`` with
    the empty string as an unordered pandas categorical, and writes the
    table back through ``state.add_table``.

    Parameters
    ----------
    state
        Object exposing ``add_table(name, df)``; receives the updated table
        under the name ``"tours"``.
    trace_label : str
        Label interpolated into the "Skipping ..." log message.
    tours : pandas.DataFrame
        Tours table; modified in place by assigning the ``composition``
        column.
    """
    # Pass the label as a logging argument so formatting is deferred until
    # the record is actually emitted.
    logger.info("Skipping %s: add_null_results", trace_label)

    # Every row gets "", but the dtype also declares the non-empty labels.
    # NOTE(review): presumably this keeps the dtype compatible with tours
    # produced when the model does run -- confirm against the model spec.
    cat_type = pd.api.types.CategoricalDtype(
        ["", "adults", "children", "mixed"], ordered=False
    )
    tours["composition"] = ""
    tours["composition"] = tours["composition"].astype(cat_type)
    state.add_table("tours", tours)


Expand Down Expand Up @@ -123,6 +127,10 @@ def joint_tour_composition(

# convert indexes to alternative names
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
cat_type = pd.api.types.CategoricalDtype(
model_spec.columns.tolist() + [""], ordered=False
)
choices = choices.astype(cat_type)

if estimator:
estimator.write_choices(choices)
Expand All @@ -134,7 +142,7 @@ def joint_tour_composition(
joint_tours["composition"] = choices

# reindex since we ran model on a subset of households
tours["composition"] = choices.reindex(tours.index).fillna("").astype(str)
tours["composition"] = choices.reindex(tours.index).fillna("")
state.add_table("tours", tours)

tracing.print_summary(
Expand Down
14 changes: 12 additions & 2 deletions activitysim/abm/models/joint_tour_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,22 @@ def joint_tour_destination(

# add column as we want joint_tours table for tracing.
joint_tours["destination"] = choices_df.choice
assign_in_place(tours, joint_tours[["destination"]])
assign_in_place(
tours,
joint_tours[["destination"]],
state.settings.downcast_int,
state.settings.downcast_float,
)
state.add_table("tours", tours)

if want_logsums:
joint_tours[logsum_column_name] = choices_df["logsum"]
assign_in_place(tours, joint_tours[[logsum_column_name]])
assign_in_place(
tours,
joint_tours[[logsum_column_name]],
state.settings.downcast_int,
state.settings.downcast_float,
)

tracing.print_summary("destination", joint_tours.destination, describe=True)

Expand Down
15 changes: 13 additions & 2 deletions activitysim/abm/models/joint_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def joint_tour_frequency(

# convert indexes to alternative names
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
cat_type = pd.api.types.CategoricalDtype(
model_spec.columns.tolist(),
ordered=False,
)
choices = choices.astype(cat_type)

if estimator:
estimator.write_choices(choices)
Expand All @@ -138,6 +143,12 @@ def joint_tour_frequency(

joint_tours = process_joint_tours(state, choices, alternatives, temp_point_persons)

# convert purpose to pandas categoricals
purpose_type = pd.api.types.CategoricalDtype(
alternatives.columns.tolist(), ordered=False
)
joint_tours["tour_type"] = joint_tours["tour_type"].astype(purpose_type)

tours = state.extend_table("tours", joint_tours)

state.tracing.register_traceable_table("tours", joint_tours)
Expand All @@ -147,8 +158,8 @@ def joint_tour_frequency(

# we expect there to be an alt with no tours - which we can use to backfill non-travelers
no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
households["joint_tour_frequency"] = (
choices.reindex(households.index).fillna(no_tours_alt).astype(str)
households["joint_tour_frequency"] = choices.reindex(households.index).fillna(
no_tours_alt
)

households["num_hh_joint_tours"] = (
Expand Down
9 changes: 7 additions & 2 deletions activitysim/abm/models/joint_tour_participation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def get_tour_satisfaction(candidates, participate):

x = (
candidates[cols]
.groupby(["tour_id", "composition"])
.groupby(["tour_id", "composition"], observed=True)
.agg(
participants=("adult", "size"),
adults=("adult", "sum"),
Expand Down Expand Up @@ -475,7 +475,12 @@ def joint_tour_participation(
# update number_of_participants which was initialized to 1
joint_tours["number_of_participants"] = participants.groupby("tour_id").size()

assign_in_place(tours, joint_tours[["person_id", "number_of_participants"]])
assign_in_place(
tours,
joint_tours[["person_id", "number_of_participants"]],
state.settings.downcast_int,
state.settings.downcast_float,
)

state.add_table("tours", tours)

Expand Down
4 changes: 3 additions & 1 deletion activitysim/abm/models/joint_tour_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,9 @@ def joint_tour_scheduling(
choices.to_frame("tdd"), tdd_alts, left_on=["tdd"], right_index=True, how="left"
)

assign_in_place(tours, choices)
assign_in_place(
tours, choices, state.settings.downcast_int, state.settings.downcast_float
)
state.add_table("tours", tours)

# updated df for tracing
Expand Down
4 changes: 3 additions & 1 deletion activitysim/abm/models/mandatory_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ def mandatory_tour_scheduling(
tour_segment_col,
)

assign_in_place(tours, choices)
assign_in_place(
tours, choices, state.settings.downcast_int, state.settings.downcast_float
)
state.add_table("tours", tours)

# updated df for tracing
Expand Down
20 changes: 16 additions & 4 deletions activitysim/abm/models/mandatory_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ def add_null_results(state, trace_label, mandatory_tour_frequency_settings):
logger.info("Skipping %s: add_null_results", trace_label)

persons = state.get_dataframe("persons")
persons["mandatory_tour_frequency"] = ""
persons["mandatory_tour_frequency"] = pd.categorical(
"",
categories=["", "work1", "work2", "school1", "school2", "work_and_school"],
ordered=False,
)

tours = pd.DataFrame()
tours["tour_category"] = None
Expand Down Expand Up @@ -134,6 +138,10 @@ def mandatory_tour_frequency(

# convert indexes to alternative names
choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
cat_type = pd.api.types.CategoricalDtype(
model_spec.columns.tolist() + [""], ordered=False
)
choices = choices.astype(cat_type)

if estimator:
estimator.write_choices(choices)
Expand All @@ -158,6 +166,12 @@ def mandatory_tour_frequency(
state, persons=choosers, mandatory_tour_frequency_alts=alternatives
)

# convert purpose to pandas categoricals
purpose_type = pd.api.types.CategoricalDtype(
alternatives.columns.tolist() + ["univ", "home", "escort"], ordered=False
)
mandatory_tours["tour_type"] = mandatory_tours["tour_type"].astype(purpose_type)

tours = state.extend_table("tours", mandatory_tours)
state.tracing.register_traceable_table("tours", mandatory_tours)
state.get_rn_generator().add_channel("tours", mandatory_tours)
Expand All @@ -166,9 +180,7 @@ def mandatory_tour_frequency(
persons = state.get_dataframe("persons")

# need to reindex as we only handled persons with cdap_activity == 'M'
persons["mandatory_tour_frequency"] = (
choices.reindex(persons.index).fillna("").astype(str)
)
persons["mandatory_tour_frequency"] = choices.reindex(persons.index).fillna("")

expressions.assign_columns(
state,
Expand Down
14 changes: 12 additions & 2 deletions activitysim/abm/models/non_mandatory_destination.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,21 @@ def non_mandatory_tour_destination(
[pure_school_escort_tours, non_mandatory_tours]
).set_index(nm_tour_index)

assign_in_place(tours, non_mandatory_tours[["destination"]])
assign_in_place(
tours,
non_mandatory_tours[["destination"]],
state.settings.downcast_int,
state.settings.downcast_float,
)

if want_logsums:
non_mandatory_tours[logsum_column_name] = choices_df["logsum"]
assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])
assign_in_place(
tours,
non_mandatory_tours[[logsum_column_name]],
state.settings.downcast_int,
state.settings.downcast_float,
)

assert all(
~tours["destination"].isna()
Expand Down
4 changes: 3 additions & 1 deletion activitysim/abm/models/non_mandatory_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,9 @@ def non_mandatory_tour_scheduling(
tour_segment_col,
)

assign_in_place(tours, choices)
assign_in_place(
tours, choices, state.settings.downcast_int, state.settings.downcast_float
)
state.add_table("tours", tours)

# updated df for tracing
Expand Down
8 changes: 8 additions & 0 deletions activitysim/abm/models/non_mandatory_tour_frequency.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,14 @@ def non_mandatory_tour_frequency(
)
assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

# convert purpose to pandas categoricals
purpose_type = pd.api.types.CategoricalDtype(
alternatives.columns.tolist(), ordered=False
)
non_mandatory_tours["tour_type"] = non_mandatory_tours["tour_type"].astype(
purpose_type
)

if estimator:
# make sure they created the right tours
survey_tours = estimation.manager.get_survey_table("tours").sort_index()
Expand Down
14 changes: 12 additions & 2 deletions activitysim/abm/models/parking_location_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,12 @@ def run_parking_destination(
if fail_some_trips_for_testing:
parking_df = parking_df.drop(parking_df.index[0])

assign_in_place(trips, parking_df.to_frame(parking_location_column_name))
assign_in_place(
trips,
parking_df.to_frame(parking_location_column_name),
state.settings.downcast_int,
state.settings.downcast_float,
)
trips[parking_location_column_name] = trips[
parking_location_column_name
].fillna(-1)
Expand Down Expand Up @@ -398,7 +403,12 @@ def parking_location(
trace_label=trace_label,
)

assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name))
assign_in_place(
trips_df,
parking_locations.to_frame(alt_destination_col_name),
state.settings.downcast_int,
state.settings.downcast_float,
)

state.add_table("trips", trips_df)

Expand Down
Loading

0 comments on commit 1f42cd3

Please sign in to comment.