Merge location choice model with the BayDAG version

* https://github.com/SANDAG/activitysim/blob/BayDAG_estimation/activitysim/abm/models/location_choice.py
* https://github.com/SANDAG/activitysim/blob/BayDAG_estimation/activitysim/estimation/larch/location_choice.py
ActivitySim · Nov 1, 2023 · b8beb66 · b8beb66
1 parent 425467e
commit b8beb66
Show file tree

Hide file tree

Showing 2 changed files with 103 additions and 19 deletions.
diff --git a/activitysim/abm/models/location_choice.py b/activitysim/abm/models/location_choice.py
@@ -17,6 +17,7 @@
 )
 from activitysim.core.interaction_sample import interaction_sample
 from activitysim.core.interaction_sample_simulate import interaction_sample_simulate
+from activitysim.core.util import reindex
 
 from .util import estimation
 from .util import logsums as logsum
@@ -138,22 +139,17 @@ def _location_sample(
  logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))
 
  sample_size = model_settings["SAMPLE_SIZE"]
- if config.setting("disable_destination_sampling", False) or (
- estimator and estimator.want_unsampled_alternatives
- ):
- # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
- logger.info(
- "Estimation mode for %s using unsampled alternatives short_circuit_choices"
- % (trace_label,)
- )
- sample_size = 0
+ if estimator:
+ sample_size = model_settings.get("ESTIMATION_SAMPLE_SIZE", 0)
 
  locals_d = {
  "skims": skims,
  "segment_size": segment_name,
  "orig_col_name": skims.orig_key, # added for sharrow flows
  "dest_col_name": skims.dest_key, # added for sharrow flows
  "timeframe": "timeless",
+ "reindex": reindex,
+ "land_use": inject.get_table("land_use").to_frame(),
  }
  constants = config.get_model_constants(model_settings)
  locals_d.update(constants)
@@ -470,6 +466,38 @@ def run_location_sample(
  trace_label=trace_label,
  )
 
+ # FIXME temporary code to ensure sampled alternative is in choices for estimation
+ # Hack to get shorter run times when you don't care about creating EDB for location choice models
+ if estimator:
+ # grabbing survey values
+ survey_persons = estimation.manager.get_survey_table("persons")
+ if "school_location" in trace_label:
+ survey_choices = survey_persons["school_zone_id"].reset_index()
+ elif ("workplace_location" in trace_label) and ("external" not in trace_label):
+ survey_choices = survey_persons["workplace_zone_id"].reset_index()
+ else:
+ return choices
+ survey_choices.columns = ["person_id", "alt_dest"]
+ survey_choices = survey_choices[
+ survey_choices["person_id"].isin(choices.index)
+ & (survey_choices.alt_dest > 0)
+ ]
+ # merging survey destination into table if not available
+ joined_data = survey_choices.merge(
+ choices, on=["person_id", "alt_dest"], how="left", indicator=True
+ )
+ missing_rows = joined_data[joined_data["_merge"] == "left_only"]
+ missing_rows["pick_count"] = 1
+ if len(missing_rows) > 0:
+ new_choices = missing_rows[
+ ["person_id", "alt_dest", "prob", "pick_count"]
+ ].set_index("person_id")
+ choices = choices.append(new_choices, ignore_index=False).sort_index()
+ # making probability the mean of all other sampled destinations by person
+ choices["prob"] = choices["prob"].fillna(
+ choices.groupby("person_id")["prob"].transform("mean")
+ )
+
  return choices
 
 
@@ -601,6 +629,8 @@ def run_location_simulate(
  "orig_col_name": skims.orig_key, # added for sharrow flows
  "dest_col_name": skims.dest_key, # added for sharrow flows
  "timeframe": "timeless",
+ "reindex": reindex,
+ "land_use": inject.get_table("land_use").to_frame(),
  }
  constants = config.get_model_constants(model_settings)
  if constants is not None:
@@ -808,6 +838,24 @@ def run_location_choice(
  )
  tracing.trace_df(choices_df, estimation_trace_label)
 
+ if want_logsums & (not skip_choice):
+ # grabbing index, could be person_id or proto_person_id
+ index_name = choices_df.index.name
+ # merging mode choice logsum of chosen alternative to choices
+ choices_df = (
+ pd.merge(
+ choices_df.reset_index(),
+ location_sample_df.reset_index()[
+ [index_name, model_settings["ALT_DEST_COL_NAME"], ALT_LOGSUM]
+ ],
+ how="left",
+ left_on=[index_name, "choice"],
+ right_on=[index_name, model_settings["ALT_DEST_COL_NAME"]],
+ )
+ .drop(columns=model_settings["ALT_DEST_COL_NAME"])
+ .set_index(index_name)
+ )
+
  choices_list.append(choices_df)
 
  if want_sample_table:
@@ -825,7 +873,7 @@ def run_location_choice(
  else:
  # this will only happen with small samples (e.g. singleton) with no (e.g.) school segs
  logger.warning("%s no choices", trace_label)
- choices_df = pd.DataFrame(columns=["choice", "logsum"])
+ choices_df = pd.DataFrame(columns=["choice", "logsum", ALT_LOGSUM])
 
  if len(sample_list) > 0:
  save_sample_df = pd.concat(sample_list)
@@ -869,7 +917,8 @@ def iterate_location_choice(
  Returns
  -------
  adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
- adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
+ adds destination choice logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
+ adds mode choice logsum to selected destination column model_settings['MODE_CHOICE_LOGSUM_COLUMN_NAME']- if provided
  adds annotations to persons table
  """
 
@@ -879,7 +928,11 @@ def iterate_location_choice(
  chooser_filter_column = model_settings["CHOOSER_FILTER_COLUMN_NAME"]
 
  dest_choice_column_name = model_settings["DEST_CHOICE_COLUMN_NAME"]
- logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME")
+ dc_logsum_column_name = model_settings.get("DEST_CHOICE_LOGSUM_COLUMN_NAME")
+ mc_logsum_column_name = model_settings.get("MODE_CHOICE_LOGSUM_COLUMN_NAME")
+ want_logsums = (dc_logsum_column_name is not None) | (
+ mc_logsum_column_name is not None
+ )
 
  sample_table_name = model_settings.get("DEST_CHOICE_SAMPLE_TABLE_NAME")
  want_sample_table = (
@@ -929,7 +982,7 @@ def iterate_location_choice(
  persons_merged_df_,
  network_los,
  shadow_price_calculator=spc,
- want_logsums=logsum_column_name is not None,
+ want_logsums=want_logsums,
  want_sample_table=want_sample_table,
  estimator=estimator,
  model_settings=model_settings,
@@ -1005,10 +1058,15 @@ def iterate_location_choice(
  )
 
  # add the dest_choice_logsum column to persons dataframe
- if logsum_column_name:
- persons_df[logsum_column_name] = (
+ if dc_logsum_column_name:
+ persons_df[dc_logsum_column_name] = (
  choices_df["logsum"].reindex(persons_df.index).astype("float")
  )
+ # add the mode choice logsum column to persons dataframe
+ if mc_logsum_column_name:
+ persons_df[mc_logsum_column_name] = (
+ choices_df[ALT_LOGSUM].reindex(persons_df.index).astype("float")
+ )
 
  if save_sample_df is not None:
  # might be None for tiny samples even if sample_table_name was specified
@@ -1047,9 +1105,13 @@ def iterate_location_choice(
  if trace_hh_id:
  tracing.trace_df(households_df, label=trace_label, warn_if_empty=True)
 
- if logsum_column_name:
+ if dc_logsum_column_name:
+ tracing.print_summary(
+ dc_logsum_column_name, choices_df["logsum"], value_counts=True
+ )
+ if mc_logsum_column_name:
  tracing.print_summary(
- logsum_column_name, choices_df["logsum"], value_counts=True
+ mc_logsum_column_name, choices_df[ALT_LOGSUM], value_counts=True
  )
 
  return persons_df
@@ -1133,4 +1195,4 @@ def school_location(
  )
 
  if estimator:
- estimator.end_estimation()
+ estimator.end_estimation()
diff --git a/activitysim/estimation/larch/location_choice.py b/activitysim/estimation/larch/location_choice.py
@@ -140,6 +140,9 @@ def _file_exists(filename):
  .set_index("segment")
  )
  size_spec = size_spec.loc[:, size_spec.max() > 0]
+ assert (
+ len(size_spec) > 0
+ ), f"Empty size_spec, is model_selector {SIZE_TERM_SELECTOR} in your size term file?"
 
  size_coef = size_coefficients_from_spec(size_spec)
 
@@ -289,6 +292,9 @@ def split(a, n):
  else:
  av = 1
 
+ assert len(x_co) > 0, "Empty chooser dataframe"
+ assert len(x_ca_1) > 0, "Empty alternatives dataframe"
+
  d = DataFrames(co=x_co, ca=x_ca_1, av=av)
 
  m = Model(dataservice=d)
@@ -406,6 +412,14 @@ def workplace_location_model(**kwargs):
  )
 
 
+def external_workplace_location_model(**kwargs):
+ unused = kwargs.pop("name", None)  # remove any caller-supplied "name" so it cannot collide with the fixed name= below; the popped value is not read again in this function
+ return location_choice_model(
+ name="external_workplace_location",  # the model name is hard-coded by this wrapper
+ **kwargs,  # every other keyword argument is forwarded unchanged
+ )
+
+
 def school_location_model(**kwargs):
  unused = kwargs.pop("name", None)
  return location_choice_model(
@@ -442,9 +456,17 @@ def non_mandatory_tour_destination_model(**kwargs):
  )
 
 
+def external_non_mandatory_destination_model(**kwargs):
+ unused = kwargs.pop("name", None)  # remove any caller-supplied "name" so it cannot collide with the fixed name= below; the popped value is not read again in this function
+ return location_choice_model(
+ name="external_non_mandatory_destination",  # the model name is hard-coded by this wrapper
+ **kwargs,  # every other keyword argument is forwarded unchanged
+ )
+
+
 def trip_destination_model(**kwargs):
  unused = kwargs.pop("name", None)
  return location_choice_model(
  name="trip_destination",
  **kwargs,
- )
+ )