UCL · marghe-molaro · Apr 3, 2024 · Sep 17, 2024 · Sep 30, 2024 · Oct 2, 2024
diff --git a/src/tlo/events.py b/src/tlo/events.py
@@ -9,6 +9,8 @@
 if TYPE_CHECKING:
     from tlo import Simulation
 
+import pandas as pd
+
 
 class Priority(Enum):
     """Enumeration for the Priority, which is used in sorting the events in the simulation queue."""
@@ -63,8 +65,54 @@ def apply(self, target):
 
     def run(self):
         """Make the event happen."""
+        # If event-chain generation is enabled, record the affected rows of the population
+        # dataframe before and after `apply`, appending them to `self.sim.event_chains`.
+        sim = self.sim
+        is_individual = False
+        df_before = None
+
+        # Only record the event if its module is of interest and it is not in the ignore list
+        record_chain = (
+            sim.generate_event_chains
+            and self.module in sim.generate_event_chains_modules_of_interest
+            and all(sub not in str(self) for sub in sim.generate_event_chains_ignore_events)
+        )
+
+        def tag(rows, when, person_id=None):
+            # Return a copy of `rows` labelled with person, event, date and 'Before'/'After'
+            rows = rows.copy()  # copy so the labels are never written into a slice of `props`
+            rows['person_ID'] = rows.index if person_id is None else person_id
+            rows['event'] = self
+            rows['event_date'] = sim.date
+            rows['when'] = when
+            return rows
+
+        if record_chain:
+            # Individual-level events target a person id; population-level ones the population
+            is_individual = self.target != sim.population
+            if is_individual:
+                row = tag(sim.population.props.iloc[[self.target]], 'Before', self.target)
+                sim.event_chains = pd.concat([sim.event_chains, row], ignore_index=True)
+            else:
+                df_before = sim.population.props.copy()
+
         self.apply(self.target)
         self.post_apply_hook()
+
+        if record_chain:
+            if is_individual:
+                row = tag(sim.population.props.iloc[[self.target]], 'After', self.target)
+                sim.event_chains = pd.concat([sim.event_chains, row], ignore_index=True)
+            else:
+                df_after = sim.population.props.copy()
+                changed = df_before.compare(df_after).index
+                # `not`, not `~`: bitwise-not of a bool gives -1 or -2, which is always truthy
+                if not changed.empty:
+                    sim.event_chains = pd.concat(
+                        [sim.event_chains, tag(df_before.loc[changed], 'Before'),
+                         tag(df_after.loc[changed], 'After')],
+                        ignore_index=True,
+                    )
 
 
 class RegularEvent(Event):

diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py
@@ -315,9 +315,10 @@ def initialise_simulation(self, sim):
         # Launch the repeating event that will store statistics about the population structure
         sim.schedule_event(DemographyLoggingEvent(self), sim.date)
 
-        # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
-        self.other_death_poll = OtherDeathPoll(self)
-        sim.schedule_event(self.other_death_poll, sim.date)
+        if sim.generate_event_chains is False:
+            # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
+            self.other_death_poll = OtherDeathPoll(self)
+            sim.schedule_event(self.other_death_poll, sim.date)
 
         # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`)
         for _logger in (logger, logger_scale_factor):

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
@@ -2427,7 +2427,8 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None:
 
                             # Expected appt footprint before running event
                             _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT
-                            # Run event & get actual footprint
+
+                            # Run the HSI event (allowing it to return an updated appt_footprint)
                             actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)
 
                             # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call

diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py
@@ -631,11 +631,12 @@ def initialise_population(self, population):
         df.loc[df.is_alive, "hv_date_treated"] = pd.NaT
         df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT
 
-        # Launch sub-routines for allocating the right number of people into each category
-        self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
+        if self.sim.generate_event_chains is False:
+            # Launch sub-routines for allocating the right number of people into each category
+            self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
 
-        self.initialise_baseline_art(population)  # allocate baseline art coverage
-        self.initialise_baseline_tested(population)  # allocate baseline testing coverage
+            self.initialise_baseline_art(population)  # allocate baseline art coverage
+            self.initialise_baseline_tested(population)  # allocate baseline testing coverage
 
     def initialise_baseline_prevalence(self, population):
         """
@@ -905,10 +906,16 @@ def initialise_simulation(self, sim):
         df = sim.population.props
         p = self.parameters
 
-        # 1) Schedule the Main HIV Regular Polling Event
-        sim.schedule_event(
-            HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
-        )
+        if self.sim.generate_event_chains:
+            print("Should be generating data")
+            sim.schedule_event(
+                HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0)
+            )
+        else:
+            # 1) Schedule the Main HIV Regular Polling Event
+            sim.schedule_event(
+                HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
+            )
 
         # 2) Schedule the Logging Event
         sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1))
@@ -1662,6 +1669,37 @@ def do_at_generic_first_appt(
 #   Main Polling Event
 # ---------------------------------------------------------------------------
 
+class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin):
+    """The HIV polling event used when generating event chains.
+
+    Schedules an `HivInfectionEvent` for every person who is alive and not HIV-infected, on a
+    randomly drawn date counted from the date on which this event runs.
+    """
+
+    def __init__(self, module):
+        # The 120-year frequency means this poll recurs only in simulations longer than that
+        super().__init__(module, frequency=DateOffset(years=120))
+
+    def apply(self, population):
+        df = population.props
+
+        # Everyone who is alive and not infected is treated as susceptible
+        susc_idx = df.loc[df.is_alive & ~df.hv_inf].index
+        print("Number of individuals susceptible", len(susc_idx))
+
+        # Upper bound (exclusive) on the delay, in days: whole calendar years left, plus one.
+        # NOTE(review): this does not guarantee infection before the simulation ends - a date
+        # drawn from the final 365-day block can fall after `end_date`; confirm this is intended.
+        max_days = 365 * (int(self.sim.end_date.year - self.sim.date.year) + 1)
+
+        # Schedule the date of infection for each new infection
+        for person_id in susc_idx:
+            date_of_infection = self.sim.date + pd.DateOffset(
+                days=self.module.rng.randint(0, max_days)
+            )
+            self.sim.schedule_event(
+                HivInfectionEvent(self.module, person_id), date_of_infection
+            )
 
 class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin):
     """ The HIV Regular Polling Events
@@ -1683,6 +1721,7 @@ def apply(self, population):
         fraction_of_year_between_polls = self.frequency.months / 12
         beta = p["beta"] * fraction_of_year_between_polls
 
+
         # ----------------------------------- HORIZONTAL TRANSMISSION -----------------------------------
         def horizontal_transmission(to_sex, from_sex):
             # Count current number of alive 15-80 year-olds at risk of transmission
@@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex):
                         HivInfectionEvent(self.module, idx), date_of_infection
                     )
 
+
         # ----------------------------------- SPONTANEOUS TESTING -----------------------------------
         def spontaneous_testing(current_year):
 
@@ -1861,11 +1901,12 @@ def vmmc_for_child():
                     priority=0,
                 )
 
-        # Horizontal transmission: Male --> Female
-        horizontal_transmission(from_sex="M", to_sex="F")
+        if self.sim.generate_event_chains is False:
+            # Horizontal transmission: Male --> Female
+            horizontal_transmission(from_sex="M", to_sex="F")
 
-        # Horizontal transmission: Female --> Male
-        horizontal_transmission(from_sex="F", to_sex="M")
+            # Horizontal transmission: Female --> Male
+            horizontal_transmission(from_sex="F", to_sex="M")
 
         # testing
         # if year later than 2020, set testing rates to those reported in 2020
@@ -1882,6 +1923,8 @@ def vmmc_for_child():
         vmmc_for_child()
 
 
+
+
 # ---------------------------------------------------------------------------
 #   Natural History Events
 # ---------------------------------------------------------------------------

diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
@@ -9,6 +9,9 @@
 from tlo.events import Event
 from tlo.population import Population
 
+import pandas as pd
+
+
 if TYPE_CHECKING:
     from tlo import Module, Simulation
     from tlo.methods.healthsystem import HealthSystem
@@ -187,9 +190,54 @@ def _run_after_hsi_event(self) -> None:
 
     def run(self, squeeze_factor):
         """Make the event happen."""
+        # If event-chain generation is enabled, record the affected rows of the population dataframe
+        # before and after `apply`. Rows accumulate on `self.sim.event_chains`, shared by all events.
+        sim = self.sim
+        df_before = None
+
+        # Only record the event if its module is of interest and it is not in the ignore list
+        record_chain = (
+            sim.generate_event_chains
+            and self.module in sim.generate_event_chains_modules_of_interest
+            and all(sub not in str(self) for sub in sim.generate_event_chains_ignore_events)
+        )
+
+        def tag(rows, when, person_id=None):
+            # Return a copy of `rows` labelled with person, event, date and 'Before'/'After'
+            rows = rows.copy()  # copy so the labels are never written into a slice of `props`
+            rows['person_ID'] = rows.index if person_id is None else person_id
+            rows['event'] = self
+            rows['event_date'] = sim.date
+            rows['when'] = when
+            return rows
+
+        if record_chain:
+            # Individual-level events target a person id; population-level ones the population
+            is_individual = self.target != sim.population
+            if is_individual:
+                row = tag(sim.population.props.iloc[[self.target]], 'Before', self.target)
+                sim.event_chains = pd.concat([sim.event_chains, row], ignore_index=True)
+            else:
+                df_before = sim.population.props.copy()
+
         updated_appt_footprint = self.apply(self.target, squeeze_factor)
         self.post_apply_hook()
         self._run_after_hsi_event()
+
+        if record_chain:
+            if is_individual:
+                row = tag(sim.population.props.iloc[[self.target]], 'After', self.target)
+                sim.event_chains = pd.concat([sim.event_chains, row], ignore_index=True)
+            else:
+                df_after = sim.population.props.copy()
+                changed = df_before.compare(df_after).index
+                # `not`, not `~`: bitwise-not of a bool gives -1 or -2, which is always truthy
+                if not changed.empty:
+                    sim.event_chains = pd.concat(
+                        [sim.event_chains, tag(df_before.loc[changed], 'Before'),
+                         tag(df_after.loc[changed], 'After')],
+                        ignore_index=True,
+                    )
         return updated_appt_footprint
 
     def get_consumables(

diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py
@@ -833,28 +833,29 @@ def initialise_population(self, population):
         df["tb_date_ipt"] = pd.NaT
 
         # # ------------------ infection status ------------------ #
-        # WHO estimates of active TB for 2010 to get infected initial population
-        # don't need to scale or include treated proportion as no-one on treatment yet
-        inc_estimates = p["who_incidence_estimates"]
-        incidence_year = (inc_estimates.loc[
-            (inc_estimates.year == self.sim.date.year), "incidence_per_100k"
-        ].values[0]) / 100_000
-
-        incidence_year = incidence_year * p["scaling_factor_WHO"]
-
-        self.assign_active_tb(
-            population,
-            strain="ds",
-            incidence=incidence_year)
-
-        self.assign_active_tb(
-            population,
-            strain="mdr",
-            incidence=incidence_year * p['prop_mdr2010'])
-
-        self.send_for_screening_general(
-            population
-        )  # send some baseline population for screening
+        if self.sim.generate_event_chains is False:
+            # WHO estimates of active TB for 2010 to get infected initial population
+            # don't need to scale or include treated proportion as no-one on treatment yet
+            inc_estimates = p["who_incidence_estimates"]
+            incidence_year = (inc_estimates.loc[
+                (inc_estimates.year == self.sim.date.year), "incidence_per_100k"
+            ].values[0]) / 100_000
+
+            incidence_year = incidence_year * p["scaling_factor_WHO"]
+
+            self.assign_active_tb(
+                population,
+                strain="ds",
+                incidence=incidence_year)
+
+            self.assign_active_tb(
+                population,
+                strain="mdr",
+                incidence=incidence_year * p['prop_mdr2010'])
+
+            self.send_for_screening_general(
+                population
+            )  # send some baseline population for screening
 
     def initialise_simulation(self, sim):
         """
@@ -867,7 +868,11 @@ def initialise_simulation(self, sim):
         sim.schedule_event(TbActiveEvent(self), sim.date)
         sim.schedule_event(TbRegularEvents(self), sim.date)
         sim.schedule_event(TbSelfCureEvent(self), sim.date)
-        sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
+
+        if sim.generate_event_chains is False:
+            sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
+        else:
+            sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0))
 
         # 2) log at the end of the year
         # Optional: Schedule the scale-up of programs
@@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset):
 # #   TB infection event
 # # ---------------------------------------------------------------------------
 
+class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin):
+    """The Tb Regular Poll Event for Data Generation for assigning active infections
+
+    * selects everyone who is alive and without active tb, and schedules onset of active tb
+      on a randomly drawn date counted from the date on which this event runs
+    * splits those selected equally between the "ds" and "mdr" strains
+    """
+
+    def __init__(self, module):
+        # The 120-year frequency means this poll recurs only in simulations longer than that
+        super().__init__(module, frequency=DateOffset(years=120))
+
+    def apply(self, population):
+        """Schedule onset of active tb for everyone alive and without active tb.
+
+        :param population: the population whose `props` dataframe is updated in place
+        """
+        df = population.props
+        now = self.sim.date
+        rng = self.module.rng
+
+        # Everyone who is alive and does not have active tb is scheduled to develop it
+        susc_idx = df.loc[df.is_alive & (df.tb_inf != "active")].index
+
+        # Upper bound (exclusive) on the delay, in days: whole calendar years left, plus one.
+        # Both strains use the same bound, measured from `now`, which the delay is added to.
+        max_days = 365 * (int(self.sim.end_date.year - now.year) + 1)
+
+        # Split the two strains equally: first half of the index "ds", second half "mdr"
+        # (with an odd number of people, the extra person is assigned "mdr")
+        middle_index = len(susc_idx) // 2
+        strain_of_person = [
+            (susc_idx[:middle_index], "ds"),
+            (susc_idx[middle_index:], "mdr"),
+        ]
+
+        # Schedule onset of active tb. This will be equivalent to the "Onset", so it
+        # doesn't matter how long after we have decided which infection this is.
+        for person_ids, strain in strain_of_person:
+            for person_id in person_ids:
+                date_progression = now + pd.DateOffset(
+                    days=rng.randint(0, max_days)
+                )
+                # set date of active tb - properties will be updated at TbActiveEvent poll daily
+                df.at[person_id, "tb_scheduled_date_active"] = date_progression
+                df.at[person_id, "tb_strain"] = strain
+
 
 class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin):
     """The Tb Regular Poll Event for assigning active infections
@@ -1439,7 +1491,6 @@ def apply(self, population):
 
         self.module.update_parameters_for_program_scaleup()
 
-
 class TbActiveEvent(RegularEvent, PopulationScopeEventMixin):
     """
     * check for those with dates of active tb onset within last time-period