From dbff470b51cde44beeefdae3575d52e0c19964bc Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 3 Apr 2024 15:00:09 +0100
Subject: [PATCH 01/21] Investigate analysis of events at sim level

---
 src/tlo/simulation.py |  9 +++++++++
 tests/test_rti.py     | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 219b1b8a6f..a641909ed1 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -231,6 +231,15 @@ def simulate(self, *, end_date):
             if date >= end_date:
                 self.date = end_date
                 break
+                
+            #if event.target != self.population:
+            #    print("Event: ", event)
+
+            if event.module == self.modules['RTI']:
+                 print("RTI event ", event)
+                 print("   target ", event.target)
+                 if event.target != self.population:
+                    self.population.props.at[event.tar]
             self.fire_single_event(event, date)
 
         # The simulation has ended.
diff --git a/tests/test_rti.py b/tests/test_rti.py
index 0e231fb4af..99243b988e 100644
--- a/tests/test_rti.py
+++ b/tests/test_rti.py
@@ -25,6 +25,17 @@
 end_date = Date(2012, 1, 1)
 popsize = 1000
 
+@pytest.mark.slow
+def test_data_harvesting(seed):
+    """
+    Run the basic RTI simulation up to end_date (exercising the event print-outs in `simulate`) and
+    check the dtypes of the resulting population dataframe
+    """
+    # create sim object
+    sim = create_basic_rti_sim(popsize, seed)
+    # run simulation
+    sim.simulate(end_date=end_date)
+    check_dtypes(sim)
 
 def check_dtypes(simulation):
     # check types of columns in dataframe, check they are the same, list those that aren't
@@ -65,6 +76,7 @@ def test_run(seed):
     check_dtypes(sim)
 
 
+
 @pytest.mark.slow
 def test_all_injuries_run(seed):
     """

From 05098f78668a5317667d58cbda882a364a031277 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Mon, 30 Sep 2024 16:26:39 +0200
Subject: [PATCH 02/21] Final data-printing set-up

---
 src/tlo/methods/demography.py   |  7 ++-
 src/tlo/methods/healthsystem.py | 18 ++++++
 src/tlo/methods/hiv.py          | 67 ++++++++++++++++++----
 src/tlo/methods/tb.py           | 99 +++++++++++++++++++++++++--------
 src/tlo/simulation.py           | 82 ++++++++++++++++++++++++---
 5 files changed, 226 insertions(+), 47 deletions(-)

diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py
index e58f3895f4..6b2578fd44 100644
--- a/src/tlo/methods/demography.py
+++ b/src/tlo/methods/demography.py
@@ -315,9 +315,10 @@ def initialise_simulation(self, sim):
         # Launch the repeating event that will store statistics about the population structure
         sim.schedule_event(DemographyLoggingEvent(self), sim.date)
 
-        # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
-        self.other_death_poll = OtherDeathPoll(self)
-        sim.schedule_event(self.other_death_poll, sim.date)
+        if sim.generate_data is False:
+            # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
+            self.other_death_poll = OtherDeathPoll(self)
+            sim.schedule_event(self.other_death_poll, sim.date)
 
         # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`)
         for _logger in (logger, logger_scale_factor):
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 181c08f5aa..6e251e636c 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2033,8 +2033,26 @@ def run_individual_level_events_in_mode_0_or_1(self,
                     assert event.facility_info is not None, \
                         f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined."
 
+                    go_ahead = False
+                    if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']):
+                        go_ahead = True
+                        row = self.sim.population.props.iloc[[event.target]]
+                        row['person_ID'] = event.target
+                        row['event'] = event
+                        row['event_date'] = self.sim.date
+                        row['when'] = 'Before'
+                        self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+
                     # Run the HSI event (allowing it to return an updated appt_footprint)
                     actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)
+                    
+                    if go_ahead:
+                        row = self.sim.population.props.iloc[[event.target]]
+                        row['person_ID'] = event.target
+                        row['event'] = event
+                        row['event_date'] = self.sim.date
+                        row['when'] = 'After'
+                        self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
 
                     # Check if the HSI event returned updated appt_footprint
                     if actual_appt_footprint is not None:
diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py
index d6455cc861..8e0d337fc1 100644
--- a/src/tlo/methods/hiv.py
+++ b/src/tlo/methods/hiv.py
@@ -631,11 +631,12 @@ def initialise_population(self, population):
         df.loc[df.is_alive, "hv_date_treated"] = pd.NaT
         df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT
 
-        # Launch sub-routines for allocating the right number of people into each category
-        self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
+        if self.sim.generate_data is False:
+            # Launch sub-routines for allocating the right number of people into each category
+            self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
 
-        self.initialise_baseline_art(population)  # allocate baseline art coverage
-        self.initialise_baseline_tested(population)  # allocate baseline testing coverage
+            self.initialise_baseline_art(population)  # allocate baseline art coverage
+            self.initialise_baseline_tested(population)  # allocate baseline testing coverage
 
     def initialise_baseline_prevalence(self, population):
         """
@@ -905,10 +906,16 @@ def initialise_simulation(self, sim):
         df = sim.population.props
         p = self.parameters
 
-        # 1) Schedule the Main HIV Regular Polling Event
-        sim.schedule_event(
-            HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
-        )
+        if self.sim.generate_data:
+            print("Should be generating data")
+            sim.schedule_event(
+                HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0)
+            )
+        else:
+            # 1) Schedule the Main HIV Regular Polling Event
+            sim.schedule_event(
+                HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
+            )
 
         # 2) Schedule the Logging Event
         sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1))
@@ -1662,6 +1669,37 @@ def do_at_generic_first_appt(
 #   Main Polling Event
 # ---------------------------------------------------------------------------
 
+class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin):
+    """ The HIV Polling Event for Data Generation
+    * Schedules an HivInfectionEvent, at a randomly drawn date, for everyone who is alive and not infected
+    """
+
+    def __init__(self, module):
+        super().__init__(
+            module, frequency=DateOffset(years=120)
+        )  # repeats every 120 years - presumably so that it only fires once per run (TODO confirm)
+
+    def apply(self, population):
+        # Schedules one HivInfectionEvent per susceptible person found in `population.props`
+        df = population.props
+
+        # Select everyone who is alive and not infected (no-one should be infected)
+        susc_idx = df.loc[
+            df.is_alive
+            & ~df.hv_inf
+            ].index
+
+        n_susceptible = len(susc_idx)
+        print("Number of individuals susceptible", n_susceptible)
+        # Schedule the date of infection for each new infection:
+        for i in susc_idx:
+            date_of_infection = self.sim.date + pd.DateOffset(
+                # NOTE(review): the bound rounds up to whole years, so the date may fall after end_date - confirm
+                days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1))
+            )
+            self.sim.schedule_event(
+                HivInfectionEvent(self.module, i), date_of_infection
+            )
 
 class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin):
     """ The HIV Regular Polling Events
@@ -1683,6 +1721,7 @@ def apply(self, population):
         fraction_of_year_between_polls = self.frequency.months / 12
         beta = p["beta"] * fraction_of_year_between_polls
 
+        
         # ----------------------------------- HORIZONTAL TRANSMISSION -----------------------------------
         def horizontal_transmission(to_sex, from_sex):
             # Count current number of alive 15-80 year-olds at risk of transmission
@@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex):
                         HivInfectionEvent(self.module, idx), date_of_infection
                     )
 
+
         # ----------------------------------- SPONTANEOUS TESTING -----------------------------------
         def spontaneous_testing(current_year):
 
@@ -1861,11 +1901,12 @@ def vmmc_for_child():
                     priority=0,
                 )
 
-        # Horizontal transmission: Male --> Female
-        horizontal_transmission(from_sex="M", to_sex="F")
+        if self.sim.generate_data is False:
+            # Horizontal transmission: Male --> Female
+            horizontal_transmission(from_sex="M", to_sex="F")
 
-        # Horizontal transmission: Female --> Male
-        horizontal_transmission(from_sex="F", to_sex="M")
+            # Horizontal transmission: Female --> Male
+            horizontal_transmission(from_sex="F", to_sex="M")
 
         # testing
         # if year later than 2020, set testing rates to those reported in 2020
@@ -1882,6 +1923,8 @@ def vmmc_for_child():
         vmmc_for_child()
 
 
+
+
 # ---------------------------------------------------------------------------
 #   Natural History Events
 # ---------------------------------------------------------------------------
diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py
index 623ee2e483..cd79ae22a5 100644
--- a/src/tlo/methods/tb.py
+++ b/src/tlo/methods/tb.py
@@ -833,28 +833,29 @@ def initialise_population(self, population):
         df["tb_date_ipt"] = pd.NaT
 
         # # ------------------ infection status ------------------ #
-        # WHO estimates of active TB for 2010 to get infected initial population
-        # don't need to scale or include treated proportion as no-one on treatment yet
-        inc_estimates = p["who_incidence_estimates"]
-        incidence_year = (inc_estimates.loc[
-            (inc_estimates.year == self.sim.date.year), "incidence_per_100k"
-        ].values[0]) / 100_000
-
-        incidence_year = incidence_year * p["scaling_factor_WHO"]
-
-        self.assign_active_tb(
-            population,
-            strain="ds",
-            incidence=incidence_year)
-
-        self.assign_active_tb(
-            population,
-            strain="mdr",
-            incidence=incidence_year * p['prop_mdr2010'])
-
-        self.send_for_screening_general(
-            population
-        )  # send some baseline population for screening
+        if self.sim.generate_data is False:
+            # WHO estimates of active TB for 2010 to get infected initial population
+            # don't need to scale or include treated proportion as no-one on treatment yet
+            inc_estimates = p["who_incidence_estimates"]
+            incidence_year = (inc_estimates.loc[
+                (inc_estimates.year == self.sim.date.year), "incidence_per_100k"
+            ].values[0]) / 100_000
+
+            incidence_year = incidence_year * p["scaling_factor_WHO"]
+
+            self.assign_active_tb(
+                population,
+                strain="ds",
+                incidence=incidence_year)
+
+            self.assign_active_tb(
+                population,
+                strain="mdr",
+                incidence=incidence_year * p['prop_mdr2010'])
+
+            self.send_for_screening_general(
+                population
+            )  # send some baseline population for screening
 
     def initialise_simulation(self, sim):
         """
@@ -867,7 +868,11 @@ def initialise_simulation(self, sim):
         sim.schedule_event(TbActiveEvent(self), sim.date)
         sim.schedule_event(TbRegularEvents(self), sim.date)
         sim.schedule_event(TbSelfCureEvent(self), sim.date)
-        sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
+
+        if sim.generate_data is False:
+            sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
+        else:
+            sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0))
 
         # 2) log at the end of the year
         # Optional: Schedule the scale-up of programs
@@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset):
 # #   TB infection event
 # # ---------------------------------------------------------------------------
 
+class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin):
+    """The Tb Regular Poll Event for Data Generation for assigning active infections
+    * selects everyone who is alive and does not have active tb
+    * assigns the first half of them the "ds" strain and the rest the "mdr" strain
+    * schedules onset of active tb for each of them at a randomly drawn date
+    """
+
+    def __init__(self, module):
+        # NOTE(review): 120-year repeat period - presumably so that the poll only fires once per run
+        super().__init__(module, frequency=DateOffset(years=120))
+
+    def apply(self, population):
+        """Set `tb_strain` and `tb_scheduled_date_active` for everyone without active tb.
+
+        :param population: the population whose `props` dataframe is updated in place
+        """
+        df = population.props
+        now = self.sim.date
+        rng = self.module.rng
+
+        # Everyone who is alive and does not currently have active tb
+        susc_idx = df.loc[
+            df.is_alive
+            & (df.tb_inf != "active")
+            ].index
+
+        # Split the two strains equally among them (with an odd count the extra person goes to mdr)
+        middle_index = len(susc_idx) // 2
+        strain_to_ids = (
+            ("ds", susc_idx[:middle_index]),
+            ("mdr", susc_idx[middle_index:]),
+        )
+
+        # Upper bound (in days) for the randomly drawn onset offset, shared by both strains.
+        # NOTE(review): this rounds the remaining time up to whole 365-day years, so some onset dates
+        # may fall after end_date - confirm that this is intended.
+        max_days = 365 * (int(self.sim.end_date.year - now.year) + 1)
+
+        # schedule onset of active tb. This will be equivalent to the "Onset", so it
+        # doesn't matter how long after we have decided which infection this is.
+        for strain, person_ids in strain_to_ids:
+            for person_id in person_ids:
+                date_progression = now + pd.DateOffset(days=rng.randint(0, max_days))
+                # set date of active tb - properties will be updated at TbActiveEvent poll daily
+                df.at[person_id, "tb_scheduled_date_active"] = date_progression
+                df.at[person_id, "tb_strain"] = strain
+            
 
 class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin):
     """The Tb Regular Poll Event for assigning active infections
@@ -1439,7 +1491,6 @@ def apply(self, population):
 
         self.module.update_parameters_for_program_scaleup()
 
-
 class TbActiveEvent(RegularEvent, PopulationScopeEventMixin):
     """
     * check for those with dates of active tb onset within last time-period
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 5b4e2fff4c..f0c8d6f09f 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -7,7 +7,7 @@
 from collections import OrderedDict
 from pathlib import Path
 from typing import Dict, Optional, Union
-
+import pandas as pd
 import numpy as np
 
 from tlo import Date, Population, logging
@@ -63,9 +63,11 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non
         self.date = self.start_date = start_date
         self.modules = OrderedDict()
         self.event_queue = EventQueue()
+        self.generate_data = None
         self.end_date = None
         self.output_file = None
         self.population: Optional[Population] = None
+        self.event_chains: Optinoal[Population] = None
 
         self.show_progress_bar = show_progress_bar
         self.resourcefilepath = resourcefilepath
@@ -209,6 +211,8 @@ def make_initial_population(self, *, n):
             module.initialise_population(self.population)
             logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s')
 
+        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
+
         end = time.time()
         logger.info(key='info', data=f'make_initial_population() {end - start} s')
 
@@ -221,7 +225,14 @@ def simulate(self, *, end_date):
         """
         start = time.time()
         self.end_date = end_date  # store the end_date so that others can reference it
+        self.generate_data = True # for now ensure we're always aiming to print data
+
+        f = open('output.txt', mode='a')
+        #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
 
+        # Reorder columns to place the new columns at the front
+        pd.set_option('display.max_columns', None)
+        print(self.event_chains.columns)
         for module in self.modules.values():
             module.initialise_simulation(self)
 
@@ -250,17 +261,72 @@ def simulate(self, *, end_date):
 
             if date >= end_date:
                 self.date = end_date
+                self.event_chains.to_csv('output.csv', index=False)
                 break
-                
+
             #if event.target != self.population:
             #    print("Event: ", event)
-
-            if event.module == self.modules['RTI']:
-                 print("RTI event ", event)
-                 print("   target ", event.target)
-                 if event.target != self.population:
-                    self.population.props.at[event.tar]
+            go_ahead = False
+            df_before = []
+            
+            # Only print events relevant to modules of interest
+            # Do not want to compare before/after in births because it may expand the pop dataframe
+            print_output = True
+            if print_output:
+                if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event):
+                #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event):
+                    go_ahead = True
+                    if event.target != self.population:
+                        row = self.population.props.iloc[[event.target]]
+                        row['person_ID'] = event.target
+                        row['event'] = event
+                        row['event_date'] = date
+                        row['when'] = 'Before'
+                        self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
+                    else:
+                        df_before = self.population.props.copy()
+                    
             self.fire_single_event(event, date)
+            
+            if print_output:
+                if go_ahead == True:
+                    if event.target != self.population:
+                        row = self.population.props.iloc[[event.target]]
+                        row['person_ID'] = event.target
+                        row['event'] = event
+                        row['event_date'] = date
+                        row['when'] = 'After'
+                        self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
+                    else:
+                        df_after = self.population.props.copy()
+                       # if not df_before.columns.equals(df_after.columns):
+                       #     print("Number of columns in pop dataframe", len(self.population.props.columns))
+                       #     print("Before", df_before.columns)
+                       #     print("After", df_after.columns#)
+                      #      exit(-1)
+                      #  if not df_before.index.equals(df_after.index):
+                       #     print("Number of indices in pop dataframe", len(self.population.props.index))
+                      #      print("----> ", event)
+                      #      print("Before", df_before.index#)
+                      #      print("After", df_after.index)
+                      #      exit(-1)
+                            
+                        change = df_before.compare(df_after)
+                        if ~change.empty:
+                            indices = change.index
+                            new_rows_before = df_before.loc[indices]
+                            new_rows_before['person_ID'] = new_rows_before.index
+                            new_rows_before['event'] = event
+                            new_rows_before['event_date'] = date
+                            new_rows_before['when'] = 'Before'
+                            new_rows_after = df_after.loc[indices]
+                            new_rows_after['person_ID'] = new_rows_after.index
+                            new_rows_after['event'] = event
+                            new_rows_after['event_date'] = date
+                            new_rows_after['when'] = 'After'
+
+                            self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True)
+                            self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True)
 
         # The simulation has ended.
         if self.show_progress_bar:

From 16c071c6220edcc20b539f346625f628e5e8c4c5 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 2 Oct 2024 12:37:38 +0200
Subject: [PATCH 03/21] Print event chains

---
 src/tlo/methods/demography.py   |  2 +-
 src/tlo/methods/healthsystem.py |  8 ++--
 src/tlo/methods/hiv.py          |  6 +--
 src/tlo/methods/tb.py           |  4 +-
 src/tlo/simulation.py           | 47 +++++++++---------
 tests/test_data_generation.py   | 85 +++++++++++++++++++++++++++++++++
 6 files changed, 117 insertions(+), 35 deletions(-)
 create mode 100644 tests/test_data_generation.py

diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py
index 6b2578fd44..4f19af6d55 100644
--- a/src/tlo/methods/demography.py
+++ b/src/tlo/methods/demography.py
@@ -315,7 +315,7 @@ def initialise_simulation(self, sim):
         # Launch the repeating event that will store statistics about the population structure
         sim.schedule_event(DemographyLoggingEvent(self), sim.date)
 
-        if sim.generate_data is False:
+        if sim.generate_event_chains is False:
             # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
             self.other_death_poll = OtherDeathPoll(self)
             sim.schedule_event(self.other_death_poll, sim.date)
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 6e251e636c..203ca10985 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2033,9 +2033,9 @@ def run_individual_level_events_in_mode_0_or_1(self,
                     assert event.facility_info is not None, \
                         f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined."
 
-                    go_ahead = False
-                    if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']):
-                        go_ahead = True
+                    print_chains = False
+                    if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events):
+                        print_chains = True
                         row = self.sim.population.props.iloc[[event.target]]
                         row['person_ID'] = event.target
                         row['event'] = event
@@ -2046,7 +2046,7 @@ def run_individual_level_events_in_mode_0_or_1(self,
                     # Run the HSI event (allowing it to return an updated appt_footprint)
                     actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)
                     
-                    if go_ahead:
+                    if print_chains:
                         row = self.sim.population.props.iloc[[event.target]]
                         row['person_ID'] = event.target
                         row['event'] = event
diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py
index 8e0d337fc1..36b1a4bd6e 100644
--- a/src/tlo/methods/hiv.py
+++ b/src/tlo/methods/hiv.py
@@ -631,7 +631,7 @@ def initialise_population(self, population):
         df.loc[df.is_alive, "hv_date_treated"] = pd.NaT
         df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT
 
-        if self.sim.generate_data is False:
+        if self.sim.generate_event_chains is False:
             # Launch sub-routines for allocating the right number of people into each category
             self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
 
@@ -906,7 +906,7 @@ def initialise_simulation(self, sim):
         df = sim.population.props
         p = self.parameters
 
-        if self.sim.generate_data:
+        if self.sim.generate_event_chains:
             print("Should be generating data")
             sim.schedule_event(
                 HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0)
@@ -1901,7 +1901,7 @@ def vmmc_for_child():
                     priority=0,
                 )
 
-        if self.sim.generate_data is False:
+        if self.sim.generate_event_chains is False:
             # Horizontal transmission: Male --> Female
             horizontal_transmission(from_sex="M", to_sex="F")
 
diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py
index cd79ae22a5..57ccd97368 100644
--- a/src/tlo/methods/tb.py
+++ b/src/tlo/methods/tb.py
@@ -833,7 +833,7 @@ def initialise_population(self, population):
         df["tb_date_ipt"] = pd.NaT
 
         # # ------------------ infection status ------------------ #
-        if self.sim.generate_data is False:
+        if self.sim.generate_event_chains is False:
             # WHO estimates of active TB for 2010 to get infected initial population
             # don't need to scale or include treated proportion as no-one on treatment yet
             inc_estimates = p["who_incidence_estimates"]
@@ -869,7 +869,7 @@ def initialise_simulation(self, sim):
         sim.schedule_event(TbRegularEvents(self), sim.date)
         sim.schedule_event(TbSelfCureEvent(self), sim.date)
 
-        if sim.generate_data is False:
+        if sim.generate_event_chains is False:
             sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
         else:
             sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0))
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index f0c8d6f09f..d055d6e367 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -63,7 +63,9 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non
         self.date = self.start_date = start_date
         self.modules = OrderedDict()
         self.event_queue = EventQueue()
-        self.generate_data = None
+        self.generate_event_chains = None
+        self.generate_event_chains_modules_of_interest = []
+        self.generate_event_chains_ignore_events = []
         self.end_date = None
         self.output_file = None
         self.population: Optional[Population] = None
@@ -216,7 +218,7 @@ def make_initial_population(self, *, n):
         end = time.time()
         logger.info(key='info', data=f'make_initial_population() {end - start} s')
 
-    def simulate(self, *, end_date):
+    def simulate(self, *, end_date, generate_event_chains = False):
         """Simulation until the given end date
 
         :param end_date: when to stop simulating. Only events strictly before this
@@ -225,7 +227,11 @@ def simulate(self, *, end_date):
         """
         start = time.time()
         self.end_date = end_date  # store the end_date so that others can reference it
-        self.generate_data = True # for now ensure we're always aiming to print data
+        self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data
+        if self.generate_event_chains:
+            # For now keep these fixed, eventually they will be input from user
+            self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']]
+            self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
 
         f = open('output.txt', mode='a')
         #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
@@ -264,17 +270,13 @@ def simulate(self, *, end_date):
                 self.event_chains.to_csv('output.csv', index=False)
                 break
 
-            #if event.target != self.population:
-            #    print("Event: ", event)
-            go_ahead = False
+
+            print_chains = False
             df_before = []
             
-            # Only print events relevant to modules of interest
-            # Do not want to compare before/after in births because it may expand the pop dataframe
-            print_output = True
-            if print_output:
-                if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event):
-                #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event):
+            if self.generate_event_chains:
+                # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
+                if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events):
                     go_ahead = True
                     if event.target != self.population:
                         row = self.population.props.iloc[[event.target]]
@@ -288,7 +290,7 @@ def simulate(self, *, end_date):
                     
             self.fire_single_event(event, date)
             
-            if print_output:
+            if go_ahead:
                 if go_ahead == True:
                     if event.target != self.population:
                         row = self.population.props.iloc[[event.target]]
@@ -299,18 +301,6 @@ def simulate(self, *, end_date):
                         self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
                     else:
                         df_after = self.population.props.copy()
-                       # if not df_before.columns.equals(df_after.columns):
-                       #     print("Number of columns in pop dataframe", len(self.population.props.columns))
-                       #     print("Before", df_before.columns)
-                       #     print("After", df_after.columns#)
-                      #      exit(-1)
-                      #  if not df_before.index.equals(df_after.index):
-                       #     print("Number of indices in pop dataframe", len(self.population.props.index))
-                      #      print("----> ", event)
-                      #      print("Before", df_before.index#)
-                      #      print("After", df_after.index)
-                      #      exit(-1)
-                            
                         change = df_before.compare(df_after)
                         if ~change.empty:
                             indices = change.index
@@ -385,6 +375,13 @@ def do_birth(self, mother_id):
         child_id = self.population.do_birth()
         for module in self.modules.values():
             module.on_birth(mother_id, child_id)
+        if self.generate_event_chains:
+            row = self.population.props.iloc[[child_id]]
+            row['person_ID'] = child_id
+            row['event'] = 'Birth'
+            row['event_date'] = self.date
+            row['when'] = 'After'
+            self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
         return child_id
 
     def find_events_for_person(self, person_id: int):
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
new file mode 100644
index 0000000000..1f6333bbfe
--- /dev/null
+++ b/tests/test_data_generation.py
@@ -0,0 +1,85 @@
+import os
+from pathlib import Path
+
+import pandas as pd
+import pytest
+
+from tlo import Date, Simulation
+from tlo.methods import (
+    care_of_women_during_pregnancy,
+    demography,
+    depression,
+    enhanced_lifestyle,
+    epi,
+    epilepsy,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    hiv,
+    cardio_metabolic_disorders,
+    labour,
+    newborn_outcomes,
+    postnatal_supervisor,
+    pregnancy_helper_functions,
+    pregnancy_supervisor,
+    depression,
+    tb,
+    contraception,
+#    simplified_births,
+    symptommanager,
+)
+from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt
+
+# create simulation parameters
+start_date = Date(2010, 1, 1)
+end_date = Date(2015, 1, 1)
+popsize = 100
+
+@pytest.mark.slow
+def test_data_harvesting(seed):
+    """
+    This test runs a simulation to print all individual events of specific individuals
+    """
+    
+    module_of_interest = 'Hiv'
+    # create sim object
+    sim = create_basic_sim(popsize, seed)
+    
+    dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES)
+    
+    # Check that all dependencies are included
+    for dep in dependencies_list:
+        if dep not in sim.modules:
+            print("WARNING: dependency ", dep, "not included")
+            exit(-1)
+
+    # run simulation
+    sim.simulate(end_date=end_date, generate_event_chains = True)
+
+
+def create_basic_sim(population_size, seed):
+    # create the basic outline of an rti simulation object
+    sim = Simulation(start_date=start_date, seed=seed)
+    resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
+    sim.register(demography.Demography(resourcefilepath=resourcefilepath),
+                contraception.Contraception(resourcefilepath=resourcefilepath),
+                 enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
+                 healthburden.HealthBurden(resourcefilepath=resourcefilepath),
+                 symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
+                 healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']),
+                 healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
+                 epi.Epi(resourcefilepath=resourcefilepath),
+                 hiv.Hiv(resourcefilepath=resourcefilepath),
+                 tb.Tb(resourcefilepath=resourcefilepath),
+                 cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath),
+                 depression.Depression(resourcefilepath=resourcefilepath),
+                 newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
+                 pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
+                 care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
+                 labour.Labour(resourcefilepath=resourcefilepath),
+                 postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
+                 )
+
+    sim.make_initial_population(n=population_size)
+    return sim
+

From ba81487a3fa003e2f10206e435a1d64f170f14e3 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 2 Oct 2024 13:08:50 +0200
Subject: [PATCH 04/21] Add chains in mode 2 too and clean up in simulation

---
 src/tlo/methods/healthsystem.py | 40 ++++++++++++++++++------
 src/tlo/simulation.py           | 55 ++++++++++++++++-----------------
 2 files changed, 58 insertions(+), 37 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 203ca10985..54cb976b26 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -2034,18 +2034,20 @@ def run_individual_level_events_in_mode_0_or_1(self,
                         f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined."
 
                     print_chains = False
-                    if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events):
-                        print_chains = True
-                        row = self.sim.population.props.iloc[[event.target]]
-                        row['person_ID'] = event.target
-                        row['event'] = event
-                        row['event_date'] = self.sim.date
-                        row['when'] = 'Before'
-                        self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                    if self.sim.generate_event_chains:
+                        if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events):
+                            print_chains = True
+                            row = self.sim.population.props.iloc[[event.target]]
+                            row['person_ID'] = event.target
+                            row['event'] = event
+                            row['event_date'] = self.sim.date
+                            row['when'] = 'Before'
+                            self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
 
                     # Run the HSI event (allowing it to return an updated appt_footprint)
                     actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)
                     
+                    # Print individual info after event
                     if print_chains:
                         row = self.sim.population.props.iloc[[event.target]]
                         row['person_ID'] = event.target
@@ -2445,8 +2447,28 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None:
 
                             # Expected appt footprint before running event
                             _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT
-                            # Run event & get actual footprint
+                       
+                            print_chains = False
+                            if self.sim.generate_event_chains:
+                                if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events):
+                                    print_chains = True
+                                    row = self.sim.population.props.iloc[[event.target]]
+                                    row['person_ID'] = event.target
+                                    row['event'] = event
+                                    row['event_date'] = self.sim.date
+                                    row['when'] = 'Before'
+                                    self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+
+                            # Run the HSI event (allowing it to return an updated appt_footprint)
                             actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)
+                            
+                            if print_chains:
+                                row = self.sim.population.props.iloc[[event.target]]
+                                row['person_ID'] = event.target
+                                row['event'] = event
+                                row['event_date'] = self.sim.date
+                                row['when'] = 'After'
+                                self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
 
                             # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call
                             if actual_appt_footprint is not None:
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index d055d6e367..616e159453 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -277,7 +277,7 @@ def simulate(self, *, end_date, generate_event_chains = False):
             if self.generate_event_chains:
                 # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
                 if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events):
-                    go_ahead = True
+                    print_chains = True
                     if event.target != self.population:
                         row = self.population.props.iloc[[event.target]]
                         row['person_ID'] = event.target
@@ -290,33 +290,32 @@ def simulate(self, *, end_date, generate_event_chains = False):
                     
             self.fire_single_event(event, date)
             
-            if go_ahead:
-                if go_ahead == True:
-                    if event.target != self.population:
-                        row = self.population.props.iloc[[event.target]]
-                        row['person_ID'] = event.target
-                        row['event'] = event
-                        row['event_date'] = date
-                        row['when'] = 'After'
-                        self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
-                    else:
-                        df_after = self.population.props.copy()
-                        change = df_before.compare(df_after)
-                        if ~change.empty:
-                            indices = change.index
-                            new_rows_before = df_before.loc[indices]
-                            new_rows_before['person_ID'] = new_rows_before.index
-                            new_rows_before['event'] = event
-                            new_rows_before['event_date'] = date
-                            new_rows_before['when'] = 'Before'
-                            new_rows_after = df_after.loc[indices]
-                            new_rows_after['person_ID'] = new_rows_after.index
-                            new_rows_after['event'] = event
-                            new_rows_after['event_date'] = date
-                            new_rows_after['when'] = 'After'
-
-                            self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True)
-                            self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True)
+            if print_chains:
+                if event.target != self.population:
+                    row = self.population.props.iloc[[event.target]]
+                    row['person_ID'] = event.target
+                    row['event'] = event
+                    row['event_date'] = date
+                    row['when'] = 'After'
+                    self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
+                else:
+                    df_after = self.population.props.copy()
+                    change = df_before.compare(df_after)
+                    if ~change.empty:
+                        indices = change.index
+                        new_rows_before = df_before.loc[indices]
+                        new_rows_before['person_ID'] = new_rows_before.index
+                        new_rows_before['event'] = event
+                        new_rows_before['event_date'] = date
+                        new_rows_before['when'] = 'Before'
+                        new_rows_after = df_after.loc[indices]
+                        new_rows_after['person_ID'] = new_rows_after.index
+                        new_rows_after['event'] = event
+                        new_rows_after['event_date'] = date
+                        new_rows_after['when'] = 'After'
+
+                        self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True)
+                        self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True)
 
         # The simulation has ended.
         if self.show_progress_bar:

From b1c907c12bfa54621983415b560381d1737afc9a Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Mon, 7 Oct 2024 09:36:06 +0200
Subject: [PATCH 05/21] Fix issue with tests by ensuring standard Polling and
 infection is maintained if generate_event_chains is None

---
 src/tlo/methods/hiv.py       |  6 +++---
 src/tlo/methods/hsi_event.py | 14 ++++++++------
 src/tlo/methods/tb.py        | 10 ++++++----
 src/tlo/simulation.py        |  4 +++-
 4 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py
index 36b1a4bd6e..391cf587a8 100644
--- a/src/tlo/methods/hiv.py
+++ b/src/tlo/methods/hiv.py
@@ -631,7 +631,7 @@ def initialise_population(self, population):
         df.loc[df.is_alive, "hv_date_treated"] = pd.NaT
         df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT
 
-        if self.sim.generate_event_chains is False:
+        if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False:
             # Launch sub-routines for allocating the right number of people into each category
             self.initialise_baseline_prevalence(population)  # allocate baseline prevalence
 
@@ -906,7 +906,7 @@ def initialise_simulation(self, sim):
         df = sim.population.props
         p = self.parameters
 
-        if self.sim.generate_event_chains:
+        if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi:
             print("Should be generating data")
             sim.schedule_event(
                 HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0)
@@ -1901,7 +1901,7 @@ def vmmc_for_child():
                     priority=0,
                 )
 
-        if self.sim.generate_event_chains is False:
+        if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False:
             # Horizontal transmission: Male --> Female
             horizontal_transmission(from_sex="M", to_sex="F")
 
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 470794bcdd..785f27b7a6 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -193,10 +193,12 @@ def run(self, squeeze_factor):
         
         print_chains = False
         df_before = []
-        
+
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and
+            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+
                 print_chains = True
                 if self.target != self.sim.population:
                     row = self.sim.population.props.iloc[[self.target]]
@@ -204,7 +206,7 @@ def run(self, squeeze_factor):
                     row['event'] = self
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
-                    self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                    self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 else:
                     df_before = self.sim.population.props.copy()
         
@@ -219,7 +221,7 @@ def run(self, squeeze_factor):
                 row['event'] = self
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
-                self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
             else:
                 df_after = self.sim.population.props.copy()
                 change = df_before.compare(df_after)
@@ -236,8 +238,8 @@ def run(self, squeeze_factor):
                     new_rows_after['event_date'] = self.sim.date
                     new_rows_after['when'] = 'After'
 
-                    self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
-                    self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
+                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
+                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
         return updated_appt_footprint
 
     def get_consumables(
diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py
index 57ccd97368..4c170944d2 100644
--- a/src/tlo/methods/tb.py
+++ b/src/tlo/methods/tb.py
@@ -832,8 +832,9 @@ def initialise_population(self, population):
         df["tb_on_ipt"] = False
         df["tb_date_ipt"] = pd.NaT
 
+
         # # ------------------ infection status ------------------ #
-        if self.sim.generate_event_chains is False:
+        if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None:
             # WHO estimates of active TB for 2010 to get infected initial population
             # don't need to scale or include treated proportion as no-one on treatment yet
             inc_estimates = p["who_incidence_estimates"]
@@ -869,10 +870,11 @@ def initialise_simulation(self, sim):
         sim.schedule_event(TbRegularEvents(self), sim.date)
         sim.schedule_event(TbSelfCureEvent(self), sim.date)
 
-        if sim.generate_event_chains is False:
-            sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
-        else:
+        if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True:
             sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0))
+        else:
+            sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
+
 
         # 2) log at the end of the year
         # Optional: Schedule the scale-up of programs
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 794bfef98e..4aff23c9d7 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -105,6 +105,7 @@ def __init__(
         self.modules = OrderedDict()
         self.event_queue = EventQueue()
         self.generate_event_chains = None
+        self.generate_event_chains_overwrite_epi = None
         self.generate_event_chains_modules_of_interest = []
         self.generate_event_chains_ignore_events = []
         self.end_date = None
@@ -298,10 +299,11 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None:
         self.end_date = end_date  # store the end_date so that others can reference it
 
         self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data
+        self.generate_event_chains_overwrite_epi = False
         if self.generate_event_chains:
             # For now keep these fixed, eventually they will be input from user
             self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']]
-            self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
+            self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
 
         #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
 

From cfb4264f0133fccbc0a82a6c9d3f51479d19038f Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Mon, 7 Oct 2024 15:51:37 +0200
Subject: [PATCH 06/21] Switch iloc for loc

---
 src/tlo/events.py             | 5 ++---
 src/tlo/methods/hsi_event.py  | 4 ++--
 src/tlo/simulation.py         | 9 ++++++---
 tests/test_data_generation.py | 2 +-
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 78b828091d..a50832a58d 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -74,7 +74,7 @@ def run(self):
             if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
                 print_chains = True
                 if self.target != self.sim.population:
-                    row = self.sim.population.props.iloc[[self.target]]
+                    row = self.sim.population.props.loc[[self.target]]
                     row['person_ID'] = self.target
                     row['event'] = self
                     row['event_date'] = self.sim.date
@@ -83,13 +83,12 @@ def run(self):
                 else:
                     df_before = self.sim.population.props.copy()
         
-        
         self.apply(self.target)
         self.post_apply_hook()
                 
         if print_chains:
             if self.target != self.sim.population:
-                row = self.sim.population.props.iloc[[self.target]]
+                row = self.sim.population.props.loc[[self.target]]
                 row['person_ID'] = self.target
                 row['event'] = self
                 row['event_date'] = self.sim.date
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 785f27b7a6..cffeb32992 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -201,7 +201,7 @@ def run(self, squeeze_factor):
 
                 print_chains = True
                 if self.target != self.sim.population:
-                    row = self.sim.population.props.iloc[[self.target]]
+                    row = self.sim.population.props.loc[[self.target]]
                     row['person_ID'] = self.target
                     row['event'] = self
                     row['event_date'] = self.sim.date
@@ -216,7 +216,7 @@ def run(self, squeeze_factor):
         
         if print_chains:
             if self.target != self.sim.population:
-                row = self.sim.population.props.iloc[[self.target]]
+                row = self.sim.population.props.loc[[self.target]]
                 row['person_ID'] = self.target
                 row['event'] = self
                 row['event_date'] = self.sim.date
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 4aff23c9d7..42a2a288d3 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -298,14 +298,17 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None:
         self.date = self.start_date
         self.end_date = end_date  # store the end_date so that others can reference it
 
-        self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data
-        self.generate_event_chains_overwrite_epi = False
+        self.generate_event_chains = generate_event_chains
         if self.generate_event_chains:
+            # Eventually this can be made an option
+            self.generate_event_chains_overwrite_epi = True
             # For now keep these fixed, eventually they will be input from user
             self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']]
             self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
+        else:
+            # If not using to print chains, cannot ignore epi
+            self.generate_event_chains_overwrite_epi = False
 
-        #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
 
         # Reorder columns to place the new columns at the front
         pd.set_option('display.max_columns', None)
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
index 1f6333bbfe..8dd92513f9 100644
--- a/tests/test_data_generation.py
+++ b/tests/test_data_generation.py
@@ -32,7 +32,7 @@
 
 # create simulation parameters
 start_date = Date(2010, 1, 1)
-end_date = Date(2015, 1, 1)
+end_date = Date(2014, 1, 1)
 popsize = 100
 
 @pytest.mark.slow

From e0327de6b6f850ac871a2308271f6863333f173e Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Mon, 7 Oct 2024 15:55:57 +0200
Subject: [PATCH 07/21] Change syntax of if statement

---
 src/tlo/events.py            | 2 +-
 src/tlo/methods/hsi_event.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index a50832a58d..2eef87ba3f 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -71,7 +71,7 @@ def run(self):
         
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)):
                 print_chains = True
                 if self.target != self.sim.population:
                     row = self.sim.population.props.loc[[self.target]]
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index cffeb32992..805c9584fb 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -196,9 +196,7 @@ def run(self, squeeze_factor):
 
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and
-            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
-
+            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)):
                 print_chains = True
                 if self.target != self.sim.population:
                     row = self.sim.population.props.loc[[self.target]]

From fceee02e68722e29314c3d9efe35983709a78deb Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 9 Oct 2024 09:27:54 +0100
Subject: [PATCH 08/21] Change syntax of if statement and print string of event

---
 src/tlo/events.py            | 6 +++---
 src/tlo/methods/hsi_event.py | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 2eef87ba3f..2a7871c2c8 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -71,12 +71,12 @@ def run(self):
         
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)):
+            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
                 print_chains = True
                 if self.target != self.sim.population:
                     row = self.sim.population.props.loc[[self.target]]
                     row['person_ID'] = self.target
-                    row['event'] = self
+                    row['event'] = str(self)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
@@ -90,7 +90,7 @@ def run(self):
             if self.target != self.sim.population:
                 row = self.sim.population.props.loc[[self.target]]
                 row['person_ID'] = self.target
-                row['event'] = self
+                row['event'] = str(self)
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 805c9584fb..ea9066bc8b 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -196,12 +196,12 @@ def run(self, squeeze_factor):
 
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)):
+            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
                 print_chains = True
                 if self.target != self.sim.population:
                     row = self.sim.population.props.loc[[self.target]]
                     row['person_ID'] = self.target
-                    row['event'] = self
+                    row['event'] = str(self)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
@@ -216,7 +216,7 @@ def run(self, squeeze_factor):
             if self.target != self.sim.population:
                 row = self.sim.population.props.loc[[self.target]]
                 row['person_ID'] = self.target
-                row['event'] = self
+                row['event'] = str(self)
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)

From eaeae626a4b37c024db38abf82bdb7c2e723ffe2 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Thu, 10 Oct 2024 14:45:41 +0100
Subject: [PATCH 09/21] Focus on rti and print footprint

---
 src/tlo/events.py             | 16 +++++++++++++---
 src/tlo/methods/hsi_event.py  | 36 ++++++++++++++++-------------------
 src/tlo/methods/rti.py        |  8 ++++++--
 src/tlo/simulation.py         |  6 +++---
 tests/test_data_generation.py | 31 ++++++++++++++++--------------
 5 files changed, 55 insertions(+), 42 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 2a7871c2c8..76e1b9a117 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -71,14 +71,19 @@ def run(self):
         
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not
+            #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+
                 print_chains = True
                 if self.target != self.sim.population:
-                    row = self.sim.population.props.loc[[self.target]]
+                    row = self.sim.population.props.iloc[[self.target]]
                     row['person_ID'] = self.target
                     row['event'] = str(self)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
+                    row['appt_footprint'] = 'N/A'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 else:
                     df_before = self.sim.population.props.copy()
@@ -88,11 +93,12 @@ def run(self):
                 
         if print_chains:
             if self.target != self.sim.population:
-                row = self.sim.population.props.loc[[self.target]]
+                row = self.sim.population.props.iloc[[self.target]]
                 row['person_ID'] = self.target
                 row['event'] = str(self)
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
+                row['appt_footprint'] = 'N/A'
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
             else:
                 df_after = self.sim.population.props.copy()
@@ -104,11 +110,15 @@ def run(self):
                     new_rows_before['event'] = self
                     new_rows_before['event_date'] = self.sim.date
                     new_rows_before['when'] = 'Before'
+                    new_rows_before['appt_footprint'] = 'N/A'
+
                     new_rows_after = df_after.loc[indices]
                     new_rows_after['person_ID'] = new_rows_after.index
                     new_rows_after['event'] = self
                     new_rows_after['event_date'] = self.sim.date
                     new_rows_after['when'] = 'After'
+                    new_rows_after['appt_footprint'] = 'N/A'
+
 
                     self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
                     self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index ea9066bc8b..f8e8738543 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -196,14 +196,19 @@ def run(self, squeeze_factor):
 
         if self.sim.generate_event_chains:
             # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not
+            #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+#            if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+
                 print_chains = True
                 if self.target != self.sim.population:
-                    row = self.sim.population.props.loc[[self.target]]
+                    row = self.sim.population.props.iloc[[self.target]]
                     row['person_ID'] = self.target
                     row['event'] = str(self)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
+                    row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 else:
                     df_before = self.sim.population.props.copy()
@@ -212,32 +217,23 @@ def run(self, squeeze_factor):
         self.post_apply_hook()
         self._run_after_hsi_event()
         
+        footprint = self.EXPECTED_APPT_FOOTPRINT
+        if updated_appt_footprint is not None:
+            footprint = updated_appt_footprint
+        
         if print_chains:
             if self.target != self.sim.population:
-                row = self.sim.population.props.loc[[self.target]]
+                row = self.sim.population.props.iloc[[self.target]]
                 row['person_ID'] = self.target
                 row['event'] = str(self)
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
+                row['appt_footprint'] = str(footprint)
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
             else:
-                df_after = self.sim.population.props.copy()
-                change = df_before.compare(df_after)
-                if ~change.empty:
-                    indices = change.index
-                    new_rows_before = df_before.loc[indices]
-                    new_rows_before['person_ID'] = new_rows_before.index
-                    new_rows_before['event'] = self
-                    new_rows_before['event_date'] = self.sim.date
-                    new_rows_before['when'] = 'Before'
-                    new_rows_after = df_after.loc[indices]
-                    new_rows_after['person_ID'] = new_rows_after.index
-                    new_rows_after['event'] = self
-                    new_rows_after['event_date'] = self.sim.date
-                    new_rows_after['when'] = 'After'
-
-                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
-                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
+                print("Error, I shouldn't be here")
+                exit(-1)
+
         return updated_appt_footprint
 
     def get_consumables(
diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py
index 18c1987483..1c12e7162b 100644
--- a/src/tlo/methods/rti.py
+++ b/src/tlo/methods/rti.py
@@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):
         """Schedule to take place every month
         """
-        super().__init__(module, frequency=DateOffset(months=1))
+        super().__init__(module, frequency=DateOffset(months=1000))
         p = module.parameters
         # Parameters which transition the model between states
         self.base_1m_prob_rti = (p['base_rate_injrti'] / 12)
@@ -2864,9 +2864,13 @@ def apply(self, population):
                          .when('.between(70,79)', self.rr_injrti_age7079),
                          Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol)
                          )
-        pred = eq.predict(df.loc[rt_current_non_ind])
+        if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True:
+            pred = 1
+        else:
+            pred = eq.predict(df.loc[rt_current_non_ind])
         random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind))
         selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti]
+
         # Update to say they have been involved in a rti
         df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True
         # Set the date that people were injured to now
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 42a2a288d3..a8ecf14cc6 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None:
                 data=f"{module.name}.initialise_population() {time.time() - start1} s",
             )
 
-        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'])
+        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'])
 
         end = time.time()
         logger.info(key="info", data=f"make_initial_population() {end - start} s")
@@ -303,8 +303,8 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None:
             # Eventually this can be made an option
             self.generate_event_chains_overwrite_epi = True
             # For now keep these fixed, eventually they will be input from user
-            self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']]
-            self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
+            self.generate_event_chains_modules_of_interest = [self.modules['RTI']]
+            self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
         else:
             # If not using to print chains, cannot ignore epi
             self.generate_event_chains_overwrite_epi = False
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
index 8dd92513f9..af3c4f0ae9 100644
--- a/tests/test_data_generation.py
+++ b/tests/test_data_generation.py
@@ -25,15 +25,16 @@
     depression,
     tb,
     contraception,
-#    simplified_births,
+    simplified_births,
+    rti,
     symptommanager,
 )
 from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt
 
 # create simulation parameters
 start_date = Date(2010, 1, 1)
-end_date = Date(2014, 1, 1)
-popsize = 100
+end_date = Date(2012, 1, 1)
+popsize = 200
 
 @pytest.mark.slow
 def test_data_harvesting(seed):
@@ -41,7 +42,7 @@ def test_data_harvesting(seed):
     This test runs a simulation to print all individual events of specific individuals
     """
     
-    module_of_interest = 'Hiv'
+    module_of_interest = 'RTI'
     # create sim object
     sim = create_basic_sim(popsize, seed)
     
@@ -55,29 +56,31 @@ def test_data_harvesting(seed):
 
     # run simulation
     sim.simulate(end_date=end_date, generate_event_chains = True)
-
+    exit(-1)
 
 def create_basic_sim(population_size, seed):
     # create the basic outline of an rti simulation object
     sim = Simulation(start_date=start_date, seed=seed)
     resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
     sim.register(demography.Demography(resourcefilepath=resourcefilepath),
-                contraception.Contraception(resourcefilepath=resourcefilepath),
+               # contraception.Contraception(resourcefilepath=resourcefilepath),
                  enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
                  healthburden.HealthBurden(resourcefilepath=resourcefilepath),
                  symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
                  healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']),
+                 rti.RTI(resourcefilepath=resourcefilepath),
                  healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
-                 epi.Epi(resourcefilepath=resourcefilepath),
-                 hiv.Hiv(resourcefilepath=resourcefilepath),
-                 tb.Tb(resourcefilepath=resourcefilepath),
+                 simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+               #  epi.Epi(resourcefilepath=resourcefilepath),
+               #  hiv.Hiv(resourcefilepath=resourcefilepath),
+               #  tb.Tb(resourcefilepath=resourcefilepath),
                  cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath),
                  depression.Depression(resourcefilepath=resourcefilepath),
-                 newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
-                 pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
-                 care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
-                 labour.Labour(resourcefilepath=resourcefilepath),
-                 postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
+                # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
+                # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
+                # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
+                # labour.Labour(resourcefilepath=resourcefilepath),
+                 #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
                  )
 
     sim.make_initial_population(n=population_size)

From c7bd9d058cea79fad0f8471830766f5c335a7df1 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 11 Oct 2024 16:57:21 +0100
Subject: [PATCH 10/21] Only store change in individual properties, not entire
 property row. Log changes to logger.

---
 src/tlo/events.py             | 204 ++++++++++++++++++++++++++--------
 src/tlo/methods/hsi_event.py  | 134 ++++++++++++++++------
 src/tlo/simulation.py         |   2 +-
 tests/test_data_generation.py |  22 ++--
 4 files changed, 268 insertions(+), 94 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 76e1b9a117..436a01a97c 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -4,13 +4,20 @@
 from enum import Enum
 from typing import TYPE_CHECKING
 
-from tlo import DateOffset
+from tlo import DateOffset, logging
 
 if TYPE_CHECKING:
     from tlo import Simulation
 
 import pandas as pd
 
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+logger_summary = logging.getLogger(f"{__name__}.summary")
+logger_summary.setLevel(logging.INFO)
+
+debug_chains = True
 
 class Priority(Enum):
     """Enumeration for the Priority, which is used in sorting the events in the simulation queue."""
@@ -62,66 +69,167 @@ def apply(self, target):
         :param target: the target of the event
         """
         raise NotImplementedError
-
-    def run(self):
-        """Make the event happen."""
         
+    def compare_population_dataframe(self,df_before, df_after):
+        """ This function compares the population dataframe before/after a population-wide event has occurred.
+        It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """
+        
+        # Create a mask of where values are different
+        diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna())
+        
+        # Create an empty dictionary to store changes for each of the individuals
+        chain_links = {}
+        
+        # Loop through each row of the mask
+        for idx, row in diff_mask.iterrows():
+            changed_cols = row.index[row].tolist()
+            
+            if changed_cols:  # Proceed only if there are changes in the row
+            
+                # Create a dictionary for this person
+                # First add event info
+                link_info = {
+                    #'person_ID': idx,
+                    'event': str(self),
+                    'event_date': self.sim.date,
+                }
+                
+                # Store the new values from df_after for the changed columns
+                for col in changed_cols:
+                    link_info[col] = df_after.at[idx, col]
+                
+                
+                # Append the event and changes to the individual key
+                chain_links = {idx : link_info}
+        
+        return chain_links
+        
+    def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]:
+        """ This function checks whether this event should be logged as part of the event chains, and if so stores the required information before the event has occurred. """
+        
+        # Initialise these variables
         print_chains = False
         df_before = []
+        row_before = pd.Series()
         
-        if self.sim.generate_event_chains:
-            # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not
-            #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
-            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
-            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
-
-                print_chains = True
-                if self.target != self.sim.population:
-                    row = self.sim.population.props.iloc[[self.target]]
+        # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
+        #if (self.module in self.sim.generate_event_chains_modules_of_interest) and ..
+        if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
+        
+        # Will eventually use this once I can actually GET THE NAME OF THE SELF
+        #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+
+            print_chains = True
+            
+            # Target is single individual
+            if self.target != self.sim.population:
+                # Save row for comparison after event has occurred
+                row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
+                
+                if debug_chains:
+                    # Print entire row
+                    row = self.sim.population.props.loc[[abs(self.target)]]
                     row['person_ID'] = self.target
                     row['event'] = str(self)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
-                    row['appt_footprint'] = 'N/A'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
-                else:
-                    df_before = self.sim.population.props.copy()
-        
-        self.apply(self.target)
-        self.post_apply_hook()
                 
+            else:
+                # This will be a population-wide event. In order to find individuals for which this led to
+                # a meaningful change, make a copy of the pop dataframe before the event has occurred.
+                df_before = self.sim.population.props.copy()
+                
+        return print_chains, row_before, df_before
+        
+    def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict:
+        """ If print_chains=True, this function logs the event and identifies and logs any property changes that have occurred to one or multiple individuals as a result of the event taking place. """
+        
+        chain_links = {}
+        
         if print_chains:
+        
+            # Target is single individual
             if self.target != self.sim.population:
-                row = self.sim.population.props.iloc[[self.target]]
-                row['person_ID'] = self.target
-                row['event'] = str(self)
-                row['event_date'] = self.sim.date
-                row['when'] = 'After'
-                row['appt_footprint'] = 'N/A'
-                self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999)
+                
+                # Create and store event for this individual
+                link_info = {
+                    #'person_ID' : self.target,
+                    'event' : str(self),
+                    'event_date' : self.sim.date,
+                }
+                # Store property changes as a result of the event for this individual
+                for key in row_before.index:
+                    if row_before[key] != row_after[key]: # Note: used fillna previously
+                        link_info[key] = row_after[key]
+                        
+                chain_links = {self.target : link_info}
+
+                if debug_chains:
+                    # Print entire row
+                    row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potential issue with direct births
+                    row['person_ID'] = self.target
+                    row['event'] = str(self)
+                    row['event_date'] = self.sim.date
+                    row['when'] = 'After'
+                    self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                
             else:
-                df_after = self.sim.population.props.copy()
-                change = df_before.compare(df_after)
-                if ~change.empty:
-                    indices = change.index
-                    new_rows_before = df_before.loc[indices]
-                    new_rows_before['person_ID'] = new_rows_before.index
-                    new_rows_before['event'] = self
-                    new_rows_before['event_date'] = self.sim.date
-                    new_rows_before['when'] = 'Before'
-                    new_rows_before['appt_footprint'] = 'N/A'
-
-                    new_rows_after = df_after.loc[indices]
-                    new_rows_after['person_ID'] = new_rows_after.index
-                    new_rows_after['event'] = self
-                    new_rows_after['event_date'] = self.sim.date
-                    new_rows_after['when'] = 'After'
-                    new_rows_after['appt_footprint'] = 'N/A'
-
-
-                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
-                    self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
+                # Target is entire population. Identify individuals for which properties have changed
+                # and store their changes.
+                
+                # Population frame after event
+                df_after = self.sim.population.props
+                
+                #  Create and store the event and dictionary of changes for affected individuals
+                chain_links = self.compare_population_dataframe(df_before, df_after)
+
+                if debug_chains:
+                    # Or print entire rows
+                    change = df_before.compare(df_after)
+                    if not change.empty:
+                        indices = change.index
+                        new_rows_before = df_before.loc[indices]
+                        new_rows_before['person_ID'] = new_rows_before.index
+                        new_rows_before['event'] = self
+                        new_rows_before['event_date'] = self.sim.date
+                        new_rows_before['when'] = 'Before'
+
+                        new_rows_after = df_after.loc[indices]
+                        new_rows_after['person_ID'] = new_rows_after.index
+                        new_rows_after['event'] = self
+                        new_rows_after['event_date'] = self.sim.date
+                        new_rows_after['when'] = 'After'
+
+                        self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
+                        self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
+                    
+        return chain_links
+
+    def run(self):
+        """Make the event happen."""
+        
+        # Collect relevant information before event takes place
+        if self.sim.generate_event_chains:
+            print_chains, row_before, df_before = self.store_chains_to_do_before_event()
+                
+        self.apply(self.target)
+        self.post_apply_hook()
+        
+        # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link'
+        # in the individual's event chain.
+        if self.sim.generate_event_chains:
+            chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before)
+            
+            # Log chain_links here
+            if len(chain_links)>0:
+                logger.info(key='event_chains',
+                            data= chain_links,
+                            description='Links forming chains of events for simulated individuals')
+                
+                #print("Chain events ", chain_links)
+            
 
 
 class RegularEvent(Event):
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index f8e8738543..1c727f014b 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -16,12 +16,19 @@
     from tlo import Module, Simulation
     from tlo.methods.healthsystem import HealthSystem
 
+# Pointing to the logger in events
+logger_chains = logging.getLogger("tlo.methods.event")
+logger_chains.setLevel(logging.INFO)
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
 logger_summary = logging.getLogger(f"{__name__}.summary")
 logger_summary.setLevel(logging.INFO)
 
+debug_chains = True
+
+
 # Declare the level which will be used to represent the merging of levels '1b' and '2'
 LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2"
 
@@ -187,54 +194,113 @@ def _run_after_hsi_event(self) -> None:
                 item_codes=self._EQUIPMENT,
                 facility_id=self.facility_info.id
             )
-
-    def run(self, squeeze_factor):
-        """Make the event happen."""
+            
+    def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
+        """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """
         
+        # Initialise these variables
         print_chains = False
-        df_before = []
-
-        if self.sim.generate_event_chains:
-            # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
-            #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not
-            #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
-#            if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
-            if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
-
-                print_chains = True
-                if self.target != self.sim.population:
-                    row = self.sim.population.props.iloc[[self.target]]
-                    row['person_ID'] = self.target
-                    row['event'] = str(self)
-                    row['event_date'] = self.sim.date
-                    row['when'] = 'Before'
-                    row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
-                    self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
-                else:
-                    df_before = self.sim.population.props.copy()
-        
-        updated_appt_footprint = self.apply(self.target, squeeze_factor)
-        self.post_apply_hook()
-        self._run_after_hsi_event()
+        row_before = pd.Series()
         
-        footprint = self.EXPECTED_APPT_FOOTPRINT
-        if updated_appt_footprint is not None:
-            footprint = updated_appt_footprint
+        # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
+        # if (self.module in self.sim.generate_event_chains_modules_of_interest) and
+        if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
         
-        if print_chains:
+        # Will eventually use this once I can actually GET THE NAME OF THE SELF
+        # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)):
+                
             if self.target != self.sim.population:
-                row = self.sim.population.props.iloc[[self.target]]
+            
+                # In the case of HSI events, only individual events should exist and therefore be logged
+                print_chains = True
+                
+                # Save row for comparison after event has occurred
+                row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
+
+                row = self.sim.population.props.loc[[abs(self.target)]]
                 row['person_ID'] = self.target
                 row['event'] = str(self)
                 row['event_date'] = self.sim.date
-                row['when'] = 'After'
-                row['appt_footprint'] = str(footprint)
+                row['when'] = 'Before'
+                row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
+                row['level'] = self.facility_info.level
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                
             else:
+                # Many of our HealthSystem implementations rely on the assumption that
                 print("Error, I shouldn't be here")
                 exit(-1)
+                
+        return print_chains, row_before
+        
+    def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict:
+        """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """
+        if print_chains:
+            # For HSI event, this will only ever occur for individual events
+            
+            row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999)
+            
+            # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level
+            # will be stored regardless of whether individual experienced property changes.
+
+            # Add event details
+            link_info = {
+                'event' : str(self),
+                'event_date' : self.sim.date,
+                'appt_footprint' : str(footprint),
+                'level' : self.facility_info.level,
+            }
+            
+            # Add changes to properties
+            for key in row_before.index:
+                if row_before[key] != row_after[key]: # Note: used fillna previously
+                    link_info[key] = row_after[key]
+            
+            chain_links = {self.target : link_info}
+
+            # Print entire row
+            row = self.sim.population.props.loc[[abs(self.target)]]
+            row['person_ID'] = self.target
+            row['event'] = str(self)
+            row['event_date'] = self.sim.date
+            row['when'] = 'After'
+            row['appt_footprint'] = footprint
+            row['level'] = self.facility_info.level
+            self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+            
+        return chain_links
+        
+
+    def run(self, squeeze_factor):
+        """Make the event happen."""
+
+        
+        if self.sim.generate_event_chains:
+            print_chains, row_before = self.store_chains_to_do_before_event()
+              
+            footprint = self.EXPECTED_APPT_FOOTPRINT
 
+        updated_appt_footprint = self.apply(self.target, squeeze_factor)
+        self.post_apply_hook()
+        self._run_after_hsi_event()
+        
+        
+        if self.sim.generate_event_chains:
+
+            # If the footprint has been updated when the event ran, change it here
+            if updated_appt_footprint is not None:
+                footprint = updated_appt_footprint
+            
+            chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint))
+            
+            if len(chain_links)>0:
+                logger_chains.info(key='event_chains',
+                            data = chain_links,
+                            description='Links forming chains of events for simulated individuals')
+                #print(chain_links)
+                
         return updated_appt_footprint
+        
 
     def get_consumables(
         self,
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index a8ecf14cc6..20b3a4898f 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None:
                 data=f"{module.name}.initialise_population() {time.time() - start1} s",
             )
 
-        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'])
+        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level'])
 
         end = time.time()
         logger.info(key="info", data=f"make_initial_population() {end - start} s")
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
index af3c4f0ae9..39f2b022aa 100644
--- a/tests/test_data_generation.py
+++ b/tests/test_data_generation.py
@@ -33,7 +33,7 @@
 
 # create simulation parameters
 start_date = Date(2010, 1, 1)
-end_date = Date(2012, 1, 1)
+end_date = Date(2011, 1, 1)
 popsize = 200
 
 @pytest.mark.slow
@@ -63,24 +63,24 @@ def create_basic_sim(population_size, seed):
     sim = Simulation(start_date=start_date, seed=seed)
     resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
     sim.register(demography.Demography(resourcefilepath=resourcefilepath),
-               # contraception.Contraception(resourcefilepath=resourcefilepath),
+                 contraception.Contraception(resourcefilepath=resourcefilepath),
                  enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
                  healthburden.HealthBurden(resourcefilepath=resourcefilepath),
                  symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
                  healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']),
                  rti.RTI(resourcefilepath=resourcefilepath),
                  healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
-                 simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
-               #  epi.Epi(resourcefilepath=resourcefilepath),
-               #  hiv.Hiv(resourcefilepath=resourcefilepath),
-               #  tb.Tb(resourcefilepath=resourcefilepath),
+                 # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+                 epi.Epi(resourcefilepath=resourcefilepath),
+                 hiv.Hiv(resourcefilepath=resourcefilepath),
+                 tb.Tb(resourcefilepath=resourcefilepath),
                  cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath),
                  depression.Depression(resourcefilepath=resourcefilepath),
-                # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
-                # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
-                # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
-                # labour.Labour(resourcefilepath=resourcefilepath),
-                 #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
+                 newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
+                 pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
+                 care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
+                 labour.Labour(resourcefilepath=resourcefilepath),
+                 postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
                  )
 
     sim.make_initial_population(n=population_size)

From 769aaeca44aaedc324bd3da2f5f338bb47e02106 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 11 Oct 2024 17:03:22 +0100
Subject: [PATCH 11/21] Style fixes

---
 src/tlo/methods/tb.py         | 2 +-
 src/tlo/simulation.py         | 4 ++--
 tests/test_data_generation.py | 5 -----
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py
index 4c170944d2..9dc05ff301 100644
--- a/src/tlo/methods/tb.py
+++ b/src/tlo/methods/tb.py
@@ -1393,7 +1393,7 @@ def apply(self, population):
             & (df.tb_inf != "active")
             ].index
             
-        n_susceptible = len(susc_idx)
+        len(susc_idx)
         
         middle_index = len(susc_idx) // 2
 
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 20b3a4898f..75dfa76429 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -8,7 +8,7 @@
 import time
 from collections import OrderedDict
 from pathlib import Path
-from typing import Dict, Optional, Union
+from typing import Optional
 from typing import TYPE_CHECKING, Optional
 import pandas as pd
 
@@ -374,7 +374,7 @@ def run_simulation_to(self, *, to_date: Date) -> None:
         :param to_date: Date to simulate up to but not including - must be before or
             equal to simulation end date specified in call to :py:meth:`initialise`.
         """
-        f = open('output.txt', mode='a')
+        open('output.txt', mode='a')
 
         if not self._initialised:
             msg = "Simulation must be initialised before calling run_simulation_to"
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
index 39f2b022aa..c94618a77d 100644
--- a/tests/test_data_generation.py
+++ b/tests/test_data_generation.py
@@ -1,7 +1,6 @@
 import os
 from pathlib import Path
 
-import pandas as pd
 import pytest
 
 from tlo import Date, Simulation
@@ -11,7 +10,6 @@
     depression,
     enhanced_lifestyle,
     epi,
-    epilepsy,
     healthburden,
     healthseekingbehaviour,
     healthsystem,
@@ -20,16 +18,13 @@
     labour,
     newborn_outcomes,
     postnatal_supervisor,
-    pregnancy_helper_functions,
     pregnancy_supervisor,
     depression,
     tb,
     contraception,
-    simplified_births,
     rti,
     symptommanager,
 )
-from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt
 
 # create simulation parameters
 start_date = Date(2010, 1, 1)

From 757cee36b0ae611f1f7ae31d25799fc0d6e7daa1 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Sun, 13 Oct 2024 11:15:17 +0100
Subject: [PATCH 12/21] Include printing of individual properties at the
 beginning and at birth, label what is only used for debugging and will be
 later removed

---
 src/tlo/events.py             |  5 +++--
 src/tlo/methods/hsi_event.py  |  7 ++++---
 src/tlo/methods/rti.py        |  2 +-
 src/tlo/simulation.py         | 28 ++++++++++++++++++++++++++++
 tests/test_data_generation.py |  5 ++---
 5 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 436a01a97c..03bf7c72fa 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -98,7 +98,6 @@ def compare_population_dataframe(self,df_before, df_after):
                 for col in changed_cols:
                     link_info[col] = df_after.at[idx, col]
                 
-                
                 # Append the event and changes to the individual key
                 chain_links = {idx : link_info}
         
@@ -127,7 +126,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame
                 row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
                 
                 if debug_chains:
-                    # Print entire row
+                    # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                     row = self.sim.population.props.loc[[abs(self.target)]]
                     row['person_ID'] = self.target
                     row['event'] = str(self)
@@ -166,6 +165,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                         
                 chain_links = {self.target : link_info}
 
+                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                 if debug_chains:
                     # Print entire row
                     row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births
@@ -185,6 +185,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                 #  Create and store the event and dictionary of changes for affected individuals
                 chain_links = self.compare_population_dataframe(df_before, df_after)
 
+                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                 if debug_chains:
                     # Or print entire rows
                     change = df_before.compare(df_after)
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 1c727f014b..0c3bc16072 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -217,6 +217,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                 # Save row for comparison after event has occurred
                 row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
 
+                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                 row = self.sim.population.props.loc[[abs(self.target)]]
                 row['person_ID'] = self.target
                 row['event'] = str(self)
@@ -228,8 +229,8 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                 
             else:
                 # Many of our HealthSystem implementations rely on the assumption that
-                print("Error, I shouldn't be here")
-                exit(-1)
+                raise RuntimeError("Cannot have population-wide HSI events")
+
                 
         return print_chains, row_before
         
@@ -258,7 +259,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
             
             chain_links = {self.target : link_info}
 
-            # Print entire row
+            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
             row = self.sim.population.props.loc[[abs(self.target)]]
             row['person_ID'] = self.target
             row['event'] = str(self)
diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py
index 1c12e7162b..3642365976 100644
--- a/src/tlo/methods/rti.py
+++ b/src/tlo/methods/rti.py
@@ -2865,7 +2865,7 @@ def apply(self, population):
                          Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol)
                          )
         if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True:
-            pred = 1
+            pred = 1.0
         else:
             pred = eq.predict(df.loc[rt_current_non_ind])
         random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind))
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 75dfa76429..582fb4ba1c 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -37,6 +37,9 @@
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
+logger_chains = logging.getLogger("tlo.methods.event")
+logger_chains.setLevel(logging.INFO)
+
 
 class SimulationPreviouslyInitialisedError(Exception):
     """Exception raised when trying to initialise an already initialised simulation."""
@@ -111,6 +114,8 @@ def __init__(
         self.end_date = None
         self.output_file = None
         self.population: Optional[Population] = None
+        
+        # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
         self.event_chains: Optinoal[Population] = None
 
         self.show_progress_bar = show_progress_bar
@@ -281,7 +286,16 @@ def make_initial_population(self, *, n: int) -> None:
                 data=f"{module.name}.initialise_population() {time.time() - start1} s",
             )
 
+        # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
         self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level'])
+        
+        # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged.
+        # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start.
+        if self.generate_event_chains:
+            pop_dict = self.population.props.to_dict(orient='index')
+            logger_chains.info(key='event_chains',
+                               data = pop_dict,
+                               description='Links forming chains of events for simulated individuals')
 
         end = time.time()
         logger.info(key="info", data=f"make_initial_population() {end - start} s")
@@ -392,6 +406,8 @@ def run_simulation_to(self, *, to_date: Date) -> None:
                 self._update_progress_bar(progress_bar, date)
             self.fire_single_event(event, date)
         self.date = to_date
+        
+        # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR.
         self.event_chains.to_csv('output.csv', index=False)
 
         if self.show_progress_bar:
@@ -449,13 +465,25 @@ def do_birth(self, mother_id: int) -> int:
         child_id = self.population.do_birth()
         for module in self.modules.values():
             module.on_birth(mother_id, child_id)
+            
         if self.generate_event_chains:
+            # When individual is born, store their initial properties to provide a starting point to the chain of property
+            # changes that this individual will undergo as a result of events taking place.
+            prop_dict = self.population.props.loc[child_id].to_dict()
+            
+            child_dict = {child_id : prop_dict}
+            logger_chains.info(key='event_chains',
+                               data = child_dict,
+                               description='Links forming chains of events for simulated individuals')
+        
+            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
             row = self.population.props.iloc[[child_id]]
             row['person_ID'] = child_id
             row['event'] = 'Birth'
             row['event_date'] = self.date
             row['when'] = 'After'
             self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
+            
         return child_id
 
     def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]:
diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
index c94618a77d..d9885c1fab 100644
--- a/tests/test_data_generation.py
+++ b/tests/test_data_generation.py
@@ -28,8 +28,8 @@
 
 # create simulation parameters
 start_date = Date(2010, 1, 1)
-end_date = Date(2011, 1, 1)
-popsize = 200
+end_date = Date(2012, 1, 1)
+popsize = 100
 
 @pytest.mark.slow
 def test_data_harvesting(seed):
@@ -51,7 +51,6 @@ def test_data_harvesting(seed):
 
     # run simulation
     sim.simulate(end_date=end_date, generate_event_chains = True)
-    exit(-1)
 
 def create_basic_sim(population_size, seed):
     # create the basic outline of an rti simulation object

From 22a5e44312ad4d2f1d955b70399ae9569efb13c0 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 16 Oct 2024 14:00:22 +0100
Subject: [PATCH 13/21] Log everything to simulation, as events logger doesn't
 seem to be visible to all modules. For now add person_ID to the dict of info
 printed, as logging of the outer dictionary key seems to have a problem.

---
 src/tlo/events.py            | 13 +++++++++----
 src/tlo/methods/hsi_event.py |  3 ++-
 src/tlo/simulation.py        | 25 +++++++++++++++++--------
 3 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index 03bf7c72fa..98832faecb 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -11,9 +11,13 @@
 
 import pandas as pd
 
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
+logger_chain = logging.getLogger('tlo.simulation')
+logger_chain.setLevel(logging.INFO)
+
 logger_summary = logging.getLogger(f"{__name__}.summary")
 logger_summary.setLevel(logging.INFO)
 
@@ -89,7 +93,7 @@ def compare_population_dataframe(self,df_before, df_after):
                 # Create a dictionary for this person
                 # First add event info
                 link_info = {
-                    #'person_ID': idx,
+                    'person_ID': idx,
                     'event': str(self),
                     'event_date': self.sim.date,
                 }
@@ -152,13 +156,14 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
             if self.target != self.sim.population:
                 row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999)
                 
-                # Create and store event for this individual
+                # Create and store event for this individual, regardless of whether any property change occurred
                 link_info = {
                     #'person_ID' : self.target,
+                    'person_ID' : self.target,
                     'event' : str(self),
                     'event_date' : self.sim.date,
                 }
-                # Store property changes as a result of the event for this individual
+                # Store (if any) property changes as a result of the event for this individual
                 for key in row_before.index:
                     if row_before[key] != row_after[key]: # Note: used fillna previously
                         link_info[key] = row_after[key]
@@ -225,7 +230,7 @@ def run(self):
             
             # Log chain_links here
             if len(chain_links)>0:
-                logger.info(key='event_chains',
+                logger_chain.info(key='event_chains',
                             data= chain_links,
                             description='Links forming chains of events for simulated individuals')
                 
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 0c3bc16072..6651a8704a 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -17,7 +17,7 @@
     from tlo.methods.healthsystem import HealthSystem
 
 # Pointing to the logger in events
-logger_chains = logging.getLogger("tlo.methods.event")
+logger_chains = logging.getLogger("tlo.simulation")
 logger_chains.setLevel(logging.INFO)
 
 logger = logging.getLogger(__name__)
@@ -246,6 +246,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
 
             # Add event details
             link_info = {
+                'person_ID': self.target,
                 'event' : str(self),
                 'event_date' : self.sim.date,
                 'appt_footprint' : str(footprint),
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 582fb4ba1c..fd9fade215 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -107,7 +107,7 @@ def __init__(
         self.date = self.start_date = start_date
         self.modules = OrderedDict()
         self.event_queue = EventQueue()
-        self.generate_event_chains = None
+        self.generate_event_chains = True
         self.generate_event_chains_overwrite_epi = None
         self.generate_event_chains_modules_of_interest = []
         self.generate_event_chains_ignore_events = []
@@ -292,15 +292,23 @@ def make_initial_population(self, *, n: int) -> None:
         # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged.
         # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start.
         if self.generate_event_chains:
+
             pop_dict = self.population.props.to_dict(orient='index')
-            logger_chains.info(key='event_chains',
+            
+            print(pop_dict)
+            print(pop_dict.keys())
+            for key in pop_dict.keys():
+                pop_dict[key]['person_ID'] = key
+            print("Length of properties", len(pop_dict[0].keys()))
+            #exit(-1)
+            logger.info(key='event_chains',
                                data = pop_dict,
                                description='Links forming chains of events for simulated individuals')
 
         end = time.time()
         logger.info(key="info", data=f"make_initial_population() {end - start} s")
 
-    def initialise(self, *, end_date: Date, generate_event_chains) -> None:
+    def initialise(self, *, end_date: Date) -> None:
         """Initialise all modules in simulation.
         :param end_date: Date to end simulation on - accessible to modules to allow
             initialising data structures which may depend (in size for example) on the
@@ -312,7 +320,7 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None:
         self.date = self.start_date
         self.end_date = end_date  # store the end_date so that others can reference it
 
-        self.generate_event_chains = generate_event_chains
+        #self.generate_event_chains = generate_event_chains
         if self.generate_event_chains:
             # Eventually this can be made an option
             self.generate_event_chains_overwrite_epi = True
@@ -413,7 +421,7 @@ def run_simulation_to(self, *, to_date: Date) -> None:
         if self.show_progress_bar:
             progress_bar.stop()
 
-    def simulate(self, *, end_date: Date, generate_event_chains=False) -> None:
+    def simulate(self, *, end_date: Date) -> None:
         """Simulate until the given end date
 
         :param end_date: When to stop simulating. Only events strictly before this
@@ -421,7 +429,7 @@ def simulate(self, *, end_date: Date, generate_event_chains=False) -> None:
             clarity.
         """
         start = time.time()
-        self.initialise(end_date=end_date, generate_event_chains=generate_event_chains)
+        self.initialise(end_date=end_date)
         self.run_simulation_to(to_date=end_date)
         self.finalise(time.time() - start)
 
@@ -470,9 +478,10 @@ def do_birth(self, mother_id: int) -> int:
             # When individual is born, store their initial properties to provide a starting point to the chain of property
             # changes that this individual will undergo as a result of events taking place.
             prop_dict = self.population.props.loc[child_id].to_dict()
-            
+            prop_dict['event'] = 'Birth'
+            prop_dict['event_date'] = self.date
             child_dict = {child_id : prop_dict}
-            logger_chains.info(key='event_chains',
+            logger.info(key='event_chains',
                                data = child_dict,
                                description='Links forming chains of events for simulated individuals')
         

From 7faa81783dc43e434e26ef8c95717480cebd3816 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 18 Oct 2024 10:07:46 +0200
Subject: [PATCH 14/21] Consider all modules included as of interest

---
 src/tlo/simulation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index fd9fade215..15be1622e8 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -325,7 +325,7 @@ def initialise(self, *, end_date: Date) -> None:
             # Eventually this can be made an option
             self.generate_event_chains_overwrite_epi = True
             # For now keep these fixed, eventually they will be input from user
-            self.generate_event_chains_modules_of_interest = [self.modules['RTI']]
+            self.generate_event_chains_modules_of_interest = [self.modules]
             self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
         else:
             # If not using to print chains, cannot ignore epi

From 7232f976831054ed541d59d8da20c91289fa79e6 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:29:43 +0200
Subject: [PATCH 15/21] Remove pop-wide HSI warning and make epi default even
 when printing chains

---
 src/tlo/methods/hsi_event.py | 38 ++++++++++++++++++++++++++----------
 src/tlo/simulation.py        |  2 +-
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 6651a8704a..d0cdb5bbdd 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -223,13 +223,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                 row['event'] = str(self)
                 row['event_date'] = self.sim.date
                 row['when'] = 'Before'
-                row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
-                row['level'] = self.facility_info.level
+                try:
+                    row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
+                    row['level'] = self.facility_info.level
+                except:
+                    row['appt_footprint'] = 'N/A'
+                    row['level'] = 'N/A'
                 self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 
             else:
-                # Many of our HealthSystem implementations rely on the assumption that
-                raise RuntimeError("Cannot have population-wide HSI events")
+                # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error
+                # raise RuntimeError("Cannot have population-wide HSI events")
+                logger.debug(
+                    key="message",
+                    data=(
+                        f"Cannot have population-wide HSI events"
+                    ),
+                )
 
                 
         return print_chains, row_before
@@ -245,12 +255,20 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
             # will be stored regardless of whether individual experienced property changes.
 
             # Add event details
+            
+            try:
+                record_footprint = str(footprint)
+                record_level = self.facility_info.level
+            except:
+                record_footprint = 'N/A'
+                record_level = 'N/A'
+                
             link_info = {
                 'person_ID': self.target,
                 'event' : str(self),
                 'event_date' : self.sim.date,
-                'appt_footprint' : str(footprint),
-                'level' : self.facility_info.level,
+                'appt_footprint' : record_footprint,
+                'level' : record_level,
             }
             
             # Add changes to properties
@@ -266,8 +284,8 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
             row['event'] = str(self)
             row['event_date'] = self.sim.date
             row['when'] = 'After'
-            row['appt_footprint'] = footprint
-            row['level'] = self.facility_info.level
+            row['appt_footprint'] = record_footprint
+            row['level'] = record_level
             self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
             
         return chain_links
@@ -277,7 +295,7 @@ def run(self, squeeze_factor):
         """Make the event happen."""
 
         
-        if self.sim.generate_event_chains:
+        if self.sim.generate_event_chains and self.target != self.sim.population:
             print_chains, row_before = self.store_chains_to_do_before_event()
               
             footprint = self.EXPECTED_APPT_FOOTPRINT
@@ -287,7 +305,7 @@ def run(self, squeeze_factor):
         self._run_after_hsi_event()
         
         
-        if self.sim.generate_event_chains:
+        if self.sim.generate_event_chains and self.target != self.sim.population:
 
             # If the footprint has been updated when the event ran, change it here
             if updated_appt_footprint is not None:
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 15be1622e8..0c70b164d9 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -323,7 +323,7 @@ def initialise(self, *, end_date: Date) -> None:
         #self.generate_event_chains = generate_event_chains
         if self.generate_event_chains:
             # Eventually this can be made an option
-            self.generate_event_chains_overwrite_epi = True
+            self.generate_event_chains_overwrite_epi = False
             # For now keep these fixed, eventually they will be input from user
             self.generate_event_chains_modules_of_interest = [self.modules]
             self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']

From a6def2d22c0d291ce775afef561b580847ad36cf Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:39:24 +0200
Subject: [PATCH 16/21] Style fix

---
 src/tlo/methods/hsi_event.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index d0cdb5bbdd..041ab9cf08 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -237,7 +237,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                 logger.debug(
                     key="message",
                     data=(
-                        f"Cannot have population-wide HSI events"
+                        "Cannot have population-wide HSI events"
                     ),
                 )
 

From ecea532a2843d312580accf97383cd62c457fd04 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 18 Oct 2024 11:51:39 +0200
Subject: [PATCH 17/21] Remove data generation test, which wasn't really a test

---
 tests/test_data_generation.py | 82 -----------------------------------
 1 file changed, 82 deletions(-)
 delete mode 100644 tests/test_data_generation.py

diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py
deleted file mode 100644
index d9885c1fab..0000000000
--- a/tests/test_data_generation.py
+++ /dev/null
@@ -1,82 +0,0 @@
-import os
-from pathlib import Path
-
-import pytest
-
-from tlo import Date, Simulation
-from tlo.methods import (
-    care_of_women_during_pregnancy,
-    demography,
-    depression,
-    enhanced_lifestyle,
-    epi,
-    healthburden,
-    healthseekingbehaviour,
-    healthsystem,
-    hiv,
-    cardio_metabolic_disorders,
-    labour,
-    newborn_outcomes,
-    postnatal_supervisor,
-    pregnancy_supervisor,
-    depression,
-    tb,
-    contraception,
-    rti,
-    symptommanager,
-)
-
-# create simulation parameters
-start_date = Date(2010, 1, 1)
-end_date = Date(2012, 1, 1)
-popsize = 100
-
-@pytest.mark.slow
-def test_data_harvesting(seed):
-    """
-    This test runs a simulation to print all individual events of specific individuals
-    """
-    
-    module_of_interest = 'RTI'
-    # create sim object
-    sim = create_basic_sim(popsize, seed)
-    
-    dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES)
-    
-    # Check that all dependencies are included
-    for dep in dependencies_list:
-        if dep not in sim.modules:
-            print("WARNING: dependency ", dep, "not included")
-            exit(-1)
-
-    # run simulation
-    sim.simulate(end_date=end_date, generate_event_chains = True)
-
-def create_basic_sim(population_size, seed):
-    # create the basic outline of an rti simulation object
-    sim = Simulation(start_date=start_date, seed=seed)
-    resourcefilepath = Path(os.path.dirname(__file__)) / '../resources'
-    sim.register(demography.Demography(resourcefilepath=resourcefilepath),
-                 contraception.Contraception(resourcefilepath=resourcefilepath),
-                 enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath),
-                 healthburden.HealthBurden(resourcefilepath=resourcefilepath),
-                 symptommanager.SymptomManager(resourcefilepath=resourcefilepath),
-                 healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']),
-                 rti.RTI(resourcefilepath=resourcefilepath),
-                 healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath),
-                 # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
-                 epi.Epi(resourcefilepath=resourcefilepath),
-                 hiv.Hiv(resourcefilepath=resourcefilepath),
-                 tb.Tb(resourcefilepath=resourcefilepath),
-                 cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath),
-                 depression.Depression(resourcefilepath=resourcefilepath),
-                 newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath),
-                 pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath),
-                 care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath),
-                 labour.Labour(resourcefilepath=resourcefilepath),
-                 postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath),
-                 )
-
-    sim.make_initial_population(n=population_size)
-    return sim
-

From ae7a44cb5f72063c48555e3b21d5d6dd4400ee97 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Wed, 23 Oct 2024 15:29:03 +0200
Subject: [PATCH 18/21] Change dict of properties to string in logging, and add
 analysis files

---
 .../analysis_extract_data.py                  | 370 ++++++++++++++++++
 .../postprocess_events_chain.py               | 156 ++++++++
 .../scenario_generate_chains.py               | 115 ++++++
 src/tlo/events.py                             |  23 +-
 src/tlo/methods/hsi_event.py                  |  13 +-
 src/tlo/simulation.py                         |  29 +-
 6 files changed, 684 insertions(+), 22 deletions(-)
 create mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py
 create mode 100644 src/scripts/analysis_data_generation/postprocess_events_chain.py
 create mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py

diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py
new file mode 100644
index 0000000000..2cfba5315b
--- /dev/null
+++ b/src/scripts/analysis_data_generation/analysis_extract_data.py
@@ -0,0 +1,370 @@
+"""Produce plots to show the health impact (deaths, DALYs) of the healthcare system (overall health impact) when
+running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)"""
+
+# short tclose -> ideal case
+# long tclose -> status quo
+import argparse
+from pathlib import Path
+from typing import Tuple
+
+import pandas as pd
+
+from tlo import Date
+from tlo.analysis.utils import extract_results
+from datetime import datetime
+
+# Range of years considered
+min_year = 2010
+max_year = 2040
+
+
+def all_columns(_df):
+    return pd.Series(_df.all())
+
+def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ):
+    """Produce standard set of plots describing the effect of each TREATMENT_ID.
+    - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur.
+    - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur.
+    """
+    pd.set_option('display.max_rows', None)
+    pd.set_option('display.max_colwidth', None)
+    event_chains = extract_results(
+            results_folder,
+            module='tlo.simulation',
+            key='event_chains',
+            column='0',
+            #column = str(i),
+            #custom_generate_series=get_num_dalys_by_year,
+            do_scaling=False
+        )
+   # print(event_chains.loc[0,(0, 0)])
+
+    eval_env = {
+        'datetime': datetime,  # Add the datetime class to the eval environment
+        'pd': pd,              # Add pandas to handle Timestamp
+        'Timestamp': pd.Timestamp,  # Specifically add Timestamp for eval
+        'NaT': pd.NaT,
+        'nan': float('nan'),       # Include NaN for eval (can also use pd.NA if preferred)
+    }
+
+    for item,row in event_chains.iterrows():
+        value = event_chains.loc[item,(0, 0)]
+        if value !='':
+            print('')
+            print(value)
+    exit(-1)
+    #dict = {}
+    #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
+    #    dict[i] = []
+
+    #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
+    #    event_chains = extract_results(
+    #        results_folder,
+    #        module='tlo.simulation'#,
+    #        key='event_chains',
+    #        column = str(i),
+    #        #custom_generate_series=get_num_dalys_by_year,
+    #        do_scaling=False
+    #    )
+    #    print(event_chains)
+    #    print(event_chains.index)
+    #    print(event_chains.columns.levels)
+
+    #    for index, row in event_chains.iterrows():
+    #        if event_chains.iloc[index,0] is not None:
+    #            if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()):
+    #                dict[i].append(event_chains.iloc[index,0])
+            #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()):
+                #print(event_chains.iloc[index,0]['de_depr'])
+               # exit(-1)
+    #for item in dict[0]:
+    #    print(item)
+    
+    #exit(-1)
+    
+    TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1))
+
+    # Definitions of general helper functions
+    lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png"  # noqa: E731
+
+    def target_period() -> str:
+        """Returns the target period as a string of the form YYYY-YYYY"""
+        return "-".join(str(t.year) for t in TARGET_PERIOD)
+
+    def get_parameter_names_from_scenario_file() -> Tuple[str]:
+        """Get the tuple of names of the scenarios from `Scenario` class used to create the results."""
+        from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import (
+            ImpactOfHealthSystemMode,
+        )
+        e = ImpactOfHealthSystemMode()
+        return tuple(e._scenarios.keys())
+
+    def get_num_deaths(_df):
+        """Return total number of Deaths (total within the TARGET_PERIOD)
+        """
+        return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)]))
+
+    def get_num_dalys(_df):
+        """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)"""
+        return pd.Series(
+            data=_df
+            .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])]
+            .drop(columns=['date', 'sex', 'age_range', 'year'])
+            .sum().sum()
+        )
+
+    def get_num_dalys_by_cause(_df):
+        """Return number of DALYs by cause by label (total within the TARGET_PERIOD)"""
+        return pd.Series(
+            data=_df
+            .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])]
+            .drop(columns=['date', 'sex', 'age_range', 'year'])
+            .sum()
+        )
+
+    def set_param_names_as_column_index_level_0(_df):
+        """Set the columns index (level 0) as the param_names."""
+        ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)}
+        names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]]
+        assert len(names_of_cols_level0) == len(_df.columns.levels[0])
+        _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0)
+        return _df
+
+    def find_difference_relative_to_comparison(_ser: pd.Series,
+                                               comparison: str,
+                                               scaled: bool = False,
+                                               drop_comparison: bool = True,
+                                               ):
+        """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0)
+        within the runs (level 1), relative to where draw = `comparison`.
+        The comparison is `X - COMPARISON`."""
+        return _ser \
+            .unstack(level=0) \
+            .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \
+            .drop(columns=([comparison] if drop_comparison else [])) \
+            .stack()
+
+    
+    def get_counts_of_hsi_by_treatment_id(_df):
+        """Get the counts of the TREATMENT_IDs occurring (total within the TARGET_PERIOD)"""
+        _counts_by_treatment_id = _df \
+            .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \
+            .apply(pd.Series) \
+            .sum() \
+            .astype(int)
+        return _counts_by_treatment_id.groupby(level=0).sum()
+        
+    year_target = 2023
+    def get_counts_of_hsi_by_treatment_id_by_year(_df):
+        """Get the counts of the TREATMENT_IDs occurring in the year `year_target`"""
+        _counts_by_treatment_id = _df \
+            .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \
+            .apply(pd.Series) \
+            .sum() \
+            .astype(int)
+        return _counts_by_treatment_id.groupby(level=0).sum()
+    
+    def get_counts_of_hsi_by_short_treatment_id(_df):
+        """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)"""
+        _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df)
+        _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*")
+        return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum()
+        
+    def get_counts_of_hsi_by_short_treatment_id_by_year(_df):
+        """Get the counts of the short TREATMENT_IDs (up to first underscore) occurring in year `year_target`"""
+        _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df)
+        _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*")
+        return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum()
+ 
+        
+    # Obtain parameter names for this scenario file
+    param_names = get_parameter_names_from_scenario_file()
+    print(param_names)
+
+    # ================================================================================================
+    # TIME EVOLUTION OF TOTAL DALYs
+    # Plot DALYs averted compared to the ``No Policy'' policy
+    
+    year_target = 2023 # Closure variable read by the custom function below; overwritten in the loop over years
+    def get_num_dalys_by_year(_df):
+        """Return total number of DALYs (Stacked) within the year `year_target`"""
+        return pd.Series(
+            data=_df
+            .loc[_df.year == year_target]
+            .drop(columns=['date', 'sex', 'age_range', 'year'])
+            .sum().sum()
+        )
+        
+    ALL = {}
+    # Plot the time trend; show the year prior to the transition as well, to emphasise that until that point
+    # DALYs incurred are consistent across different policies
+    this_min_year = 2010
+    for year in range(this_min_year, max_year+1):
+        year_target = year
+        num_dalys_by_year = extract_results(
+            results_folder,
+            module='tlo.methods.healthburden',
+            key='dalys_stacked',
+            custom_generate_series=get_num_dalys_by_year,
+            do_scaling=True
+        ).pipe(set_param_names_as_column_index_level_0)
+        ALL[year_target] = num_dalys_by_year
+    # Concatenate the DataFrames into a single DataFrame
+    concatenated_df = pd.concat(ALL.values(), keys=ALL.keys())
+    concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original'])
+    concatenated_df = concatenated_df.reset_index(level='index_original',drop=True)
+    dalys_by_year = concatenated_df
+    print(dalys_by_year)
+    dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True)
+    
+    # ================================================================================================
+    # Print population under each scenario
+    pop_model = extract_results(results_folder,
+                                module="tlo.methods.demography",
+                                key="population",
+                                column="total",
+                                index="date",
+                                do_scaling=True
+                                ).pipe(set_param_names_as_column_index_level_0)
+    
+    pop_model.index = pop_model.index.year
+    pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)]
+    print(pop_model)
+    assert dalys_by_year.index.equals(pop_model.index)
+    assert all(dalys_by_year.columns == pop_model.columns)
+    pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True)
+
+    # ================================================================================================
+    # DALYs BROKEN DOWN BY CAUSES AND YEAR
+    # DALYs by cause per year
+    # %% Quantify the health losses associated with all interventions combined.
+    
+    year_target = 2023 # Closure variable read by the custom function below; overwritten in the loop over years
+    def get_num_dalys_by_year_and_cause(_df):
+        """Return number of DALYs (Stacked) by label within the year `year_target`"""
+        return pd.Series(
+            data=_df
+            .loc[_df.year == year_target]
+            .drop(columns=['date', 'sex', 'age_range', 'year'])
+            .sum()
+        )
+        
+    ALL = {}
+    # Plot the time trend; show the year prior to the transition as well, to emphasise that until that point
+    # DALYs incurred are consistent across different policies
+    this_min_year = 2010
+    for year in range(this_min_year, max_year+1):
+        year_target = year
+        num_dalys_by_year = extract_results(
+            results_folder,
+            module='tlo.methods.healthburden',
+            key='dalys_stacked',
+            custom_generate_series=get_num_dalys_by_year_and_cause,
+            do_scaling=True
+        ).pipe(set_param_names_as_column_index_level_0)
+        ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year)
+
+    # Concatenate the DataFrames into a single DataFrame
+    concatenated_df = pd.concat(ALL.values(), keys=ALL.keys())
+
+    concatenated_df.index = concatenated_df.index.set_names(['date', 'cause'])
+    
+    df_total = concatenated_df
+    df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True)
+
+    ALL = {}
+    # Plot the time trend; show the year prior to the transition as well, to emphasise that until that point
+    # DALYs incurred are consistent across different policies
+    for year in range(min_year, max_year+1):
+        year_target = year
+        
+        hsi_delivered_by_year = extract_results(
+                results_folder,
+                module='tlo.methods.healthsystem.summary',
+                key='HSI_Event',
+                custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year,
+                do_scaling=True
+            ).pipe(set_param_names_as_column_index_level_0)
+        ALL[year_target] = hsi_delivered_by_year
+
+    # Concatenate the DataFrames into a single DataFrame
+    concatenated_df = pd.concat(ALL.values(), keys=ALL.keys())
+    concatenated_df.index = concatenated_df.index.set_names(['date', 'cause'])
+    HSI_ran_by_year = concatenated_df
+
+    del ALL
+    
+    ALL = {}
+    # Plot the time trend; show the year prior to the transition as well, to emphasise that until that point
+    # DALYs incurred are consistent across different policies
+    for year in range(min_year, max_year+1):
+        year_target = year
+        
+        hsi_not_delivered_by_year = extract_results(
+                results_folder,
+                module='tlo.methods.healthsystem.summary',
+                key='Never_ran_HSI_Event',
+                custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year,
+                do_scaling=True
+            ).pipe(set_param_names_as_column_index_level_0)
+        ALL[year_target] = hsi_not_delivered_by_year
+
+    # Concatenate the DataFrames into a single DataFrame
+    concatenated_df = pd.concat(ALL.values(), keys=ALL.keys())
+    concatenated_df.index = concatenated_df.index.set_names(['date', 'cause'])
+    HSI_never_ran_by_year = concatenated_df
+    
+    HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df(
+    HSI_ran_by_year = HSI_ran_by_year.fillna(0)
+    HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0)
+    HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True)
+    HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True)
+    print(HSI_ran_by_year)
+    print(HSI_never_ran_by_year)
+    print(HSI_total_by_year)
+    
+if __name__ == "__main__":
+    rfp = Path('resources')
+
+    parser = argparse.ArgumentParser(
+        description="Produce plots to show the impact each set of treatments",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument(
+        "--output-path",
+        help=(
+            "Directory to write outputs to. If not specified (set to None) outputs "
+            "will be written to value of --results-path argument."
+        ),
+        type=Path,
+        default=None,
+        required=False,
+    )
+    parser.add_argument(
+        "--resources-path",
+        help="Directory containing resource files",
+        type=Path,
+        default=Path('resources'),
+        required=False,
+    )
+    parser.add_argument(
+        "--results-path",
+        type=Path,
+        help=(
+            "Directory containing results from running "
+            "src/scripts/analysis_data_generation/scenario_generate_chains.py "
+        ),
+        default=None,
+        required=False
+    )
+    args = parser.parse_args()
+    assert args.results_path is not None
+    results_path = args.results_path
+
+    output_path = results_path if args.output_path is None else args.output_path
+
+    apply(
+        results_folder=results_path,
+        output_folder=output_path,
+        resourcefilepath=args.resources_path
+    )
diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py
new file mode 100644
index 0000000000..96c27a04b1
--- /dev/null
+++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py
@@ -0,0 +1,156 @@
+import pandas as pd
+from dateutil.relativedelta import relativedelta
+
+# Remove from every individual's event chain all events that were fired after death
+def cut_off_events_after_death(df):
+
+    events_chain = df.groupby('person_ID')
+    
+    filtered_data = pd.DataFrame()
+
+    for name, group in events_chain:
+
+        # Find the first non-NaN 'date_of_death' and its index
+        first_non_nan_index = group['date_of_death'].first_valid_index()
+        
+        if first_non_nan_index is not None:
+            # Filter out all rows after the first non-NaN index
+            filtered_group = group.loc[:first_non_nan_index]  # Keep rows up to and including the first valid index
+            filtered_data = pd.concat([filtered_data, filtered_group])
+        else:
+            # If there are no non-NaN values, keep the original group
+            filtered_data = pd.concat([filtered_data, group])
+
+    return filtered_data
+
+# Load into DataFrame
+def load_csv_to_dataframe(file_path):
+    try:
+        # Load raw chains into df
+        df = pd.read_csv(file_path)
+        print("Raw event chains loaded successfully!")
+        return df
+    except FileNotFoundError:
+        print(f"Error: The file '{file_path}' was not found.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+file_path = 'output.csv'  # Replace with the path to your CSV file
+
+output = load_csv_to_dataframe(file_path)
+
+# Some of the dates appeared not to be in datetime format. Correct here.
+output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce')
+output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce')
+if 'hv_date_inf' in output.columns:
+    output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce')
+
+
+date_start = pd.to_datetime('2010-01-01')
+if 'Other' in output['cause_of_death'].values:
+    print("ERROR: 'Other' was included in sim as possible cause of death")
+    exit(-1)
+
+# Choose which columns in individual properties to visualise
+columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when']
+#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event']
+
+# When checking which events led to *any* changes in individual properties, exclude these columns from comparison
+columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint']
+
+# If considering epidemiology consistent with sim, add check here.
+check_ages_of_those_HIV_inf = False
+if check_ages_of_those_HIV_inf:
+    for index, row in output.iterrows():
+        if pd.isna(row['hv_date_inf']):
+            continue  # Skip this iteration
+        diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth'])
+        if diff.years > 1 and diff.years<15:
+            print("Person contracted HIV infection at age younger than 15", diff)
+
+# Remove events after death
+filtered_data = cut_off_events_after_death(output)
+
+print_raw_events = True # Print raw chain of events for each individual
+print_selected_changes = False
+print_all_changes = True
+person_ID_of_interest = 494
+
+pd.set_option('display.max_rows', None)
+
+for name, group in filtered_data.groupby('person_ID'):
+    list_of_dob = group['date_of_birth']
+    
+    # Select individuals based on when they were born
+    if list_of_dob.iloc[0].year<2010:
+
+        # Check that immutable properties are fixed for this individual, i.e. that events were collated properly:
+        all_identical_dob = group['date_of_birth'].nunique() == 1
+        all_identical_sex = group['sex'].nunique() == 1
+        if all_identical_dob is False or all_identical_sex is False:
+            print("Immutable properties are changing! This is not chain for single individual")
+            print(group)
+            exit(-1)
+            
+        print("----------------------------------------------------------------------")
+        print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0])
+        print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event
+        number_of_events =len(group)/2
+        number_of_changes=0
+        if print_raw_events:
+            print(group)
+        
+        if print_all_changes:
+            # Check each row
+            comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999))
+
+            # Iterate over rows where any column has changed
+            for idx, row_changed in comparison.iloc[1:].iterrows():
+                if row_changed.any():  # Check if any column changed in this row
+                    number_of_changes+=1
+                    changed_columns = row_changed[row_changed].index.tolist()  # Get the columns where changes occurred
+                    print(f"Row {idx} - Changes detected in columns: {changed_columns}")
+                    columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns
+                    print(group.loc[idx, columns_output])  # Print only the changed columns
+                    if group.loc[idx, 'when'] == 'Before':
+                        print('-----> THIS CHANGE OCCURRED BEFORE EVENT!')
+                    #print(group.loc[idx,columns_to_print])
+                    print()  # For better readability
+            print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events")
+        
+        if print_selected_changes:
+            tb_inf_condition = (  # NB: despite its name, this mask also flags HIV infection and ART status transitions
+                ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) |
+                ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) |
+                ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) |
+                ((group['hv_inf'].shift(1).eq(False)) & (group['hv_inf'].eq(True))) |
+                ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) |
+                ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) |
+                ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) |
+                ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) |
+                ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) |
+                ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not'))
+            )
+            # Boolean columns are compared element-wise with .eq(): "is True"/"is False" on a Series is always False
+            alive_condition = (
+                (group['is_alive'].shift(1).eq(True)) & (group['is_alive'].eq(False))
+            )
+            # Combine conditions for rows of interest
+            transition_condition = tb_inf_condition | alive_condition
+            # NOTE: the branch below cannot fire while the enclosing check only selects individuals born before 2010
+            if list_of_dob.iloc[0].year >= 2010:
+                print("DETECTED OF INTEREST")
+                print(group[group['event'] == 'Birth'][columns_to_print])
+
+            # Filter the DataFrame based on the condition
+            filtered_transitions = group[transition_condition]
+            if not filtered_transitions.empty:
+                if list_of_dob.iloc[0].year < 2010:
+                    print("DETECTED OF INTEREST")
+                print(filtered_transitions[columns_to_print])
+    
+    
+print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups)
+
+
+
diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py
new file mode 100644
index 0000000000..6bdcd02d90
--- /dev/null
+++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py
@@ -0,0 +1,115 @@
+"""This Scenario file runs the model to generate event chains
+
+Run on the batch system using:
+```
+tlo batch-submit 
+    src/scripts/analysis_data_generation/scenario_generate_chains.py
+```
+
+or locally using:
+```
+    tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py
+```
+
+"""
+from pathlib import Path
+from typing import Dict
+
+import pandas as pd
+
+from tlo import Date, logging
+from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios
+from tlo.methods.fullmodel import fullmodel
+from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
+from tlo.scenario import BaseScenario
+
+
+class GenerateDataChains(BaseScenario):
+    def __init__(self):
+        super().__init__()
+        self.seed = 0
+        self.start_date = Date(2010, 1, 1)
+        self.end_date = self.start_date + pd.DateOffset(months=1)
+        self.pop_size = 120
+        self._scenarios = self._get_scenarios()
+        self.number_of_draws = len(self._scenarios)
+        self.runs_per_draw = 1
+        self.generate_event_chains = True
+
+    def log_configuration(self):
+        return {
+            'filename': 'generate_event_chains',
+            'directory': Path('./outputs'),  # <- (specified only for local running)
+            'custom_levels': {
+                '*': logging.WARNING,
+                'tlo.methods.demography': logging.INFO,
+                'tlo.methods.events': logging.INFO,
+                'tlo.methods.demography.detail': logging.WARNING,
+                'tlo.methods.healthburden': logging.INFO,
+                'tlo.methods.healthsystem.summary': logging.INFO,
+            }
+        }
+
+    def modules(self):
+        return (
+            fullmodel(resourcefilepath=self.resources)
+            + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)]
+        )
+
+    def draw_parameters(self, draw_number, rng):
+        if draw_number < self.number_of_draws:
+            return list(self._scenarios.values())[draw_number]
+        else:
+            return
+
+    # case 1: gfHE = -0.030, factor = 1.01074
+    # case 2: gfHE = -0.020, factor = 1.02116
+    # case 3: gfHE = -0.015, factor = 1.02637
+    # case 4: gfHE =  0.015, factor = 1.05763
+    # case 5: gfHE =  0.020, factor = 1.06284
+    # case 6: gfHE =  0.030, factor = 1.07326
+
+    def _get_scenarios(self) -> Dict[str, Dict]:
+        """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario.
+        """
+        
+        self.YEAR_OF_CHANGE = 2019
+
+        return {
+   
+            # =========== STATUS QUO ============
+            "Baseline":
+                mix_scenarios(
+                    self._baseline(),
+                    {
+                     "HealthSystem": {
+                        "yearly_HR_scaling_mode": "no_scaling",
+                      },
+                    }
+                ),
+
+        }
+        
+    def _baseline(self) -> Dict:
+        """Return the Dict with values for the parameter changes that define the baseline scenario. """
+        return mix_scenarios(
+            get_parameters_for_status_quo(),
+            {
+                "HealthSystem": {
+                    "mode_appt_constraints": 1,                 # <-- Mode 1 prior to change to preserve calibration
+                    "mode_appt_constraints_postSwitch": 2,      # <-- Mode 2 post-change to show effects of HRH
+                    "year_mode_switch": self.YEAR_OF_CHANGE,
+                    "scale_to_effective_capabilities": True,
+                    "policy_name": "Naive",
+                    "tclose_overwrite": 1,
+                    "tclose_days_offset_overwrite": 7,
+                    "use_funded_or_actual_staffing": "actual",
+                    "cons_availability": "default",
+                }
+            },
+        )
+
+if __name__ == '__main__':
+    from tlo.cli import scenario_run
+
+    scenario_run([__file__])
diff --git a/src/tlo/events.py b/src/tlo/events.py
index 98832faecb..00a6fe4e7d 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -11,6 +11,8 @@
 
 import pandas as pd
 
+FACTOR_POP_DICT = 5000
+
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -83,13 +85,14 @@ def compare_population_dataframe(self,df_before, df_after):
         
         # Create an empty list to store changes for each of the individuals
         chain_links = {}
-        
+        len_of_diff = len(diff_mask)
+
         # Loop through each row of the mask
+        
         for idx, row in diff_mask.iterrows():
             changed_cols = row.index[row].tolist()
-            
+
             if changed_cols:  # Proceed only if there are changes in the row
-            
                 # Create a dictionary for this person
                 # First add event info
                 link_info = {
@@ -103,7 +106,7 @@ def compare_population_dataframe(self,df_before, df_after):
                     link_info[col] = df_after.at[idx, col]
                 
                 # Append the event and changes to the individual key
-                chain_links = {idx : link_info}
+                chain_links[idx] = str(link_info)
         
         return chain_links
         
@@ -168,7 +171,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                     if row_before[key] != row_after[key]: # Note: used fillna previously
                         link_info[key] = row_after[key]
                         
-                chain_links = {self.target : link_info}
+                chain_links[self.target] = str(link_info)
 
                 # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                 if debug_chains:
@@ -228,14 +231,18 @@ def run(self):
         if self.sim.generate_event_chains:
             chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before)
             
+            # Create empty logger for entire pop
+            pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals
+
+            pop_dict.update(chain_links)
+
             # Log chain_links here
             if len(chain_links)>0:
                 logger_chain.info(key='event_chains',
-                            data= chain_links,
-                            description='Links forming chains of events for simulated individuals')
+                                  data= pop_dict,
+                                  description='Links forming chains of events for simulated individuals')
                 
                 #print("Chain events ", chain_links)
-            
 
 
 class RegularEvent(Event):
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index 041ab9cf08..d657e9d3a0 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -11,6 +11,8 @@
 
 import pandas as pd
 
+FACTOR_POP_DICT = 5000
+
 
 if TYPE_CHECKING:
     from tlo import Module, Simulation
@@ -276,7 +278,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
                 if row_before[key] != row_after[key]: # Note: used fillna previously
                     link_info[key] = row_after[key]
             
-            chain_links = {self.target : link_info}
+            chain_links = {self.target : str(link_info)}
 
             # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
             row = self.sim.population.props.loc[[abs(self.target)]]
@@ -314,10 +316,15 @@ def run(self, squeeze_factor):
             chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint))
             
             if len(chain_links)>0:
+            
+                pop_dict = {i: '' for i in range(FACTOR_POP_DICT)}
+               # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals
+
+                pop_dict.update(chain_links)
+                
                 logger_chains.info(key='event_chains',
-                            data = chain_links,
+                            data = pop_dict,
                             description='Links forming chains of events for simulated individuals')
-                #print(chain_links)
                 
         return updated_appt_footprint
         
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 0c70b164d9..d9ba62c43a 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -40,6 +40,8 @@
 logger_chains = logging.getLogger("tlo.methods.event")
 logger_chains.setLevel(logging.INFO)
 
+FACTOR_POP_DICT = 5000
+
 
 class SimulationPreviouslyInitialisedError(Exception):
     """Exception raised when trying to initialise an already initialised simulation."""
@@ -294,17 +296,18 @@ def make_initial_population(self, *, n: int) -> None:
         if self.generate_event_chains:
 
             pop_dict = self.population.props.to_dict(orient='index')
-            
-            print(pop_dict)
-            print(pop_dict.keys())
             for key in pop_dict.keys():
                 pop_dict[key]['person_ID'] = key
-            print("Length of properties", len(pop_dict[0].keys()))
-            #exit(-1)
+                pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later
+                
+            pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)}
+            pop_dict_full.update(pop_dict)
+
+            print("Size for full sim", len(pop_dict_full))
+            
             logger.info(key='event_chains',
-                               data = pop_dict,
+                               data = pop_dict_full,
                                description='Links forming chains of events for simulated individuals')
-
         end = time.time()
         logger.info(key="info", data=f"make_initial_population() {end - start} s")
 
@@ -323,7 +326,7 @@ def initialise(self, *, end_date: Date) -> None:
         #self.generate_event_chains = generate_event_chains
         if self.generate_event_chains:
             # Eventually this can be made an option
-            self.generate_event_chains_overwrite_epi = False
+            self.generate_event_chains_overwrite_epi = True
             # For now keep these fixed, eventually they will be input from user
             self.generate_event_chains_modules_of_interest = [self.modules]
             self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
@@ -480,9 +483,13 @@ def do_birth(self, mother_id: int) -> int:
             prop_dict = self.population.props.loc[child_id].to_dict()
             prop_dict['event'] = 'Birth'
             prop_dict['event_date'] = self.date
-            child_dict = {child_id : prop_dict}
+            
+            pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals
+            pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length
+
+            print("Length at birth", len(pop_dict))
             logger.info(key='event_chains',
-                               data = child_dict,
+                               data = pop_dict,
                                description='Links forming chains of events for simulated individuals')
         
             # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
@@ -492,7 +499,7 @@ def do_birth(self, mother_id: int) -> int:
             row['event_date'] = self.date
             row['when'] = 'After'
             self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
-            
+
         return child_id
 
     def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]:

From 16299a21f43862a188f41ea6117b81c2c11d72ab Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Mon, 25 Nov 2024 09:37:29 +0000
Subject: [PATCH 19/21] Include debugging option, final set-up of scenario to
 print data, analysis file now collects all relevant info and prints them

---
 .../analysis_extract_data.py                  | 157 ++++++++++++++++--
 .../scenario_generate_chains.py               |  53 +++++-
 src/tlo/events.py                             |  10 +-
 src/tlo/methods/hsi_event.py                  |  50 +++---
 src/tlo/methods/rti.py                        |  17 +-
 src/tlo/simulation.py                         |  39 +++--
 src/tlo/util.py                               |   1 +
 7 files changed, 252 insertions(+), 75 deletions(-)

diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py
index 2cfba5315b..6eb6408830 100644
--- a/src/scripts/analysis_data_generation/analysis_extract_data.py
+++ b/src/scripts/analysis_data_generation/analysis_extract_data.py
@@ -8,10 +8,14 @@
 from typing import Tuple
 
 import pandas as pd
+import matplotlib.pyplot as plt
 
 from tlo import Date
 from tlo.analysis.utils import extract_results
 from datetime import datetime
+from collections import Counter
+import ast
+
 
 # Range of years considered
 min_year = 2010
@@ -28,17 +32,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
     """
     pd.set_option('display.max_rows', None)
     pd.set_option('display.max_colwidth', None)
-    event_chains = extract_results(
-            results_folder,
-            module='tlo.simulation',
-            key='event_chains',
-            column='0',
-            #column = str(i),
-            #custom_generate_series=get_num_dalys_by_year,
-            do_scaling=False
-        )
-   # print(event_chains.loc[0,(0, 0)])
-
+    
     eval_env = {
         'datetime': datetime,  # Add the datetime class to the eval environment
         'pd': pd,              # Add pandas to handle Timestamp
@@ -46,13 +40,144 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
         'NaT': pd.NaT,
         'nan': float('nan'),       # Include NaN for eval (can also use pd.NA if preferred)
     }
+    
+    initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev']
+
+    # Will be added through computation: age at time of RTI
+        
+    # Will be added through computation: total duration of event
+    
+    initial_rt_event_properties = set()
+    
+    num_individuals = 1000
+    num_runs = 50
+    record = []
+    
+    
+    for i in range(0,num_individuals):
 
-    for item,row in event_chains.iterrows():
-        value = event_chains.loc[item,(0, 0)]
-        if value !='':
-            print('')
-            print(value)
+        individual_event_chains = extract_results(
+                results_folder,
+                module='tlo.simulation',
+                key='event_chains',
+                column=str(i),
+                do_scaling=False
+            )
+            
+        #print(individual_event_chains)
+
+            
+        for r in range(0,num_runs):
+        
+            print("AT RUN = ", r)
+
+            initial_properties = {}
+            progression_properties = {}
+            key_first_event = {}
+            key_last_event = {}
+            first_event = {}
+            last_event = {}
+            properties = {}
+
+
+            #ind_Counter = Counter()
+            ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()}
+            # Count total appts
+
+            list_for_individual = []
+            for item,row in individual_event_chains.iterrows():
+                value = individual_event_chains.loc[item,(0, r)]
+               # print("The value is", value, "at run ", r)
+                if value !='' and isinstance(value, str):
+                    evaluated = eval(value, eval_env)
+                    list_for_individual.append(evaluated)
+               # elif not isinstance(value,str):
+               #     print(value)
+                    
+            initial_properties = list_for_individual[0]
+            print(initial_properties)
+            
+            # Initialise first event by gathering parameters of interest from initial_properties
+            first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties}
+            
+            progression_properties = {}
+            for i in list_for_individual:
+                if 'event' in i:
+                    print("")
+                    print(i)
+                    if 'RTIPolling' in i['event']:
+                        #print("I'm in polling event")
+                        #print(i)
+                        
+                        # Keep track of which properties are changed during polling events
+                        for key,value in i.items():
+                            if 'rt_' in key:
+                                initial_rt_event_properties.add(key)
+                        
+                        # Retain a copy of Polling event
+                        polling_event = i.copy()
+                        
+                        # Update parameters of interest following RTI
+                        key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()}
+                        
+                        # Calculate age of individual at time of event
+                        key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days
+                        
+                        # Keep track of evolution in individual's properties
+                        progression_properties = initial_properties.copy()
+                        progression_properties.update(i)
+
+                    else:
+                        # Progress properties of individual, even if this event is a death
+                        progression_properties.update(i)
+                    
+                    #print(progression_properties)
+                    # Update footprint
+                    if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()':
+                        footprint = i['appt_footprint']
+                        if 'Counter' in footprint:
+                            footprint = footprint[len("Counter("):-1]
+                        apply = eval(footprint, eval_env)
+                        ind_Counter[i['level']].update(Counter(apply))
+                        
+                    if 'is_alive' in i and i['is_alive'] is False:
+                        print("Death", i)
+                        print("-------Total footprint", ind_Counter)
+                        break
+                        
+
+            # Compute final properties of individual
+            key_last_event['is_alive_after_RTI'] = progression_properties['is_alive']
+            key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days
+            key_last_event['rt_disability_final'] = progression_properties['rt_disability']
+            key_last_event.update({'total_footprint': ind_Counter})
+            
+            #print("-------Total footprint", ind_Counter)
+            #for key, value in key_first_event.items():
+               # if 'rt_' in key or 'alive' in key:
+             #   print(f"{key}: {value}")
+            #print(#)
+            #for key, value in key_last_event.items():
+                #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key:
+            #    print(f"{key}: {value}")
+
+            #print(key_first_event)
+            #print(key_last_event)
+            print(initial_rt_event_properties)
+            properties = key_first_event | key_last_event
+            record.append(properties)
+            for key, value in properties.items():
+                #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key:
+                print(f"{key}: {value}")
+         
+    df = pd.DataFrame(record)
+    df.to_csv("raw_data.csv", index=False)
+
+    print(df)
+    print(initial_rt_event_properties)
     exit(-1)
+            #print(i)
+
     #dict = {}
     #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
     #    dict[i] = []
diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py
index 6bdcd02d90..79df3f55b6 100644
--- a/src/scripts/analysis_data_generation/scenario_generate_chains.py
+++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py
@@ -22,18 +22,42 @@
 from tlo.methods.fullmodel import fullmodel
 from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher
 from tlo.scenario import BaseScenario
-
+from tlo.methods import (
+    alri,
+    cardio_metabolic_disorders,
+    care_of_women_during_pregnancy,
+    contraception,
+    demography,
+    depression,
+    diarrhoea,
+    enhanced_lifestyle,
+    epi,
+    healthburden,
+    healthseekingbehaviour,
+    healthsystem,
+    hiv,
+    rti,
+    labour,
+    malaria,
+    newborn_outcomes,
+    postnatal_supervisor,
+    pregnancy_supervisor,
+    stunting,
+    symptommanager,
+    tb,
+    wasting,
+)
 
 class GenerateDataChains(BaseScenario):
     def __init__(self):
         super().__init__()
         self.seed = 0
         self.start_date = Date(2010, 1, 1)
-        self.end_date = self.start_date + pd.DateOffset(months=1)
-        self.pop_size = 120
+        self.end_date = self.start_date + pd.DateOffset(months=13)
+        self.pop_size = 1000
         self._scenarios = self._get_scenarios()
         self.number_of_draws = len(self._scenarios)
-        self.runs_per_draw = 1
+        self.runs_per_draw = 50
         self.generate_event_chains = True
 
     def log_configuration(self):
@@ -51,10 +75,23 @@ def log_configuration(self):
         }
 
     def modules(self):
-        return (
-            fullmodel(resourcefilepath=self.resources)
-            + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)]
-        )
+        # MODIFY
+        # Here, instead of running the full model, a reduced set of modules (including RTI) is registered
+        return [demography.Demography(resourcefilepath=self.resources),
+                enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources),
+                healthburden.HealthBurden(resourcefilepath=self.resources),
+                symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False),
+                rti.RTI(resourcefilepath=self.resources),
+                healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources),
+                #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath),
+                healthsystem.HealthSystem(resourcefilepath=self.resources,
+                                          mode_appt_constraints=1,
+                                          cons_availability='all')]
+                                          
+       # return (
+       #     fullmodel(resourcefilepath=self.resources)
+       #     + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)]
+       # )
 
     def draw_parameters(self, draw_number, rng):
         if draw_number < self.number_of_draws:
diff --git a/src/tlo/events.py b/src/tlo/events.py
index 00a6fe4e7d..ba8024f621 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -11,7 +11,7 @@
 
 import pandas as pd
 
-FACTOR_POP_DICT = 5000
+from tlo.util import FACTOR_POP_DICT
 
 
 logger = logging.getLogger(__name__)
@@ -132,7 +132,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame
                 # Save row for comparison after event has occurred
                 row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
                 
-                if debug_chains:
+                if self.sim.debug_generate_event_chains:
                     # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                     row = self.sim.population.props.loc[[abs(self.target)]]
                     row['person_ID'] = self.target
@@ -142,6 +142,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 
             else:
+
                 # This will be a population-wide event. In order to find individuals for which this led to
                 # a meaningful change, make a copy of the pop dataframe before the event has occurred.
                 df_before = self.sim.population.props.copy()
@@ -174,7 +175,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                 chain_links[self.target] = str(link_info)
 
                 # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-                if debug_chains:
+                if self.sim.debug_generate_event_chains:
                     # Print entire row
                     row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births
                     row['person_ID'] = self.target
@@ -194,7 +195,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                 chain_links = self.compare_population_dataframe(df_before, df_after)
 
                 # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-                if debug_chains:
+                if self.sim.debug_generate_event_chains:
                     # Or print entire rows
                     change = df_before.compare(df_after)
                     if not change.empty:
@@ -233,7 +234,6 @@ def run(self):
             
             # Create empty logger for entire pop
             pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals
-
             pop_dict.update(chain_links)
 
             # Log chain_links here
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index d657e9d3a0..bdf597fba4 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -8,10 +8,9 @@
 from tlo import Date, logging
 from tlo.events import Event
 from tlo.population import Population
-
+from tlo.util import FACTOR_POP_DICT
 import pandas as pd
 
-FACTOR_POP_DICT = 5000
 
 
 if TYPE_CHECKING:
@@ -219,19 +218,21 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                 # Save row for comparison after event has occurred
                 row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999)
 
-                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-                row = self.sim.population.props.loc[[abs(self.target)]]
-                row['person_ID'] = self.target
-                row['event'] = str(self)
-                row['event_date'] = self.sim.date
-                row['when'] = 'Before'
-                try:
-                    row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
-                    row['level'] = self.facility_info.level
-                except:
-                    row['appt_footprint'] = 'N/A'
-                    row['level'] = 'N/A'
-                self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+                if self.sim.debug_generate_event_chains:
+                    # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
+                    row = self.sim.population.props.loc[[abs(self.target)]]
+                    row['person_ID'] = self.target
+                    row['event'] = str(self)
+                    row['event_date'] = self.sim.date
+                    row['when'] = 'Before'
+                
+                    try:
+                        row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT)
+                        row['level'] = self.facility_info.level
+                    except:
+                        row['appt_footprint'] = 'N/A'
+                        row['level'] = 'N/A'
+                    self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
                 
             else:
                 # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error
@@ -280,15 +281,16 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
             
             chain_links = {self.target : str(link_info)}
 
-            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-            row = self.sim.population.props.loc[[abs(self.target)]]
-            row['person_ID'] = self.target
-            row['event'] = str(self)
-            row['event_date'] = self.sim.date
-            row['when'] = 'After'
-            row['appt_footprint'] = record_footprint
-            row['level'] = record_level
-            self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
+            if self.sim.debug_generate_event_chains:
+                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
+                row = self.sim.population.props.loc[[abs(self.target)]]
+                row['person_ID'] = self.target
+                row['event'] = str(self)
+                row['event_date'] = self.sim.date
+                row['when'] = 'After'
+                row['appt_footprint'] = record_footprint
+                row['level'] = record_level
+                self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
             
         return chain_links
         
diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py
index 3642365976..1ca2749af7 100644
--- a/src/tlo/methods/rti.py
+++ b/src/tlo/methods/rti.py
@@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin):
     def __init__(self, module):
         """Schedule to take place every month
         """
-        super().__init__(module, frequency=DateOffset(months=1000))
+        super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event
         p = module.parameters
         # Parameters which transition the model between states
         self.base_1m_prob_rti = (p['base_rate_injrti'] / 12)
@@ -2864,10 +2864,12 @@ def apply(self, population):
                          .when('.between(70,79)', self.rr_injrti_age7079),
                          Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol)
                          )
-        if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True:
-            pred = 1.0
-        else:
-            pred = eq.predict(df.loc[rt_current_non_ind])
+        #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True:
+        pred = 1.0
+        #else:
+        #    pred = eq.predict(df.loc[rt_current_non_ind])
+            
+            
         random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind))
         selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti]
 
@@ -4852,6 +4854,7 @@ def __init__(self, module, person_id):
         self.treated_code = 'none'
 
     def apply(self, person_id, squeeze_factor):
+
         self._number_of_times_this_event_has_run += 1
         df = self.sim.population.props
         rng = self.module.rng
@@ -4900,10 +4903,12 @@ def apply(self, person_id, squeeze_factor):
         # injury is being treated in this surgery
         # find untreated injury codes that are treated with major surgery
         relevant_codes = np.intersect1d(injuries_to_be_treated, surgically_treated_codes)
+
         # check that the person sent here has an appropriate code(s)
         assert len(relevant_codes) > 0
         # choose a code at random
         self.treated_code = rng.choice(relevant_codes)
+
         if request_outcome:
             # check the people sent here hasn't died due to rti, have had their injuries diagnosed and been through
             # RTI_Med
@@ -4990,7 +4995,9 @@ def apply(self, person_id, squeeze_factor):
 
             # ------------------------------------- Perm disability from amputation ------------------------------------
             codes = ['782', '782a', '782b', '782c', '783', '882', '883', '884']
+
             if self.treated_code in codes:
+
                 # Track whether they are permanently disabled
                 df.at[person_id, 'rt_perm_disability'] = True
                 # Find the column and code where the permanent injury is stored
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index d9ba62c43a..bb766562a0 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -11,8 +11,9 @@
 from typing import Optional
 from typing import TYPE_CHECKING, Optional
 import pandas as pd
-
+import tlo.population
 import numpy as np
+from tlo.util import FACTOR_POP_DICT
 
 try:
     import dill
@@ -40,8 +41,6 @@
 logger_chains = logging.getLogger("tlo.methods.event")
 logger_chains.setLevel(logging.INFO)
 
-FACTOR_POP_DICT = 5000
-
 
 class SimulationPreviouslyInitialisedError(Exception):
     """Exception raised when trying to initialise an already initialised simulation."""
@@ -113,12 +112,15 @@ def __init__(
         self.generate_event_chains_overwrite_epi = None
         self.generate_event_chains_modules_of_interest = []
         self.generate_event_chains_ignore_events = []
+        self.debug_generate_event_chains = False
         self.end_date = None
         self.output_file = None
         self.population: Optional[Population] = None
         
-        # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-        self.event_chains: Optinoal[Population] = None
+                
+        if self.debug_generate_event_chains:
+            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
+            self.event_chains: Optional[Population] = None
 
         self.show_progress_bar = show_progress_bar
         self.resourcefilepath = resourcefilepath
@@ -288,8 +290,9 @@ def make_initial_population(self, *, n: int) -> None:
                 data=f"{module.name}.initialise_population() {time.time() - start1} s",
             )
 
-        # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-        self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level'])
+        if self.debug_generate_event_chains:
+            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
+            self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level'])
         
         # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged.
         # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start.
@@ -329,7 +332,7 @@ def initialise(self, *, end_date: Date) -> None:
             self.generate_event_chains_overwrite_epi = True
             # For now keep these fixed, eventually they will be input from user
             self.generate_event_chains_modules_of_interest = [self.modules]
-            self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
+            self.generate_event_chains_ignore_events =  ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler']
         else:
             # If not using to print chains, cannot ignore epi
             self.generate_event_chains_overwrite_epi = False
@@ -418,8 +421,9 @@ def run_simulation_to(self, *, to_date: Date) -> None:
             self.fire_single_event(event, date)
         self.date = to_date
         
-        # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR.
-        self.event_chains.to_csv('output.csv', index=False)
+        if self.debug_generate_event_chains:
+            # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR.
+            self.event_chains.to_csv('output.csv', index=False)
 
         if self.show_progress_bar:
             progress_bar.stop()
@@ -492,13 +496,14 @@ def do_birth(self, mother_id: int) -> int:
                                data = pop_dict,
                                description='Links forming chains of events for simulated individuals')
         
-            # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
-            row = self.population.props.iloc[[child_id]]
-            row['person_ID'] = child_id
-            row['event'] = 'Birth'
-            row['event_date'] = self.date
-            row['when'] = 'After'
-            self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
+            if self.debug_generate_event_chains:
+                # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
+                row = self.population.props.iloc[[child_id]]
+                row['person_ID'] = child_id
+                row['event'] = 'Birth'
+                row['event_date'] = self.date
+                row['when'] = 'After'
+                self.event_chains = pd.concat([self.event_chains, row], ignore_index=True)
 
         return child_id
 
diff --git a/src/tlo/util.py b/src/tlo/util.py
index 168b1d41a1..f8dc67d471 100644
--- a/src/tlo/util.py
+++ b/src/tlo/util.py
@@ -12,6 +12,7 @@
 
 # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation.
 DEFAULT_MOTHER_ID = -1e7
+FACTOR_POP_DICT = 1000
 
 
 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]):

From 0dd862f2a9b485a33933e185e3c59ad64ed33ed9 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Tue, 26 Nov 2024 15:28:30 +0000
Subject: [PATCH 20/21] Change label of person when iterating

---
 .../analysis_extract_data.py                  | 68 ++++++++++++-------
 1 file changed, 43 insertions(+), 25 deletions(-)

diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py
index 6eb6408830..4c8e7d8197 100644
--- a/src/scripts/analysis_data_generation/analysis_extract_data.py
+++ b/src/scripts/analysis_data_generation/analysis_extract_data.py
@@ -41,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
         'nan': float('nan'),       # Include NaN for eval (can also use pd.NA if preferred)
     }
     
-    initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev']
+    initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev']
 
     # Will be added through computation: age at time of RTI
         
@@ -54,13 +54,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
     record = []
     
     
-    for i in range(0,num_individuals):
+    for p in range(0,num_individuals):
+    
+        print("At person = ", p)
 
         individual_event_chains = extract_results(
                 results_folder,
                 module='tlo.simulation',
                 key='event_chains',
-                column=str(i),
+                column=str(p),
                 do_scaling=False
             )
             
@@ -69,7 +71,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
             
         for r in range(0,num_runs):
         
-            print("AT RUN = ", r)
+
 
             initial_properties = {}
             progression_properties = {}
@@ -78,7 +80,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
             first_event = {}
             last_event = {}
             properties = {}
-
+            average_disability = 0
+            prev_disability_incurred = 0
 
             #ind_Counter = Counter()
             ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()}
@@ -95,7 +98,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
                #     print(value)
                     
             initial_properties = list_for_individual[0]
-            print(initial_properties)
+           # print(initial_properties)
             
             # Initialise first event by gathering parameters of interest from initial_properties
             first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties}
@@ -103,8 +106,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
             progression_properties = {}
             for i in list_for_individual:
                 if 'event' in i:
-                    print("")
-                    print(i)
+                    #print("")
+                    #print(i)
                     if 'RTIPolling' in i['event']:
                         #print("I'm in polling event")
                         #print(i)
@@ -126,10 +129,26 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
                         # Keep track of evolution in individual's properties
                         progression_properties = initial_properties.copy()
                         progression_properties.update(i)
+                        
+                        # DALYs incurred: store the rt_disability value and the date set at the polling event
+                        if 'rt_disability' in i:
+                            prev_disability_incurred = i['rt_disability']
+                            prev_date = i['event_date']
+                            #print('At polling event, ', prev_disability_incurred, prev_date)
 
                     else:
                         # Progress properties of individual, even if this event is a death
                         progression_properties.update(i)
+                        
+                        # If disability has changed as a result of this, recalculate
+                        if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred:
+                            dt_in_prev_disability = (i['event_date'] - prev_date).days
+                            average_disability += prev_disability_incurred*dt_in_prev_disability
+                            # Update variables
+                            prev_disability_incurred = i['rt_disability']
+                            prev_date = i['event_date']
+
+
                     
                     #print(progression_properties)
                     # Update footprint
@@ -141,34 +160,33 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No
                         ind_Counter[i['level']].update(Counter(apply))
                         
                     if 'is_alive' in i and i['is_alive'] is False:
-                        print("Death", i)
-                        print("-------Total footprint", ind_Counter)
+                        #print("Death", i)
+                        #print("-------Total footprint", ind_Counter)
                         break
                         
 
             # Compute final properties of individual
             key_last_event['is_alive_after_RTI'] = progression_properties['is_alive']
             key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days
-            key_last_event['rt_disability_final'] = progression_properties['rt_disability']
+            if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0:
+                key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days']
+            else:
+                key_last_event['rt_disability_average'] = 0.0
+            key_last_event['rt_disability_permanent'] = progression_properties['rt_disability']
             key_last_event.update({'total_footprint': ind_Counter})
-            
-            #print("-------Total footprint", ind_Counter)
-            #for key, value in key_first_event.items():
-               # if 'rt_' in key or 'alive' in key:
-             #   print(f"{key}: {value}")
-            #print(#)
-            #for key, value in key_last_event.items():
-                #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key:
-            #    print(f"{key}: {value}")
 
-            #print(key_first_event)
-            #print(key_last_event)
-            print(initial_rt_event_properties)
+            #print("Average disability", key_last_event['rt_disability_average'])
+            
             properties = key_first_event | key_last_event
+            
+            if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4):
+                print("Error in computed average for individual ", p, r )
+                
             record.append(properties)
-            for key, value in properties.items():
+            #for key, value in properties.items():
                 #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key:
-                print(f"{key}: {value}")
+                #print(f"{key}: {value}")
+           # print("Initial event properties", initial_rt_event_properties)
          
     df = pd.DataFrame(record)
     df.to_csv("raw_data.csv", index=False)

From 84f826322ba13f6fa1631d639944c2bac50667f6 Mon Sep 17 00:00:00 2001
From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com>
Date: Fri, 13 Dec 2024 15:55:03 +0000
Subject: [PATCH 21/21] Correctly retrieve event name

---
 src/tlo/events.py            | 12 ++++++------
 src/tlo/methods/hsi_event.py |  6 +++---
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/tlo/events.py b/src/tlo/events.py
index ba8024f621..f67b54458a 100644
--- a/src/tlo/events.py
+++ b/src/tlo/events.py
@@ -97,7 +97,7 @@ def compare_population_dataframe(self,df_before, df_after):
                 # First add event info
                 link_info = {
                     'person_ID': idx,
-                    'event': str(self),
+                    'event': type(self).__name__,
                     'event_date': self.sim.date,
                 }
                 
@@ -136,7 +136,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame
                     # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                     row = self.sim.population.props.loc[[abs(self.target)]]
                     row['person_ID'] = self.target
-                    row['event'] = str(self)
+                    row['event'] = type(self).__name__
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
@@ -164,7 +164,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                 link_info = {
                     #'person_ID' : self.target,
                     'person_ID' : self.target,
-                    'event' : str(self),
+                    'event' : type(self).__name__,
                     'event_date' : self.sim.date,
                 }
                 # Store (if any) property changes as a result of the event for this individual
@@ -179,7 +179,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                     # Print entire row
                     row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births
                     row['person_ID'] = self.target
-                    row['event'] = str(self)
+                    row['event'] = type(self).__name__
                     row['event_date'] = self.sim.date
                     row['when'] = 'After'
                     self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
@@ -202,13 +202,13 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) ->
                         indices = change.index
                         new_rows_before = df_before.loc[indices]
                         new_rows_before['person_ID'] = new_rows_before.index
-                        new_rows_before['event'] = self
+                        new_rows_before['event'] = type(self).__name__
                         new_rows_before['event_date'] = self.sim.date
                         new_rows_before['when'] = 'Before'
 
                         new_rows_after = df_after.loc[indices]
                         new_rows_after['person_ID'] = new_rows_after.index
-                        new_rows_after['event'] = self
+                        new_rows_after['event'] = type(self).__name__
                         new_rows_after['event_date'] = self.sim.date
                         new_rows_after['when'] = 'After'
 
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index f267181b56..978b26d7c5 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -222,7 +222,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]:
                     # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                     row = self.sim.population.props.loc[[abs(self.target)]]
                     row['person_ID'] = self.target
-                    row['event'] = str(self)
+                    row['event'] = type(self).__name__ #str(self.event_name)
                     row['event_date'] = self.sim.date
                     row['when'] = 'Before'
                 
@@ -268,7 +268,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
                 
             link_info = {
                 'person_ID': self.target,
-                'event' : str(self),
+                'event' : type(self).__name__,
                 'event_date' : self.sim.date,
                 'appt_footprint' : record_footprint,
                 'level' : record_level,
@@ -285,7 +285,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) ->
                 # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR.
                 row = self.sim.population.props.loc[[abs(self.target)]]
                 row['person_ID'] = self.target
-                row['event'] = str(self)
+                row['event'] = type(self).__name__
                 row['event_date'] = self.sim.date
                 row['when'] = 'After'
                 row['appt_footprint'] = record_footprint