From dbff470b51cde44beeefdae3575d52e0c19964bc Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:00:09 +0100 Subject: [PATCH 01/21] Investigate analysis of events at sim level --- src/tlo/simulation.py | 9 +++++++++ tests/test_rti.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 219b1b8a6f..a641909ed1 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -231,6 +231,15 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date break + + #if event.target != self.population: + # print("Event: ", event) + + if event.module == self.modules['RTI']: + print("RTI event ", event) + print(" target ", event.target) + if event.target != self.population: + self.population.props.at[event.tar] self.fire_single_event(event, date) # The simulation has ended. diff --git a/tests/test_rti.py b/tests/test_rti.py index 0e231fb4af..99243b988e 100644 --- a/tests/test_rti.py +++ b/tests/test_rti.py @@ -25,6 +25,17 @@ end_date = Date(2012, 1, 1) popsize = 1000 +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation with a functioning health system with full service availability and no set + constraints + """ + # create sim object + sim = create_basic_rti_sim(popsize, seed) + # run simulation + sim.simulate(end_date=end_date) + exit(-1) def check_dtypes(simulation): # check types of columns in dataframe, check they are the same, list those that aren't @@ -65,6 +76,7 @@ def test_run(seed): check_dtypes(sim) + @pytest.mark.slow def test_all_injuries_run(seed): """ From 05098f78668a5317667d58cbda882a364a031277 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:26:39 +0200 Subject: [PATCH 02/21] Final data-printing set-up --- src/tlo/methods/demography.py | 7 ++- src/tlo/methods/healthsystem.py | 18 ++++++ src/tlo/methods/hiv.py | 67 ++++++++++++++++++---- src/tlo/methods/tb.py | 99 +++++++++++++++++++++++++-------- src/tlo/simulation.py | 82 ++++++++++++++++++++++++--- 5 files changed, 226 insertions(+), 47 deletions(-) diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index e58f3895f4..6b2578fd44 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,9 +315,10 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately - self.other_death_poll = OtherDeathPoll(self) - sim.schedule_event(self.other_death_poll, sim.date) + if sim.generate_data is False: + # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately + self.other_death_poll = OtherDeathPoll(self) + sim.schedule_event(self.other_death_poll, sim.date) # Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`) for _logger in (logger, logger_scale_factor): diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 181c08f5aa..6e251e636c 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,8 +2033,26 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." + go_ahead = False + if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): + go_ahead = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if go_ahead: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated appt_footprint if actual_appt_footprint is not None: diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..8e0d337fc1 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,11 +631,12 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - # Launch sub-routines for allocating the right number of people into each category - self.initialise_baseline_prevalence(population) # allocate baseline prevalence + if self.sim.generate_data is False: + # Launch sub-routines for allocating the right number of people into each category + self.initialise_baseline_prevalence(population) # allocate baseline prevalence - self.initialise_baseline_art(population) # allocate baseline art coverage - self.initialise_baseline_tested(population) # allocate baseline testing coverage + self.initialise_baseline_art(population) # allocate baseline art coverage + self.initialise_baseline_tested(population) # allocate baseline testing coverage def initialise_baseline_prevalence(self, population): """ @@ -905,10 +906,16 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - # 1) Schedule the Main HIV Regular Polling Event - sim.schedule_event( - HivRegularPollingEvent(self), sim.date + DateOffset(days=0) - ) + if self.sim.generate_data: + print("Should be generating data") + sim.schedule_event( + HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) + ) + else: + # 1) Schedule the Main HIV Regular Polling Event + sim.schedule_event( + HivRegularPollingEvent(self), sim.date + DateOffset(days=0) + ) # 2) Schedule the Logging Event sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1)) @@ -1662,6 +1669,37 @@ def do_at_generic_first_appt( # Main Polling Event # --------------------------------------------------------------------------- +class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin): + """ The HIV Polling Events for Data Generation + * Ensures that + """ + + def __init__(self, module): + super().__init__( + module, frequency=DateOffset(years=120) + ) # repeats every 12 months, but this can be changed + + def apply(self, population): + + df = population.props + + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & ~df.hv_inf + ].index + + n_susceptible = len(susc_idx) + print("Number of individuals susceptible", n_susceptible) + # Schedule the date of infection for each new infection: + for i in susc_idx: + date_of_infection = self.sim.date + pd.DateOffset( + # Ensure that individual will be infected before end of sim + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + self.sim.schedule_event( + HivInfectionEvent(self.module, i), date_of_infection + ) class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin): """ The HIV Regular Polling Events @@ -1683,6 +1721,7 @@ def apply(self, population): fraction_of_year_between_polls = self.frequency.months / 12 beta = p["beta"] * fraction_of_year_between_polls + # ----------------------------------- HORIZONTAL TRANSMISSION ----------------------------------- def horizontal_transmission(to_sex, from_sex): # Count current number of alive 15-80 year-olds at risk of transmission @@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex): HivInfectionEvent(self.module, idx), date_of_infection ) + # ----------------------------------- SPONTANEOUS TESTING ----------------------------------- def spontaneous_testing(current_year): @@ -1861,11 +1901,12 @@ def vmmc_for_child(): priority=0, ) - # Horizontal transmission: Male --> Female - horizontal_transmission(from_sex="M", to_sex="F") + if self.sim.generate_data is False: + # Horizontal transmission: Male --> Female + horizontal_transmission(from_sex="M", to_sex="F") - # Horizontal transmission: Female --> Male - horizontal_transmission(from_sex="F", to_sex="M") + # Horizontal transmission: Female --> Male + horizontal_transmission(from_sex="F", to_sex="M") # testing # if year later than 2020, set testing rates to those reported in 2020 @@ -1882,6 +1923,8 @@ def vmmc_for_child(): vmmc_for_child() + + # --------------------------------------------------------------------------- # Natural History Events # --------------------------------------------------------------------------- diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..cd79ae22a5 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,28 +833,29 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - # WHO estimates of active TB for 2010 to get infected initial population - # don't need to scale or include treated proportion as no-one on treatment yet - inc_estimates = p["who_incidence_estimates"] - incidence_year = (inc_estimates.loc[ - (inc_estimates.year == self.sim.date.year), "incidence_per_100k" - ].values[0]) / 100_000 - - incidence_year = incidence_year * p["scaling_factor_WHO"] - - self.assign_active_tb( - population, - strain="ds", - incidence=incidence_year) - - self.assign_active_tb( - population, - strain="mdr", - incidence=incidence_year * p['prop_mdr2010']) - - self.send_for_screening_general( - population - ) # send some baseline population for screening + if self.sim.generate_data is False: + # WHO estimates of active TB for 2010 to get infected initial population + # don't need to scale or include treated proportion as no-one on treatment yet + inc_estimates = p["who_incidence_estimates"] + incidence_year = (inc_estimates.loc[ + (inc_estimates.year == self.sim.date.year), "incidence_per_100k" + ].values[0]) / 100_000 + + incidence_year = incidence_year * p["scaling_factor_WHO"] + + self.assign_active_tb( + population, + strain="ds", + incidence=incidence_year) + + self.assign_active_tb( + population, + strain="mdr", + incidence=incidence_year * p['prop_mdr2010']) + + self.send_for_screening_general( + population + ) # send some baseline population for screening def initialise_simulation(self, sim): """ @@ -867,7 +868,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbActiveEvent(self), sim.date) sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + + if sim.generate_data is False: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + else: + sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) # 2) log at the end of the year # Optional: Schedule the scale-up of programs @@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset): # # TB infection event # # --------------------------------------------------------------------------- +class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin): + """The Tb Regular Poll Event for Data Generation for assigning active infections + * selects everyone to develop an active infection and schedules onset of active tb + sometime during the simulation + """ + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=120)) + + def apply(self, population): + + df = population.props + now = self.sim.date + rng = self.module.rng + # Make everyone who is alive and not infected (no-one should be) susceptible + susc_idx = df.loc[ + df.is_alive + & (df.tb_inf != "active") + ].index + + n_susceptible = len(susc_idx) + + middle_index = len(susc_idx) // 2 + + # Will equally split two strains among the population + list_ds = susc_idx[:middle_index] + list_mdr = susc_idx[middle_index:] + + # schedule onset of active tb. This will be equivalent to the "Onset", so it + # doesn't matter how long after we have decided which infection this is. + for person_id in list_ds: + date_progression = now + pd.DateOffset( + # At some point during their lifetime, this person will develop TB + days=self.module.rng.randint(0, 365*(int(self.sim.end_date.year - self.sim.date.year)+1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "ds" + + for person_id in list_mdr: + date_progression = now + pd.DateOffset( + days=rng.randint(0, 365*int(self.sim.end_date.year - self.sim.start_date.year + 1)) + ) + # set date of active tb - properties will be updated at TbActiveEvent poll daily + df.at[person_id, "tb_scheduled_date_active"] = date_progression + df.at[person_id, "tb_strain"] = "mdr" + class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin): """The Tb Regular Poll Event for assigning active infections @@ -1439,7 +1491,6 @@ def apply(self, population): self.module.update_parameters_for_program_scaleup() - class TbActiveEvent(RegularEvent, PopulationScopeEventMixin): """ * check for those with dates of active tb onset within last time-period diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 5b4e2fff4c..f0c8d6f09f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -7,7 +7,7 @@ from collections import OrderedDict from pathlib import Path from typing import Dict, Optional, Union - +import pandas as pd import numpy as np from tlo import Date, Population, logging @@ -63,9 +63,11 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() + self.generate_data = None self.end_date = None self.output_file = None self.population: Optional[Population] = None + self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -209,6 +211,8 @@ def make_initial_population(self, *, n): module.initialise_population(self.population) logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') @@ -221,7 +225,14 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it + self.generate_data = True # for now ensure we're always aiming to print data + + f = open('output.txt', mode='a') + #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + # Reorder columns to place the new columns at the front + pd.set_option('display.max_columns', None) + print(self.event_chains.columns) for module in self.modules.values(): module.initialise_simulation(self) @@ -250,17 +261,72 @@ def simulate(self, *, end_date): if date >= end_date: self.date = end_date + self.event_chains.to_csv('output.csv', index=False) break - + #if event.target != self.population: # print("Event: ", event) - - if event.module == self.modules['RTI']: - print("RTI event ", event) - print(" target ", event.target) - if event.target != self.population: - self.population.props.at[event.tar] + go_ahead = False + df_before = [] + + # Only print events relevant to modules of interest + # Do not want to compare before/after in births because it may expand the pop dataframe + print_output = True + if print_output: + if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): + #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + go_ahead = True + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'Before' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_before = self.population.props.copy() + self.fire_single_event(event, date) + + if print_output: + if go_ahead == True: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + # if not df_before.columns.equals(df_after.columns): + # print("Number of columns in pop dataframe", len(self.population.props.columns)) + # print("Before", df_before.columns) + # print("After", df_after.columns#) + # exit(-1) + # if not df_before.index.equals(df_after.index): + # print("Number of indices in pop dataframe", len(self.population.props.index)) + # print("----> ", event) + # print("Before", df_before.index#) + # print("After", df_after.index) + # exit(-1) + + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From 16c071c6220edcc20b539f346625f628e5e8c4c5 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 12:37:38 +0200 Subject: [PATCH 03/21] Print event chains --- src/tlo/methods/demography.py | 2 +- src/tlo/methods/healthsystem.py | 8 ++-- src/tlo/methods/hiv.py | 6 +-- src/tlo/methods/tb.py | 4 +- src/tlo/simulation.py | 47 +++++++++--------- tests/test_data_generation.py | 85 +++++++++++++++++++++++++++++++++ 6 files changed, 117 insertions(+), 35 deletions(-) create mode 100644 tests/test_data_generation.py diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py index 6b2578fd44..4f19af6d55 100644 --- a/src/tlo/methods/demography.py +++ b/src/tlo/methods/demography.py @@ -315,7 +315,7 @@ def initialise_simulation(self, sim): # Launch the repeating event that will store statistics about the population structure sim.schedule_event(DemographyLoggingEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: # Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately self.other_death_poll = OtherDeathPoll(self) sim.schedule_event(self.other_death_poll, sim.date) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 6e251e636c..203ca10985 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2033,9 +2033,9 @@ def run_individual_level_events_in_mode_0_or_1(self, assert event.facility_info is not None, \ f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." - go_ahead = False - if (event.module == self.sim.modules['Tb'] or event.module == self.sim.modules['Hiv']): - go_ahead = True + print_chains = False + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event @@ -2046,7 +2046,7 @@ def run_individual_level_events_in_mode_0_or_1(self, # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) - if go_ahead: + if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target row['event'] = event diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 8e0d337fc1..36b1a4bd6e 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_data: + if self.sim.generate_event_chains: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index cd79ae22a5..57ccd97368 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -833,7 +833,7 @@ def initialise_population(self, population): df["tb_date_ipt"] = pd.NaT # # ------------------ infection status ------------------ # - if self.sim.generate_data is False: + if self.sim.generate_event_chains is False: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,7 +869,7 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_data is False: + if sim.generate_event_chains is False: sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) else: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index f0c8d6f09f..d055d6e367 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -63,7 +63,9 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_data = None + self.generate_event_chains = None + self.generate_event_chains_modules_of_interest = [] + self.generate_event_chains_ignore_events = [] self.end_date = None self.output_file = None self.population: Optional[Population] = None @@ -216,7 +218,7 @@ def make_initial_population(self, *, n): end = time.time() logger.info(key='info', data=f'make_initial_population() {end - start} s') - def simulate(self, *, end_date): + def simulate(self, *, end_date, generate_event_chains = False): """Simulation until the given end date :param end_date: when to stop simulating. Only events strictly before this @@ -225,7 +227,11 @@ def simulate(self, *, end_date): """ start = time.time() self.end_date = end_date # store the end_date so that others can reference it - self.generate_data = True # for now ensure we're always aiming to print data + self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + if self.generate_event_chains: + # For now keep these fixed, eventually they will be input from user + self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] + self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] f = open('output.txt', mode='a') #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) @@ -264,17 +270,13 @@ def simulate(self, *, end_date): self.event_chains.to_csv('output.csv', index=False) break - #if event.target != self.population: - # print("Event: ", event) - go_ahead = False + + print_chains = False df_before = [] - # Only print events relevant to modules of interest - # Do not want to compare before/after in births because it may expand the pop dataframe - print_output = True - if print_output: - if (event.module == self.modules['Tb'] or event.module == self.modules['Hiv']) and 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event) and "HealthSystemScheduler" not in str(event): - #if 'TbActiveCasePollGenerateData' not in str(event) and 'HivPollingEventForDataGeneration' not in str(event) and "SimplifiedBirthsPoll" not in str(event) and "AgeUpdateEvent" not in str(event): + if self.generate_event_chains: + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): go_ahead = True if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -288,7 +290,7 @@ def simulate(self, *, end_date): self.fire_single_event(event, date) - if print_output: + if go_ahead: if go_ahead == True: if event.target != self.population: row = self.population.props.iloc[[event.target]] @@ -299,18 +301,6 @@ def simulate(self, *, end_date): self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) else: df_after = self.population.props.copy() - # if not df_before.columns.equals(df_after.columns): - # print("Number of columns in pop dataframe", len(self.population.props.columns)) - # print("Before", df_before.columns) - # print("After", df_after.columns#) - # exit(-1) - # if not df_before.index.equals(df_after.index): - # print("Number of indices in pop dataframe", len(self.population.props.index)) - # print("----> ", event) - # print("Before", df_before.index#) - # print("After", df_after.index) - # exit(-1) - change = df_before.compare(df_after) if ~change.empty: indices = change.index @@ -385,6 +375,13 @@ def do_birth(self, mother_id): child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id def find_events_for_person(self, person_id: int): diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py new file mode 100644 index 0000000000..1f6333bbfe --- /dev/null +++ b/tests/test_data_generation.py @@ -0,0 +1,85 @@ +import os +from pathlib import Path + +import pandas as pd +import pytest + +from tlo import Date, Simulation +from tlo.methods import ( + care_of_women_during_pregnancy, + demography, + depression, + enhanced_lifestyle, + epi, + epilepsy, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + cardio_metabolic_disorders, + labour, + newborn_outcomes, + postnatal_supervisor, + pregnancy_helper_functions, + pregnancy_supervisor, + depression, + tb, + contraception, +# simplified_births, + symptommanager, +) +from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt + +# create simulation parameters +start_date = Date(2010, 1, 1) +end_date = Date(2015, 1, 1) +popsize = 100 + +@pytest.mark.slow +def test_data_harvesting(seed): + """ + This test runs a simulation to print all individual events of specific individuals + """ + + module_of_interest = 'Hiv' + # create sim object + sim = create_basic_sim(popsize, seed) + + dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) + + # Check that all dependencies are included + for dep in dependencies_list: + if dep not in sim.modules: + print("WARNING: dependency ", dep, "not included") + exit(-1) + + # run simulation + sim.simulate(end_date=end_date, generate_event_chains = True) + + +def create_basic_sim(population_size, seed): + # create the basic outline of an rti simulation object + sim = Simulation(start_date=start_date, seed=seed) + resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' + sim.register(demography.Demography(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), + enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), + healthburden.HealthBurden(resourcefilepath=resourcefilepath), + symptommanager.SymptomManager(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), + cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), + depression.Depression(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + ) + + sim.make_initial_population(n=population_size) + return sim + From ba81487a3fa003e2f10206e435a1d64f170f14e3 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 2 Oct 2024 13:08:50 +0200 Subject: [PATCH 04/21] Add chains in mode 2 too and clean up in simuation --- src/tlo/methods/healthsystem.py | 40 ++++++++++++++++++------ src/tlo/simulation.py | 55 ++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index 203ca10985..54cb976b26 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -2034,18 +2034,20 @@ def run_individual_level_events_in_mode_0_or_1(self, f"Cannot run HSI {event.TREATMENT_ID} without facility_info being defined." print_chains = False - if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): - print_chains = True - row = self.sim.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = self.sim.date - row['when'] = 'Before' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + # Print individual info after event if print_chains: row = self.sim.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -2445,8 +2447,28 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None: # Expected appt footprint before running event _appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT - # Run event & get actual footprint + + print_chains = False + if self.sim.generate_event_chains: + if event.module in self.sim.generate_event_chains_modules_of_interest and all(sub not in str(event) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'Before' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + # Run the HSI event (allowing it to return an updated appt_footprint) actual_appt_footprint = event.run(squeeze_factor=squeeze_factor) + + if print_chains: + row = self.sim.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) # Check if the HSI event returned updated_appt_footprint, and if so adjust original_call if actual_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d055d6e367..616e159453 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -277,7 +277,7 @@ def simulate(self, *, end_date, generate_event_chains = False): if self.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore if (event.module in self.generate_event_chains_modules_of_interest) and all(sub not in str(event) for sub in self.generate_event_chains_ignore_events): - go_ahead = True + print_chains = True if event.target != self.population: row = self.population.props.iloc[[event.target]] row['person_ID'] = event.target @@ -290,33 +290,32 @@ def simulate(self, *, end_date, generate_event_chains = False): self.fire_single_event(event, date) - if go_ahead: - if go_ahead == True: - if event.target != self.population: - row = self.population.props.iloc[[event.target]] - row['person_ID'] = event.target - row['event'] = event - row['event_date'] = date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - else: - df_after = self.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = event - new_rows_before['event_date'] = date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = event - new_rows_after['event_date'] = date - new_rows_after['when'] = 'After' - - self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) + if print_chains: + if event.target != self.population: + row = self.population.props.iloc[[event.target]] + row['person_ID'] = event.target + row['event'] = event + row['event_date'] = date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + else: + df_after = self.population.props.copy() + change = df_before.compare(df_after) + if ~change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = event + new_rows_before['event_date'] = date + new_rows_before['when'] = 'Before' + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = event + new_rows_after['event_date'] = date + new_rows_after['when'] = 'After' + + self.event_chains = pd.concat([self.event_chains,new_rows_before], ignore_index=True) + self.event_chains = pd.concat([self.event_chains,new_rows_after], ignore_index=True) # The simulation has ended. if self.show_progress_bar: From b1c907c12bfa54621983415b560381d1737afc9a Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 09:36:06 +0200 Subject: [PATCH 05/21] Fix issue with tests by ensuring standard Polling and infection is maintained is generate_event_chains is None --- src/tlo/methods/hiv.py | 6 +++--- src/tlo/methods/hsi_event.py | 14 ++++++++------ src/tlo/methods/tb.py | 10 ++++++---- src/tlo/simulation.py | 4 +++- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index 36b1a4bd6e..391cf587a8 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -631,7 +631,7 @@ def initialise_population(self, population): df.loc[df.is_alive, "hv_date_treated"] = pd.NaT df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Launch sub-routines for allocating the right number of people into each category self.initialise_baseline_prevalence(population) # allocate baseline prevalence @@ -906,7 +906,7 @@ def initialise_simulation(self, sim): df = sim.population.props p = self.parameters - if self.sim.generate_event_chains: + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi: print("Should be generating data") sim.schedule_event( HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0) @@ -1901,7 +1901,7 @@ def vmmc_for_child(): priority=0, ) - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None or self.sim.generate_event_chains_overwrite_epi is False: # Horizontal transmission: Male --> Female horizontal_transmission(from_sex="M", to_sex="F") diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 470794bcdd..785f27b7a6 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -193,10 +193,12 @@ def run(self, squeeze_factor): print_chains = False df_before = [] - + if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: row = self.sim.population.props.iloc[[self.target]] @@ -204,7 +206,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'Before' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -219,7 +221,7 @@ def run(self, squeeze_factor): row['event'] = self row['event_date'] = self.sim.date row['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() change = df_before.compare(df_after) @@ -236,8 +238,8 @@ def run(self, squeeze_factor): new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' - self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 57ccd97368..4c170944d2 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -832,8 +832,9 @@ def initialise_population(self, population): df["tb_on_ipt"] = False df["tb_date_ipt"] = pd.NaT + # # ------------------ infection status ------------------ # - if self.sim.generate_event_chains is False: + if self.sim.generate_event_chains is False or self.sim.generate_event_chains is None: # WHO estimates of active TB for 2010 to get infected initial population # don't need to scale or include treated proportion as no-one on treatment yet inc_estimates = p["who_incidence_estimates"] @@ -869,10 +870,11 @@ def initialise_simulation(self, sim): sim.schedule_event(TbRegularEvents(self), sim.date) sim.schedule_event(TbSelfCureEvent(self), sim.date) - if sim.generate_event_chains is False: - sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) - else: + if sim.generate_event_chains is True and sim.generate_event_chains_overwrite_epi is True: sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0)) + else: + sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1)) + # 2) log at the end of the year # Optional: Schedule the scale-up of programs diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 794bfef98e..4aff23c9d7 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -105,6 +105,7 @@ def __init__( self.modules = OrderedDict() self.event_queue = EventQueue() self.generate_event_chains = None + self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] self.end_date = None @@ -298,10 +299,11 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.end_date = end_date # store the end_date so that others can reference it self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data + self.generate_event_chains_overwrite_epi = False if self.generate_event_chains: # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) From cfb4264f0133fccbc0a82a6c9d3f51479d19038f Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:51:37 +0200 Subject: [PATCH 06/21] Switch iloc for loc --- src/tlo/events.py | 5 ++--- src/tlo/methods/hsi_event.py | 4 ++-- src/tlo/simulation.py | 9 ++++++--- tests/test_data_generation.py | 2 +- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 78b828091d..a50832a58d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -74,7 +74,7 @@ def run(self): if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -83,13 +83,12 @@ def run(self): else: df_before = self.sim.population.props.copy() - self.apply(self.target) self.post_apply_hook() if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 785f27b7a6..cffeb32992 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -201,7 +201,7 @@ def run(self, squeeze_factor): print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target row['event'] = self row['event_date'] = self.sim.date diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 4aff23c9d7..42a2a288d3 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -298,14 +298,17 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains # for now ensure we're always aiming to print data - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains = generate_event_chains if self.generate_event_chains: + # Eventually this can be made an option + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + else: + # If not using to print chains, cannot ignore epi + self.generate_event_chains_overwrite_epi = False - #df_event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) # Reorder columns to place the new columns at the front pd.set_option('display.max_columns', None) diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 1f6333bbfe..8dd92513f9 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -32,7 +32,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2015, 1, 1) +end_date = Date(2014, 1, 1) popsize = 100 @pytest.mark.slow From e0327de6b6f850ac871a2308271f6863333f173e Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 7 Oct 2024 15:55:57 +0200 Subject: [PATCH 07/21] Change syntax of if statement --- src/tlo/events.py | 2 +- src/tlo/methods/hsi_event.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index a50832a58d..2eef87ba3f 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,7 +71,7 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index cffeb32992..805c9584fb 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,9 +196,7 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] From fceee02e68722e29314c3d9efe35983709a78deb Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 9 Oct 2024 09:27:54 +0100 Subject: [PATCH 08/21] Change syntax of if statement and print string of event --- src/tlo/events.py | 6 +++--- src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2eef87ba3f..2a7871c2c8 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,12 +71,12 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -90,7 +90,7 @@ def run(self): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 805c9584fb..ea9066bc8b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,12 +196,12 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersect(str(self)): + if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): print_chains = True if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -216,7 +216,7 @@ def run(self, squeeze_factor): if self.target != self.sim.population: row = self.sim.population.props.loc[[self.target]] row['person_ID'] = self.target - row['event'] = self + row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) From eaeae626a4b37c024db38abf82bdb7c2e723ffe2 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Thu, 10 Oct 2024 14:45:41 +0100 Subject: [PATCH 09/21] Focus on rti and print footprint --- src/tlo/events.py | 16 +++++++++++++--- src/tlo/methods/hsi_event.py | 36 ++++++++++++++++------------------- src/tlo/methods/rti.py | 8 ++++++-- src/tlo/simulation.py | 6 +++--- tests/test_data_generation.py | 31 ++++++++++++++++-------------- 5 files changed, 55 insertions(+), 42 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 2a7871c2c8..76e1b9a117 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -71,14 +71,19 @@ def run(self): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -88,11 +93,12 @@ def run(self): if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_after = self.sim.population.props.copy() @@ -104,11 +110,15 @@ def run(self): new_rows_before['event'] = self new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' + new_rows_before['appt_footprint'] = 'N/A' + new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index new_rows_after['event'] = self new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' + new_rows_after['appt_footprint'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index ea9066bc8b..f8e8738543 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -196,14 +196,19 @@ def run(self, squeeze_factor): if self.sim.generate_event_chains: # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - if (self.module in self.sim.generate_event_chains_modules_of_interest) and not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): +# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + print_chains = True if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: df_before = self.sim.population.props.copy() @@ -212,32 +217,23 @@ def run(self, squeeze_factor): self.post_apply_hook() self._run_after_hsi_event() + footprint = self.EXPECTED_APPT_FOOTPRINT + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + if print_chains: if self.target != self.sim.population: - row = self.sim.population.props.loc[[self.target]] + row = self.sim.population.props.iloc[[self.target]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' + row['appt_footprint'] = str(footprint) self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + print("Error, I shouldn't be here") + exit(-1) + return updated_appt_footprint def get_consumables( diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..1c12e7162b 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1)) + super().__init__(module, frequency=DateOffset(months=1000)) p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,9 +2864,13 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - pred = eq.predict(df.loc[rt_current_non_ind]) + if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1 + else: + pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] + # Update to say they have been involved in a rti df.loc[selected_for_rti, 'rt_road_traffic_inc'] = True # Set the date that people were injured to now diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 42a2a288d3..a8ecf14cc6 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -303,8 +303,8 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['Tb'], self.modules['Hiv'], self.modules['CardioMetabolicDisorders']] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 8dd92513f9..af3c4f0ae9 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -25,15 +25,16 @@ depression, tb, contraception, -# simplified_births, + simplified_births, + rti, symptommanager, ) from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2014, 1, 1) -popsize = 100 +end_date = Date(2012, 1, 1) +popsize = 200 @pytest.mark.slow def test_data_harvesting(seed): @@ -41,7 +42,7 @@ def test_data_harvesting(seed): This test runs a simulation to print all individual events of specific individuals """ - module_of_interest = 'Hiv' + module_of_interest = 'RTI' # create sim object sim = create_basic_sim(popsize, seed) @@ -55,29 +56,31 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - + exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), + # contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), + rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + # epi.Epi(resourcefilepath=resourcefilepath), + # hiv.Hiv(resourcefilepath=resourcefilepath), + # tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + # labour.Labour(resourcefilepath=resourcefilepath), + #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From c7bd9d058cea79fad0f8471830766f5c335a7df1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:57:21 +0100 Subject: [PATCH 10/21] Only store change in individual properties, not entire property row. Log changes to logger. --- src/tlo/events.py | 204 ++++++++++++++++++++++++++-------- src/tlo/methods/hsi_event.py | 134 ++++++++++++++++------ src/tlo/simulation.py | 2 +- tests/test_data_generation.py | 22 ++-- 4 files changed, 268 insertions(+), 94 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 76e1b9a117..436a01a97c 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -4,13 +4,20 @@ from enum import Enum from typing import TYPE_CHECKING -from tlo import DateOffset +from tlo import DateOffset, logging if TYPE_CHECKING: from tlo import Simulation import pandas as pd +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +logger_summary = logging.getLogger(f"{__name__}.summary") +logger_summary.setLevel(logging.INFO) + +debug_chains = True class Priority(Enum): """Enumeration for the Priority, which is used in sorting the events in the simulation queue.""" @@ -62,66 +69,167 @@ def apply(self, target): :param target: the target of the event """ raise NotImplementedError - - def run(self): - """Make the event happen.""" + def compare_population_dataframe(self,df_before, df_after): + """ This function compares the population dataframe before/after a population-wide event has occurred. + It allows us to identify the individuals for which this event led to a significant (i.e. property) change, and to store the properties which have changed as a result of it. """ + + # Create a mask of where values are different + diff_mask = (df_before != df_after) & ~(df_before.isna() & df_after.isna()) + + # Create an empty list to store changes for each of the individuals + chain_links = {} + + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): + changed_cols = row.index[row].tolist() + + if changed_cols: # Proceed only if there are changes in the row + + # Create a dictionary for this person + # First add event info + link_info = { + #'person_ID': idx, + 'event': str(self), + 'event_date': self.sim.date, + } + + # Store the new values from df_after for the changed columns + for col in changed_cols: + link_info[col] = df_after.at[idx, col] + + + # Append the event and changes to the individual key + chain_links = {idx : link_info} + + return chain_links + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + + # Initialise these variables print_chains = False df_before = [] + row_before = pd.Series() - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + #if (self.module in self.sim.generate_event_chains_modules_of_interest) and .. + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): + + # Will eventually use this once I can actually GET THE NAME OF THE SELF + #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + + print_chains = True + + # Target is single individual + if self.target != self.sim.population: + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - self.apply(self.target) - self.post_apply_hook() + else: + # This will be a population-wide event. In order to find individuals for which this led to + # a meaningful change, make a copy of the pop dataframe before the event has occurred. + df_before = self.sim.population.props.copy() + + return print_chains, row_before, df_before + + def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + + chain_links = {} + if print_chains: + + # Target is single individual if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store event for this individual + link_info = { + #'person_ID' : self.target, + 'event' : str(self), + 'event_date' : self.sim.date, + } + # Store property changes as a result of the event for this individual + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + if debug_chains: + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: - df_after = self.sim.population.props.copy() - change = df_before.compare(df_after) - if ~change.empty: - indices = change.index - new_rows_before = df_before.loc[indices] - new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self - new_rows_before['event_date'] = self.sim.date - new_rows_before['when'] = 'Before' - new_rows_before['appt_footprint'] = 'N/A' - - new_rows_after = df_after.loc[indices] - new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self - new_rows_after['event_date'] = self.sim.date - new_rows_after['when'] = 'After' - new_rows_after['appt_footprint'] = 'N/A' - - - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) - self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + # Target is entire population. Identify individuals for which properties have changed + # and store their changes. + + # Population frame after event + df_after = self.sim.population.props + + # Create and store the event and dictionary of changes for affected individuals + chain_links = self.compare_population_dataframe(df_before, df_after) + + if debug_chains: + # Or print entire rows + change = df_before.compare(df_after) + if not change.empty: + indices = change.index + new_rows_before = df_before.loc[indices] + new_rows_before['person_ID'] = new_rows_before.index + new_rows_before['event'] = self + new_rows_before['event_date'] = self.sim.date + new_rows_before['when'] = 'Before' + + new_rows_after = df_after.loc[indices] + new_rows_after['person_ID'] = new_rows_after.index + new_rows_after['event'] = self + new_rows_after['event_date'] = self.sim.date + new_rows_after['when'] = 'After' + + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True) + self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True) + + return chain_links + + def run(self): + """Make the event happen.""" + + # Collect relevant information before event takes place + if self.sim.generate_event_chains: + print_chains, row_before, df_before = self.store_chains_to_do_before_event() + + self.apply(self.target) + self.post_apply_hook() + + # Collect event info + meaningful property changes of individuals. Combined, these will constitute a 'link' + # in the individual's event chain. + if self.sim.generate_event_chains: + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + + # Log chain_links here + if len(chain_links)>0: + logger.info(key='event_chains', + data= chain_links, + description='Links forming chains of events for simulated individuals') + + #print("Chain events ", chain_links) + class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f8e8738543..1c727f014b 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -16,12 +16,19 @@ from tlo import Module, Simulation from tlo.methods.healthsystem import HealthSystem +# Pointing to the logger in events +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) +debug_chains = True + + # Declare the level which will be used to represent the merging of levels '1b' and '2' LABEL_FOR_MERGED_FACILITY_LEVELS_1B_AND_2 = "2" @@ -187,54 +194,113 @@ def _run_after_hsi_event(self) -> None: item_codes=self._EQUIPMENT, facility_id=self.facility_info.id ) - - def run(self, squeeze_factor): - """Make the event happen.""" + + def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: + """ This function checks whether this event should be logged as part of the event chains, and if so stored required information before the event has occurred. """ + # Initialise these variables print_chains = False - df_before = [] - - if self.sim.generate_event_chains: - # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore - #if (self.module in self.sim.generate_event_chains_modules_of_interest) and not - #if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): -# if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - - print_chains = True - if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) - else: - df_before = self.sim.population.props.copy() - - updated_appt_footprint = self.apply(self.target, squeeze_factor) - self.post_apply_hook() - self._run_after_hsi_event() + row_before = pd.Series() - footprint = self.EXPECTED_APPT_FOOTPRINT - if updated_appt_footprint is not None: - footprint = updated_appt_footprint + # Only print event if it belongs to modules of interest and if it is not in the list of events to ignore + # if (self.module in self.sim.generate_event_chains_modules_of_interest) and + if all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events): - if print_chains: + # Will eventually use this once I can actually GET THE NAME OF THE SELF + # if not set(self.sim.generate_event_chains_ignore_events).intersection(str(self)): + if self.target != self.sim.population: - row = self.sim.population.props.iloc[[self.target]] + + # In the case of HSI events, only individual events should exist and therefore be logged + print_chains = True + + # Save row for comparison after event has occurred + row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + + row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = str(footprint) + row['when'] = 'Before' + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + else: + # Many of our HealthSystem implementations rely on the assumption that print("Error, I shouldn't be here") exit(-1) + + return print_chains, row_before + + def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> dict: + """ If print_chains=True, this function logs the event and identifies and logs the any property changes that have occured to one or multiple individuals as a result of the event taking place. """ + if print_chains: + # For HSI event, this will only ever occur for individual events + + row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) + + # Create and store dictionary of changes. Note that person_ID, event, event_date, appt_foot, and level + # will be stored regardless of whether individual experienced property changes. + + # Add event details + link_info = { + 'event' : str(self), + 'event_date' : self.sim.date, + 'appt_footprint' : str(footprint), + 'level' : self.facility_info.level, + } + + # Add changes to properties + for key in row_before.index: + if row_before[key] != row_after[key]: # Note: used fillna previously + link_info[key] = row_after[key] + + chain_links = {self.target : link_info} + + # Print entire row + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = footprint + row['level'] = self.facility_info.level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + + return chain_links + + + def run(self, squeeze_factor): + """Make the event happen.""" + + + if self.sim.generate_event_chains: + print_chains, row_before = self.store_chains_to_do_before_event() + + footprint = self.EXPECTED_APPT_FOOTPRINT + updated_appt_footprint = self.apply(self.target, squeeze_factor) + self.post_apply_hook() + self._run_after_hsi_event() + + + if self.sim.generate_event_chains: + + # If the footprint has been updated when the event ran, change it here + if updated_appt_footprint is not None: + footprint = updated_appt_footprint + + chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) + + if len(chain_links)>0: + logger_chains.info(key='event_chains', + data = chain_links, + description='Links forming chains of events for simulated individuals') + #print(chain_links) + return updated_appt_footprint + def get_consumables( self, diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index a8ecf14cc6..20b3a4898f 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -281,7 +281,7 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint']) + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index af3c4f0ae9..39f2b022aa 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -33,7 +33,7 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) +end_date = Date(2011, 1, 1) popsize = 200 @pytest.mark.slow @@ -63,24 +63,24 @@ def create_basic_sim(population_size, seed): sim = Simulation(start_date=start_date, seed=seed) resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' sim.register(demography.Demography(resourcefilepath=resourcefilepath), - # contraception.Contraception(resourcefilepath=resourcefilepath), + contraception.Contraception(resourcefilepath=resourcefilepath), enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), healthburden.HealthBurden(resourcefilepath=resourcefilepath), symptommanager.SymptomManager(resourcefilepath=resourcefilepath), healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), rti.RTI(resourcefilepath=resourcefilepath), healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - # epi.Epi(resourcefilepath=resourcefilepath), - # hiv.Hiv(resourcefilepath=resourcefilepath), - # tb.Tb(resourcefilepath=resourcefilepath), + # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + epi.Epi(resourcefilepath=resourcefilepath), + hiv.Hiv(resourcefilepath=resourcefilepath), + tb.Tb(resourcefilepath=resourcefilepath), cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), depression.Depression(resourcefilepath=resourcefilepath), - # newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - # pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - # care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - # labour.Labour(resourcefilepath=resourcefilepath), - #postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), + newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), + pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), + care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), + labour.Labour(resourcefilepath=resourcefilepath), + postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), ) sim.make_initial_population(n=population_size) From 769aaeca44aaedc324bd3da2f5f338bb47e02106 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 11 Oct 2024 17:03:22 +0100 Subject: [PATCH 11/21] Style fixes --- src/tlo/methods/tb.py | 2 +- src/tlo/simulation.py | 4 ++-- tests/test_data_generation.py | 5 ----- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 4c170944d2..9dc05ff301 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -1393,7 +1393,7 @@ def apply(self, population): & (df.tb_inf != "active") ].index - n_susceptible = len(susc_idx) + len(susc_idx) middle_index = len(susc_idx) // 2 diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 20b3a4898f..75dfa76429 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -8,7 +8,7 @@ import time from collections import OrderedDict from pathlib import Path -from typing import Dict, Optional, Union +from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd @@ -374,7 +374,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: :param to_date: Date to simulate up to but not including - must be before or equal to simulation end date specified in call to :py:meth:`initialise`. """ - f = open('output.txt', mode='a') + open('output.txt', mode='a') if not self._initialised: msg = "Simulation must be initialised before calling run_simulation_to" diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index 39f2b022aa..c94618a77d 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import pandas as pd import pytest from tlo import Date, Simulation @@ -11,7 +10,6 @@ depression, enhanced_lifestyle, epi, - epilepsy, healthburden, healthseekingbehaviour, healthsystem, @@ -20,16 +18,13 @@ labour, newborn_outcomes, postnatal_supervisor, - pregnancy_helper_functions, pregnancy_supervisor, depression, tb, contraception, - simplified_births, rti, symptommanager, ) -from tlo.methods.hsi_generic_first_appts import HSI_GenericEmergencyFirstAppt # create simulation parameters start_date = Date(2010, 1, 1) From 757cee36b0ae611f1f7ae31d25799fc0d6e7daa1 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Sun, 13 Oct 2024 11:15:17 +0100 Subject: [PATCH 12/21] Include printing of individual properties at the beginning and at birth, label what is only used for ddebugging and will be later removed --- src/tlo/events.py | 5 +++-- src/tlo/methods/hsi_event.py | 7 ++++--- src/tlo/methods/rti.py | 2 +- src/tlo/simulation.py | 28 ++++++++++++++++++++++++++++ tests/test_data_generation.py | 5 ++--- 5 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 436a01a97c..03bf7c72fa 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -98,7 +98,6 @@ def compare_population_dataframe(self,df_before, df_after): for col in changed_cols: link_info[col] = df_after.at[idx, col] - # Append the event and changes to the individual key chain_links = {idx : link_info} @@ -127,7 +126,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) if debug_chains: - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -166,6 +165,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = {self.target : link_info} + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births @@ -185,6 +185,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Create and store the event and dictionary of changes for affected individuals chain_links = self.compare_population_dataframe(df_before, df_after) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: # Or print entire rows change = df_before.compare(df_after) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 1c727f014b..0c3bc16072 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -217,6 +217,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) @@ -228,8 +229,8 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: else: # Many of our HealthSystem implementations rely on the assumption that - print("Error, I shouldn't be here") - exit(-1) + raise RuntimeError("Cannot have population-wide HSI events") + return print_chains, row_before @@ -258,7 +259,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : link_info} - # Print entire row + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target row['event'] = str(self) diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 1c12e7162b..3642365976 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2865,7 +2865,7 @@ def apply(self, population): Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1 + pred = 1.0 else: pred = eq.predict(df.loc[rt_current_non_ind]) random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 75dfa76429..582fb4ba1c 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -37,6 +37,9 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chains = logging.getLogger("tlo.methods.event") +logger_chains.setLevel(logging.INFO) + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -111,6 +114,8 @@ def __init__( self.end_date = None self.output_file = None self.population: Optional[Population] = None + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains: Optinoal[Population] = None self.show_progress_bar = show_progress_bar @@ -281,7 +286,16 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + + # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. + # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. + if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') + logger_chains.info(key='event_chains', + data = pop_dict, + description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -392,6 +406,8 @@ def run_simulation_to(self, *, to_date: Date) -> None: self._update_progress_bar(progress_bar, date) self.fire_single_event(event, date) self.date = to_date + + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: @@ -449,13 +465,25 @@ def do_birth(self, mother_id: int) -> int: child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) + if self.generate_event_chains: + # When individual is born, store their initial properties to provide a starting point to the chain of property + # changes that this individual will undergo as a result of events taking place. + prop_dict = self.population.props.loc[child_id].to_dict() + + child_dict = {child_id : prop_dict} + logger_chains.info(key='event_chains', + data = child_dict, + description='Links forming chains of events for simulated individuals') + + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.population.props.iloc[[child_id]] row['person_ID'] = child_id row['event'] = 'Birth' row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py index c94618a77d..d9885c1fab 100644 --- a/tests/test_data_generation.py +++ b/tests/test_data_generation.py @@ -28,8 +28,8 @@ # create simulation parameters start_date = Date(2010, 1, 1) -end_date = Date(2011, 1, 1) -popsize = 200 +end_date = Date(2012, 1, 1) +popsize = 100 @pytest.mark.slow def test_data_harvesting(seed): @@ -51,7 +51,6 @@ def test_data_harvesting(seed): # run simulation sim.simulate(end_date=end_date, generate_event_chains = True) - exit(-1) def create_basic_sim(population_size, seed): # create the basic outline of an rti simulation object From 22a5e44312ad4d2f1d955b70399ae9569efb13c0 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:00:22 +0100 Subject: [PATCH 13/21] Log everything to simulation, as events logger doesn't seem to be visible to all modules. For now add person_ID to the dict of info printed as the outer dictionary key logging seems to have a problem. --- src/tlo/events.py | 13 +++++++++---- src/tlo/methods/hsi_event.py | 3 ++- src/tlo/simulation.py | 25 +++++++++++++++++-------- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index 03bf7c72fa..98832faecb 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,9 +11,13 @@ import pandas as pd + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +logger_chain = logging.getLogger('tlo.simulation') +logger_chain.setLevel(logging.INFO) + logger_summary = logging.getLogger(f"{__name__}.summary") logger_summary.setLevel(logging.INFO) @@ -89,7 +93,7 @@ def compare_population_dataframe(self,df_before, df_after): # Create a dictionary for this person # First add event info link_info = { - #'person_ID': idx, + 'person_ID': idx, 'event': str(self), 'event_date': self.sim.date, } @@ -152,13 +156,14 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if self.target != self.sim.population: row_after = self.sim.population.props.loc[abs(self.target)].fillna(-99999) - # Create and store event for this individual + # Create and store event for this individual, regardless of whether any property change occurred link_info = { #'person_ID' : self.target, + 'person_ID' : self.target, 'event' : str(self), 'event_date' : self.sim.date, } - # Store property changes as a result of the event for this individual + # Store (if any) property changes as a result of the event for this individual for key in row_before.index: if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] @@ -225,7 +230,7 @@ def run(self): # Log chain_links here if len(chain_links)>0: - logger.info(key='event_chains', + logger_chain.info(key='event_chains', data= chain_links, description='Links forming chains of events for simulated individuals') diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 0c3bc16072..6651a8704a 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -17,7 +17,7 @@ from tlo.methods.healthsystem import HealthSystem # Pointing to the logger in events -logger_chains = logging.getLogger("tlo.methods.event") +logger_chains = logging.getLogger("tlo.simulation") logger_chains.setLevel(logging.INFO) logger = logging.getLogger(__name__) @@ -246,6 +246,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # Add event details link_info = { + 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, 'appt_footprint' : str(footprint), diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 582fb4ba1c..fd9fade215 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -107,7 +107,7 @@ def __init__( self.date = self.start_date = start_date self.modules = OrderedDict() self.event_queue = EventQueue() - self.generate_event_chains = None + self.generate_event_chains = True self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] @@ -292,15 +292,23 @@ def make_initial_population(self, *, n: int) -> None: # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. if self.generate_event_chains: + pop_dict = self.population.props.to_dict(orient='index') - logger_chains.info(key='event_chains', + + print(pop_dict) + print(pop_dict.keys()) + for key in pop_dict.keys(): + pop_dict[key]['person_ID'] = key + print("Length of properties", len(pop_dict[0].keys())) + #exit(-1) + logger.info(key='event_chains', data = pop_dict, description='Links forming chains of events for simulated individuals') end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") - def initialise(self, *, end_date: Date, generate_event_chains) -> None: + def initialise(self, *, end_date: Date) -> None: """Initialise all modules in simulation. :param end_date: Date to end simulation on - accessible to modules to allow initialising data structures which may depend (in size for example) on the @@ -312,7 +320,7 @@ def initialise(self, *, end_date: Date, generate_event_chains) -> None: self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - self.generate_event_chains = generate_event_chains + #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True @@ -413,7 +421,7 @@ def run_simulation_to(self, *, to_date: Date) -> None: if self.show_progress_bar: progress_bar.stop() - def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: + def simulate(self, *, end_date: Date) -> None: """Simulate until the given end date :param end_date: When to stop simulating. Only events strictly before this @@ -421,7 +429,7 @@ def simulate(self, *, end_date: Date, generate_event_chains=False) -> None: clarity. """ start = time.time() - self.initialise(end_date=end_date, generate_event_chains=generate_event_chains) + self.initialise(end_date=end_date) self.run_simulation_to(to_date=end_date) self.finalise(time.time() - start) @@ -470,9 +478,10 @@ def do_birth(self, mother_id: int) -> int: # When individual is born, store their initial properties to provide a starting point to the chain of property # changes that this individual will undergo as a result of events taking place. prop_dict = self.population.props.loc[child_id].to_dict() - + prop_dict['event'] = 'Birth' + prop_dict['event_date'] = self.date child_dict = {child_id : prop_dict} - logger_chains.info(key='event_chains', + logger.info(key='event_chains', data = child_dict, description='Links forming chains of events for simulated individuals') From 7faa81783dc43e434e26ef8c95717480cebd3816 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 10:07:46 +0200 Subject: [PATCH 14/21] Consider all modules included as of interest --- src/tlo/simulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index fd9fade215..15be1622e8 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -325,7 +325,7 @@ def initialise(self, *, end_date: Date) -> None: # Eventually this can be made an option self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user - self.generate_event_chains_modules_of_interest = [self.modules['RTI']] + self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi From 7232f976831054ed541d59d8da20c91289fa79e6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:29:43 +0200 Subject: [PATCH 15/21] Remove pop-wide HSI warning and make epi default even when printing chains --- src/tlo/methods/hsi_event.py | 38 ++++++++++++++++++++++++++---------- src/tlo/simulation.py | 2 +- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 6651a8704a..d0cdb5bbdd 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -223,13 +223,23 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'Before' - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: - # Many of our HealthSystem implementations rely on the assumption that - raise RuntimeError("Cannot have population-wide HSI events") + # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error + # raise RuntimeError("Cannot have population-wide HSI events") + logger.debug( + key="message", + data=( + f"Cannot have population-wide HSI events" + ), + ) return print_chains, row_before @@ -245,12 +255,20 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # will be stored regardless of whether individual experienced property changes. # Add event details + + try: + record_footprint = str(footprint) + record_level = self.facility_info.level + except: + record_footprint = 'N/A' + record_level = 'N/A' + link_info = { 'person_ID': self.target, 'event' : str(self), 'event_date' : self.sim.date, - 'appt_footprint' : str(footprint), - 'level' : self.facility_info.level, + 'appt_footprint' : record_footprint, + 'level' : record_level, } # Add changes to properties @@ -266,8 +284,8 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> row['event'] = str(self) row['event_date'] = self.sim.date row['when'] = 'After' - row['appt_footprint'] = footprint - row['level'] = self.facility_info.level + row['appt_footprint'] = record_footprint + row['level'] = record_level self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links @@ -277,7 +295,7 @@ def run(self, squeeze_factor): """Make the event happen.""" - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: print_chains, row_before = self.store_chains_to_do_before_event() footprint = self.EXPECTED_APPT_FOOTPRINT @@ -287,7 +305,7 @@ def run(self, squeeze_factor): self._run_after_hsi_event() - if self.sim.generate_event_chains: + if self.sim.generate_event_chains and self.target != self.sim.population: # If the footprint has been updated when the event ran, change it here if updated_appt_footprint is not None: diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 15be1622e8..0c70b164d9 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -323,7 +323,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = True + self.generate_event_chains_overwrite_epi = False # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] From a6def2d22c0d291ce775afef561b580847ad36cf Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:39:24 +0200 Subject: [PATCH 16/21] Style fix --- src/tlo/methods/hsi_event.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d0cdb5bbdd..041ab9cf08 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -237,7 +237,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: logger.debug( key="message", data=( - f"Cannot have population-wide HSI events" + "Cannot have population-wide HSI events" ), ) From ecea532a2843d312580accf97383cd62c457fd04 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:51:39 +0200 Subject: [PATCH 17/21] Remove data generation test, which wasn't really a test --- tests/test_data_generation.py | 82 ----------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 tests/test_data_generation.py diff --git a/tests/test_data_generation.py b/tests/test_data_generation.py deleted file mode 100644 index d9885c1fab..0000000000 --- a/tests/test_data_generation.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from pathlib import Path - -import pytest - -from tlo import Date, Simulation -from tlo.methods import ( - care_of_women_during_pregnancy, - demography, - depression, - enhanced_lifestyle, - epi, - healthburden, - healthseekingbehaviour, - healthsystem, - hiv, - cardio_metabolic_disorders, - labour, - newborn_outcomes, - postnatal_supervisor, - pregnancy_supervisor, - depression, - tb, - contraception, - rti, - symptommanager, -) - -# create simulation parameters -start_date = Date(2010, 1, 1) -end_date = Date(2012, 1, 1) -popsize = 100 - -@pytest.mark.slow -def test_data_harvesting(seed): - """ - This test runs a simulation to print all individual events of specific individuals - """ - - module_of_interest = 'RTI' - # create sim object - sim = create_basic_sim(popsize, seed) - - dependencies_list = sim.modules[module_of_interest].ADDITIONAL_DEPENDENCIES.union(sim.modules[module_of_interest].INIT_DEPENDENCIES) - - # Check that all dependencies are included - for dep in dependencies_list: - if dep not in sim.modules: - print("WARNING: dependency ", dep, "not included") - exit(-1) - - # run simulation - sim.simulate(end_date=end_date, generate_event_chains = True) - -def create_basic_sim(population_size, seed): - # create the basic outline of an rti simulation object - sim = Simulation(start_date=start_date, seed=seed) - resourcefilepath = Path(os.path.dirname(__file__)) / '../resources' - sim.register(demography.Demography(resourcefilepath=resourcefilepath), - contraception.Contraception(resourcefilepath=resourcefilepath), - enhanced_lifestyle.Lifestyle(resourcefilepath=resourcefilepath), - healthburden.HealthBurden(resourcefilepath=resourcefilepath), - symptommanager.SymptomManager(resourcefilepath=resourcefilepath), - healthsystem.HealthSystem(resourcefilepath=resourcefilepath, service_availability=['*']), - rti.RTI(resourcefilepath=resourcefilepath), - healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=resourcefilepath), - # simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), - epi.Epi(resourcefilepath=resourcefilepath), - hiv.Hiv(resourcefilepath=resourcefilepath), - tb.Tb(resourcefilepath=resourcefilepath), - cardio_metabolic_disorders.CardioMetabolicDisorders(resourcefilepath=resourcefilepath), - depression.Depression(resourcefilepath=resourcefilepath), - newborn_outcomes.NewbornOutcomes(resourcefilepath=resourcefilepath), - pregnancy_supervisor.PregnancySupervisor(resourcefilepath=resourcefilepath), - care_of_women_during_pregnancy.CareOfWomenDuringPregnancy(resourcefilepath=resourcefilepath), - labour.Labour(resourcefilepath=resourcefilepath), - postnatal_supervisor.PostnatalSupervisor(resourcefilepath=resourcefilepath), - ) - - sim.make_initial_population(n=population_size) - return sim - From ae7a44cb5f72063c48555e3b21d5d6dd4400ee97 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Wed, 23 Oct 2024 15:29:03 +0200 Subject: [PATCH 18/21] Change dict of properties to string in logging, and add analysis files --- .../analysis_extract_data.py | 370 ++++++++++++++++++ .../postprocess_events_chain.py | 156 ++++++++ .../scenario_generate_chains.py | 115 ++++++ src/tlo/events.py | 23 +- src/tlo/methods/hsi_event.py | 13 +- src/tlo/simulation.py | 29 +- 6 files changed, 684 insertions(+), 22 deletions(-) create mode 100644 src/scripts/analysis_data_generation/analysis_extract_data.py create mode 100644 src/scripts/analysis_data_generation/postprocess_events_chain.py create mode 100644 src/scripts/analysis_data_generation/scenario_generate_chains.py diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py new file mode 100644 index 0000000000..2cfba5315b --- /dev/null +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -0,0 +1,370 @@ +"""Produce plots to show the health impact (deaths, dalys) each the healthcare system (overall health impact) when +running under different MODES and POLICIES (scenario_impact_of_actual_vs_funded.py)""" + +# short tclose -> ideal case +# long tclose -> status quo +import argparse +from pathlib import Path +from typing import Tuple + +import pandas as pd + +from tlo import Date +from tlo.analysis.utils import extract_results +from datetime import datetime + +# Range of years considered +min_year = 2010 +max_year = 2040 + + +def all_columns(_df): + return pd.Series(_df.all()) + +def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = None, ): + """Produce standard set of plots describing the effect of each TREATMENT_ID. + - We estimate the epidemiological impact as the EXTRA deaths that would occur if that treatment did not occur. + - We estimate the draw on healthcare system resources as the FEWER appointments when that treatment does not occur. + """ + pd.set_option('display.max_rows', None) + pd.set_option('display.max_colwidth', None) + event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column='0', + #column = str(i), + #custom_generate_series=get_num_dalys_by_year, + do_scaling=False + ) + # print(event_chains.loc[0,(0, 0)]) + + eval_env = { + 'datetime': datetime, # Add the datetime class to the eval environment + 'pd': pd, # Add pandas to handle Timestamp + 'Timestamp': pd.Timestamp, # Specifically add Timestamp for eval + 'NaT': pd.NaT, + 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) + } + + for item,row in event_chains.iterrows(): + value = event_chains.loc[item,(0, 0)] + if value !='': + print('') + print(value) + exit(-1) + #dict = {} + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # dict[i] = [] + + #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: + # event_chains = extract_results( + # results_folder, + # module='tlo.simulation'#, + # key='event_chains', + # column = str(i), + # #custom_generate_series=get_num_dalys_by_year, + # do_scaling=False + # ) + # print(event_chains) + # print(event_chains.index) + # print(event_chains.columns.levels) + + # for index, row in event_chains.iterrows(): + # if event_chains.iloc[index,0] is not None: + # if(event_chains.iloc[index,0]['person_ID']==i): #and 'event' in event_chains.iloc[index,0].keys()): + # dict[i].append(event_chains.iloc[index,0]) + #elif (event_chains.iloc[index,0]['person_ID']==i and 'event' not in event_chains.iloc[index,0].keys()): + #print(event_chains.iloc[index,0]['de_depr']) + # exit(-1) + #for item in dict[0]: + # print(item) + + #exit(-1) + + TARGET_PERIOD = (Date(min_year, 1, 1), Date(max_year, 1, 1)) + + # Definitions of general helper functions + lambda stub: output_folder / f"{stub.replace('*', '_star_')}.png" # noqa: E731 + + def target_period() -> str: + """Returns the target period as a string of the form YYYY-YYYY""" + return "-".join(str(t.year) for t in TARGET_PERIOD) + + def get_parameter_names_from_scenario_file() -> Tuple[str]: + """Get the tuple of names of the scenarios from `Scenario` class used to create the results.""" + from scripts.healthsystem.impact_of_actual_vs_funded.scenario_impact_of_actual_vs_funded import ( + ImpactOfHealthSystemMode, + ) + e = ImpactOfHealthSystemMode() + return tuple(e._scenarios.keys()) + + def get_num_deaths(_df): + """Return total number of Deaths (total within the TARGET_PERIOD) + """ + return pd.Series(data=len(_df.loc[pd.to_datetime(_df.date).between(*TARGET_PERIOD)])) + + def get_num_dalys(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + def get_num_dalys_by_cause(_df): + """Return number of DALYs by cause by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year.between(*[i.year for i in TARGET_PERIOD])] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + def set_param_names_as_column_index_level_0(_df): + """Set the columns index (level 0) as the param_names.""" + ordered_param_names_no_prefix = {i: x for i, x in enumerate(param_names)} + names_of_cols_level0 = [ordered_param_names_no_prefix.get(col) for col in _df.columns.levels[0]] + assert len(names_of_cols_level0) == len(_df.columns.levels[0]) + _df.columns = _df.columns.set_levels(names_of_cols_level0, level=0) + return _df + + def find_difference_relative_to_comparison(_ser: pd.Series, + comparison: str, + scaled: bool = False, + drop_comparison: bool = True, + ): + """Find the difference in the values in a pd.Series with a multi-index, between the draws (level 0) + within the runs (level 1), relative to where draw = `comparison`. + The comparison is `X - COMPARISON`.""" + return _ser \ + .unstack(level=0) \ + .apply(lambda x: (x - x[comparison]) / (x[comparison] if scaled else 1.0), axis=1) \ + .drop(columns=([comparison] if drop_comparison else [])) \ + .stack() + + + def get_counts_of_hsi_by_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).between(*TARGET_PERIOD), 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + year_target = 2023 + def get_counts_of_hsi_by_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring""" + _counts_by_treatment_id = _df \ + .loc[pd.to_datetime(_df['date']).dt.year ==year_target, 'TREATMENT_ID'] \ + .apply(pd.Series) \ + .sum() \ + .astype(int) + return _counts_by_treatment_id.groupby(level=0).sum() + + def get_counts_of_hsi_by_short_treatment_id(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + def get_counts_of_hsi_by_short_treatment_id_by_year(_df): + """Get the counts of the short TREATMENT_IDs occurring (shortened, up to first underscore)""" + _counts_by_treatment_id = get_counts_of_hsi_by_treatment_id_by_year(_df) + _short_treatment_id = _counts_by_treatment_id.index.map(lambda x: x.split('_')[0] + "*") + return _counts_by_treatment_id.groupby(by=_short_treatment_id).sum() + + + # Obtain parameter names for this scenario file + param_names = get_parameter_names_from_scenario_file() + print(param_names) + + # ================================================================================================ + # TIME EVOLUTION OF TOTAL DALYs + # Plot DALYs averted compared to the ``No Policy'' policy + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum().sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'index_original']) + concatenated_df = concatenated_df.reset_index(level='index_original',drop=True) + dalys_by_year = concatenated_df + print(dalys_by_year) + dalys_by_year.to_csv('ConvertedOutputs/Total_DALYs_with_time.csv', index=True) + + # ================================================================================================ + # Print population under each scenario + pop_model = extract_results(results_folder, + module="tlo.methods.demography", + key="population", + column="total", + index="date", + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + + pop_model.index = pop_model.index.year + pop_model = pop_model[(pop_model.index >= this_min_year) & (pop_model.index <= max_year)] + print(pop_model) + assert dalys_by_year.index.equals(pop_model.index) + assert all(dalys_by_year.columns == pop_model.columns) + pop_model.to_csv('ConvertedOutputs/Population_with_time.csv', index=True) + + # ================================================================================================ + # DALYs BROKEN DOWN BY CAUSES AND YEAR + # DALYs by cause per year + # %% Quantify the health losses associated with all interventions combined. + + year_target = 2023 # This global variable will be passed to custom function + def get_num_dalys_by_year_and_cause(_df): + """Return total number of DALYs (Stacked) by label (total within the TARGET_PERIOD)""" + return pd.Series( + data=_df + .loc[_df.year == year_target] + .drop(columns=['date', 'sex', 'age_range', 'year']) + .sum() + ) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + this_min_year = 2010 + for year in range(this_min_year, max_year+1): + year_target = year + num_dalys_by_year = extract_results( + results_folder, + module='tlo.methods.healthburden', + key='dalys_stacked', + custom_generate_series=get_num_dalys_by_year_and_cause, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = num_dalys_by_year #summarize(num_dalys_by_year) + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + + df_total = concatenated_df + df_total.to_csv('ConvertedOutputs/DALYS_by_cause_with_time.csv', index=True) + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_ran_by_year = concatenated_df + + del ALL + + ALL = {} + # Plot time trend show year prior transition as well to emphasise that until that point DALYs incurred + # are consistent across different policies + for year in range(min_year, max_year+1): + year_target = year + + hsi_not_delivered_by_year = extract_results( + results_folder, + module='tlo.methods.healthsystem.summary', + key='Never_ran_HSI_Event', + custom_generate_series=get_counts_of_hsi_by_short_treatment_id_by_year, + do_scaling=True + ).pipe(set_param_names_as_column_index_level_0) + ALL[year_target] = hsi_not_delivered_by_year + + # Concatenate the DataFrames into a single DataFrame + concatenated_df = pd.concat(ALL.values(), keys=ALL.keys()) + concatenated_df.index = concatenated_df.index.set_names(['date', 'cause']) + HSI_never_ran_by_year = concatenated_df + + HSI_never_ran_by_year = HSI_never_ran_by_year.fillna(0) #clean_df( + HSI_ran_by_year = HSI_ran_by_year.fillna(0) + HSI_total_by_year = HSI_ran_by_year.add(HSI_never_ran_by_year, fill_value=0) + HSI_ran_by_year.to_csv('ConvertedOutputs/HSIs_ran_by_area_with_time.csv', index=True) + HSI_never_ran_by_year.to_csv('ConvertedOutputs/HSIs_never_ran_by_area_with_time.csv', index=True) + print(HSI_ran_by_year) + print(HSI_never_ran_by_year) + print(HSI_total_by_year) + +if __name__ == "__main__": + rfp = Path('resources') + + parser = argparse.ArgumentParser( + description="Produce plots to show the impact each set of treatments", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--output-path", + help=( + "Directory to write outputs to. If not specified (set to None) outputs " + "will be written to value of --results-path argument." + ), + type=Path, + default=None, + required=False, + ) + parser.add_argument( + "--resources-path", + help="Directory containing resource files", + type=Path, + default=Path('resources'), + required=False, + ) + parser.add_argument( + "--results-path", + type=Path, + help=( + "Directory containing results from running " + "src/scripts/analysis_data_generation/scenario_generate_chains.py " + ), + default=None, + required=False + ) + args = parser.parse_args() + assert args.results_path is not None + results_path = args.results_path + + output_path = results_path if args.output_path is None else args.output_path + + apply( + results_folder=results_path, + output_folder=output_path, + resourcefilepath=args.resources_path + ) diff --git a/src/scripts/analysis_data_generation/postprocess_events_chain.py b/src/scripts/analysis_data_generation/postprocess_events_chain.py new file mode 100644 index 0000000000..96c27a04b1 --- /dev/null +++ b/src/scripts/analysis_data_generation/postprocess_events_chain.py @@ -0,0 +1,156 @@ +import pandas as pd +from dateutil.relativedelta import relativedelta + +# Remove from every individual's event chain all events that were fired after death +def cut_off_events_after_death(df): + + events_chain = df.groupby('person_ID') + + filtered_data = pd.DataFrame() + + for name, group in events_chain: + + # Find the first non-NaN 'date_of_death' and its index + first_non_nan_index = group['date_of_death'].first_valid_index() + + if first_non_nan_index is not None: + # Filter out all rows after the first non-NaN index + filtered_group = group.loc[:first_non_nan_index] # Keep rows up to and including the first valid index + filtered_data = pd.concat([filtered_data, filtered_group]) + else: + # If there are no non-NaN values, keep the original group + filtered_data = pd.concat([filtered_data, group]) + + return filtered_data + +# Load into DataFrame +def load_csv_to_dataframe(file_path): + try: + # Load raw chains into df + df = pd.read_csv(file_path) + print("Raw event chains loaded successfully!") + return df + except FileNotFoundError: + print(f"Error: The file '{file_path}' was not found.") + except Exception as e: + print(f"An error occurred: {e}") + +file_path = 'output.csv' # Replace with the path to your CSV file + +output = load_csv_to_dataframe(file_path) + +# Some of the dates appeared not to be in datetime format. Correct here. +output['date_of_death'] = pd.to_datetime(output['date_of_death'], errors='coerce') +output['date_of_birth'] = pd.to_datetime(output['date_of_birth'], errors='coerce') +if 'hv_date_inf' in output.columns: + output['hv_date_inf'] = pd.to_datetime(output['hv_date_inf'], errors='coerce') + + +date_start = pd.to_datetime('2010-01-01') +if 'Other' in output['cause_of_death'].values: + print("ERROR: 'Other' was included in sim as possible cause of death") + exit(-1) + +# Choose which columns in individual properties to visualise +columns_to_print =['event','is_alive','hv_inf', 'hv_art','tb_inf', 'tb_date_active', 'event_date', 'when'] +#columns_to_print =['person_ID', 'date_of_birth', 'date_of_death', 'cause_of_death','hv_date_inf', 'hv_art','tb_inf', 'tb_date_active', 'event date', 'event'] + +# When checking which individuals led to *any* changes in individual properties, exclude these columns from comparison +columns_to_exclude_in_comparison = ['when', 'event', 'event_date', 'age_exact_years', 'age_years', 'age_days', 'age_range', 'level', 'appt_footprint'] + +# If considering epidemiology consistent with sim, add check here. +check_ages_of_those_HIV_inf = False +if check_ages_of_those_HIV_inf: + for index, row in output.iterrows(): + if pd.isna(row['hv_date_inf']): + continue # Skip this iteration + diff = relativedelta(output.loc[index, 'hv_date_inf'],output.loc[index, 'date_of_birth']) + if diff.years > 1 and diff.years<15: + print("Person contracted HIV infection at age younger than 15", diff) + +# Remove events after death +filtered_data = cut_off_events_after_death(output) + +print_raw_events = True # Print raw chain of events for each individual +print_selected_changes = False +print_all_changes = True +person_ID_of_interest = 494 + +pd.set_option('display.max_rows', None) + +for name, group in filtered_data.groupby('person_ID'): + list_of_dob = group['date_of_birth'] + + # Select individuals based on when they were born + if list_of_dob.iloc[0].year<2010: + + # Check that immutable properties are fixed for this individual, i.e. that events were collated properly: + all_identical_dob = group['date_of_birth'].nunique() == 1 + all_identical_sex = group['sex'].nunique() == 1 + if all_identical_dob is False or all_identical_sex is False: + print("Immutable properties are changing! This is not chain for single individual") + print(group) + exit(-1) + + print("----------------------------------------------------------------------") + print("person_ID ", group['person_ID'].iloc[0], "d.o.b ", group['date_of_birth'].iloc[0]) + print("Number of events for this individual ", group['person_ID'].iloc[0], "is :", len(group)/2) # Divide by 2 before printing Before/After for each event + number_of_events =len(group)/2 + number_of_changes=0 + if print_raw_events: + print(group) + + if print_all_changes: + # Check each row + comparison = group.drop(columns=columns_to_exclude_in_comparison).fillna(-99999).ne(group.drop(columns=columns_to_exclude_in_comparison).shift().fillna(-99999)) + + # Iterate over rows where any column has changed + for idx, row_changed in comparison.iloc[1:].iterrows(): + if row_changed.any(): # Check if any column changed in this row + number_of_changes+=1 + changed_columns = row_changed[row_changed].index.tolist() # Get the columns where changes occurred + print(f"Row {idx} - Changes detected in columns: {changed_columns}") + columns_output = ['event', 'event_date', 'appt_footprint', 'level'] + changed_columns + print(group.loc[idx, columns_output]) # Print only the changed columns + if group.loc[idx, 'when'] == 'Before': + print('-----> THIS CHANGE OCCURRED BEFORE EVENT!') + #print(group.loc[idx,columns_to_print]) + print() # For better readability + print("Number of changes is ", number_of_changes, "out of ", number_of_events, " events") + + if print_selected_changes: + tb_inf_condition = ( + ((group['tb_inf'].shift(1) == 'uninfected') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'latent') & (group['tb_inf'] == 'active')) | + ((group['tb_inf'].shift(1) == 'active') & (group['tb_inf'] == 'latent')) | + ((group['hv_inf'].shift(1) is False) & (group['hv_inf'] is True)) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'not') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'on_not_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_VL_suppressed') & (group['hv_art'] == 'not')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'on_VL_suppressed')) | + ((group['hv_art'].shift(1) == 'on_not_VL_suppressed') & (group['hv_art'] == 'not')) + ) + + alive_condition = ( + (group['is_alive'].shift(1) is True) & (group['is_alive'] is False) + ) + # Combine conditions for rows of interest + transition_condition = tb_inf_condition | alive_condition + + if list_of_dob.iloc[0].year >= 2010: + print("DETECTED OF INTEREST") + print(group[group['event'] == 'Birth'][columns_to_print]) + + # Filter the DataFrame based on the condition + filtered_transitions = group[transition_condition] + if not filtered_transitions.empty: + if list_of_dob.iloc[0].year < 2010: + print("DETECTED OF INTEREST") + print(filtered_transitions[columns_to_print]) + + +print("Number of individuals simulated ", filtered_data.groupby('person_ID').ngroups) + + + diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py new file mode 100644 index 0000000000..6bdcd02d90 --- /dev/null +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -0,0 +1,115 @@ +"""This Scenario file run the model to generate event chans + +Run on the batch system using: +``` +tlo batch-submit + src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +or locally using: +``` + tlo scenario-run src/scripts/analysis_data_generation/scenario_generate_chains.py +``` + +""" +from pathlib import Path +from typing import Dict + +import pandas as pd + +from tlo import Date, logging +from tlo.analysis.utils import get_parameters_for_status_quo, mix_scenarios +from tlo.methods.fullmodel import fullmodel +from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher +from tlo.scenario import BaseScenario + + +class GenerateDataChains(BaseScenario): + def __init__(self): + super().__init__() + self.seed = 0 + self.start_date = Date(2010, 1, 1) + self.end_date = self.start_date + pd.DateOffset(months=1) + self.pop_size = 120 + self._scenarios = self._get_scenarios() + self.number_of_draws = len(self._scenarios) + self.runs_per_draw = 1 + self.generate_event_chains = True + + def log_configuration(self): + return { + 'filename': 'generate_event_chains', + 'directory': Path('./outputs'), # <- (specified only for local running) + 'custom_levels': { + '*': logging.WARNING, + 'tlo.methods.demography': logging.INFO, + 'tlo.methods.events': logging.INFO, + 'tlo.methods.demography.detail': logging.WARNING, + 'tlo.methods.healthburden': logging.INFO, + 'tlo.methods.healthsystem.summary': logging.INFO, + } + } + + def modules(self): + return ( + fullmodel(resourcefilepath=self.resources) + + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + ) + + def draw_parameters(self, draw_number, rng): + if draw_number < self.number_of_draws: + return list(self._scenarios.values())[draw_number] + else: + return + + # case 1: gfHE = -0.030, factor = 1.01074 + # case 2: gfHE = -0.020, factor = 1.02116 + # case 3: gfHE = -0.015, factor = 1.02637 + # case 4: gfHE = 0.015, factor = 1.05763 + # case 5: gfHE = 0.020, factor = 1.06284 + # case 6: gfHE = 0.030, factor = 1.07326 + + def _get_scenarios(self) -> Dict[str, Dict]: + """Return the Dict with values for the parameters that are changed, keyed by a name for the scenario. + """ + + self.YEAR_OF_CHANGE = 2019 + + return { + + # =========== STATUS QUO ============ + "Baseline": + mix_scenarios( + self._baseline(), + { + "HealthSystem": { + "yearly_HR_scaling_mode": "no_scaling", + }, + } + ), + + } + + def _baseline(self) -> Dict: + """Return the Dict with values for the parameter changes that define the baseline scenario. """ + return mix_scenarios( + get_parameters_for_status_quo(), + { + "HealthSystem": { + "mode_appt_constraints": 1, # <-- Mode 1 prior to change to preserve calibration + "mode_appt_constraints_postSwitch": 2, # <-- Mode 2 post-change to show effects of HRH + "year_mode_switch": self.YEAR_OF_CHANGE, + "scale_to_effective_capabilities": True, + "policy_name": "Naive", + "tclose_overwrite": 1, + "tclose_days_offset_overwrite": 7, + "use_funded_or_actual_staffing": "actual", + "cons_availability": "default", + } + }, + ) + +if __name__ == '__main__': + from tlo.cli import scenario_run + + scenario_run([__file__]) diff --git a/src/tlo/events.py b/src/tlo/events.py index 98832faecb..00a6fe4e7d 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -83,13 +85,14 @@ def compare_population_dataframe(self,df_before, df_after): # Create an empty list to store changes for each of the individuals chain_links = {} - + len_of_diff = len(diff_mask) + # Loop through each row of the mask + for idx, row in diff_mask.iterrows(): changed_cols = row.index[row].tolist() - + if changed_cols: # Proceed only if there are changes in the row - # Create a dictionary for this person # First add event info link_info = { @@ -103,7 +106,7 @@ def compare_population_dataframe(self,df_before, df_after): link_info[col] = df_after.at[idx, col] # Append the event and changes to the individual key - chain_links = {idx : link_info} + chain_links[idx] = str(link_info) return chain_links @@ -168,7 +171,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. if debug_chains: @@ -228,14 +231,18 @@ def run(self): if self.sim.generate_event_chains: chain_links = self.store_chains_to_do_after_event(print_chains, row_before, df_before) + # Create empty logger for entire pop + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + + pop_dict.update(chain_links) + # Log chain_links here if len(chain_links)>0: logger_chain.info(key='event_chains', - data= chain_links, - description='Links forming chains of events for simulated individuals') + data= pop_dict, + description='Links forming chains of events for simulated individuals') #print("Chain events ", chain_links) - class RegularEvent(Event): diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index 041ab9cf08..d657e9d3a0 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -11,6 +11,8 @@ import pandas as pd +FACTOR_POP_DICT = 5000 + if TYPE_CHECKING: from tlo import Module, Simulation @@ -276,7 +278,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> if row_before[key] != row_after[key]: # Note: used fillna previously link_info[key] = row_after[key] - chain_links = {self.target : link_info} + chain_links = {self.target : str(link_info)} # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] @@ -314,10 +316,15 @@ def run(self, squeeze_factor): chain_links = self.store_chains_to_do_after_event(print_chains, row_before, str(footprint)) if len(chain_links)>0: + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} + # pop_dict = {i: '' for i in range(1000)} # Always include all possible individuals + + pop_dict.update(chain_links) + logger_chains.info(key='event_chains', - data = chain_links, + data = pop_dict, description='Links forming chains of events for simulated individuals') - #print(chain_links) return updated_appt_footprint diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 0c70b164d9..d9ba62c43a 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -40,6 +40,8 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) +FACTOR_POP_DICT = 5000 + class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -294,17 +296,18 @@ def make_initial_population(self, *, n: int) -> None: if self.generate_event_chains: pop_dict = self.population.props.to_dict(orient='index') - - print(pop_dict) - print(pop_dict.keys()) for key in pop_dict.keys(): pop_dict[key]['person_ID'] = key - print("Length of properties", len(pop_dict[0].keys())) - #exit(-1) + pop_dict[key] = str(pop_dict[key]) # Log as string to avoid issues around length of properties stored later + + pop_dict_full = {i: '' for i in range(FACTOR_POP_DICT)} + pop_dict_full.update(pop_dict) + + print("Size for full sim", len(pop_dict_full)) + logger.info(key='event_chains', - data = pop_dict, + data = pop_dict_full, description='Links forming chains of events for simulated individuals') - end = time.time() logger.info(key="info", data=f"make_initial_population() {end - start} s") @@ -323,7 +326,7 @@ def initialise(self, *, end_date: Date) -> None: #self.generate_event_chains = generate_event_chains if self.generate_event_chains: # Eventually this can be made an option - self.generate_event_chains_overwrite_epi = False + self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] @@ -480,9 +483,13 @@ def do_birth(self, mother_id: int) -> int: prop_dict = self.population.props.loc[child_id].to_dict() prop_dict['event'] = 'Birth' prop_dict['event_date'] = self.date - child_dict = {child_id : prop_dict} + + pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals + pop_dict[child_id] = str(prop_dict) # Convert to string to avoid issue of length + + print("Length at birth", len(pop_dict)) logger.info(key='event_chains', - data = child_dict, + data = pop_dict, description='Links forming chains of events for simulated individuals') # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. @@ -492,7 +499,7 @@ def do_birth(self, mother_id: int) -> int: row['event_date'] = self.date row['when'] = 'After' self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) - + return child_id def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: From 16299a21f43862a188f41ea6117b81c2c11d72ab Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Mon, 25 Nov 2024 09:37:29 +0000 Subject: [PATCH 19/21] Include debugging option, final set-up of scenario to print data, analysis file now collects all relevant info and prints them --- .../analysis_extract_data.py | 157 ++++++++++++++++-- .../scenario_generate_chains.py | 53 +++++- src/tlo/events.py | 10 +- src/tlo/methods/hsi_event.py | 50 +++--- src/tlo/methods/rti.py | 17 +- src/tlo/simulation.py | 39 +++-- src/tlo/util.py | 1 + 7 files changed, 252 insertions(+), 75 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 2cfba5315b..6eb6408830 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -8,10 +8,14 @@ from typing import Tuple import pandas as pd +import matplotlib.pyplot as plt from tlo import Date from tlo.analysis.utils import extract_results from datetime import datetime +from collections import Counter +import ast + # Range of years considered min_year = 2010 @@ -28,17 +32,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No """ pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) - event_chains = extract_results( - results_folder, - module='tlo.simulation', - key='event_chains', - column='0', - #column = str(i), - #custom_generate_series=get_num_dalys_by_year, - do_scaling=False - ) - # print(event_chains.loc[0,(0, 0)]) - + eval_env = { 'datetime': datetime, # Add the datetime class to the eval environment 'pd': pd, # Add pandas to handle Timestamp @@ -46,13 +40,144 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'NaT': pd.NaT, 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } + + initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + + # Will be added through computation: age at time of RTI + + # Will be added through computation: total duration of event + + initial_rt_event_properties = set() + + num_individuals = 1000 + num_runs = 50 + record = [] + + + for i in range(0,num_individuals): - for item,row in event_chains.iterrows(): - value = event_chains.loc[item,(0, 0)] - if value !='': - print('') - print(value) + individual_event_chains = extract_results( + results_folder, + module='tlo.simulation', + key='event_chains', + column=str(i), + do_scaling=False + ) + + #print(individual_event_chains) + + + for r in range(0,num_runs): + + print("AT RUN = ", r) + + initial_properties = {} + progression_properties = {} + key_first_event = {} + key_last_event = {} + first_event = {} + last_event = {} + properties = {} + + + #ind_Counter = Counter() + ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} + # Count total appts + + list_for_individual = [] + for item,row in individual_event_chains.iterrows(): + value = individual_event_chains.loc[item,(0, r)] + # print("The value is", value, "at run ", r) + if value !='' and isinstance(value, str): + evaluated = eval(value, eval_env) + list_for_individual.append(evaluated) + # elif not isinstance(value,str): + # print(value) + + initial_properties = list_for_individual[0] + print(initial_properties) + + # Initialise first event by gathering parameters of interest from initial_properties + first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} + + progression_properties = {} + for i in list_for_individual: + if 'event' in i: + print("") + print(i) + if 'RTIPolling' in i['event']: + #print("I'm in polling event") + #print(i) + + # Keep track of which properties are changed during polling events + for key,value in i.items(): + if 'rt_' in key: + initial_rt_event_properties.add(key) + + # Retain a copy of Polling event + polling_event = i.copy() + + # Update parameters of interest following RTI + key_first_event = {key: i[key] if key in i else value for key, value in first_event.items()} + + # Calculate age of individual at time of event + key_first_event['age_in_days_at_event'] = (i['rt_date_inj'] - initial_properties['date_of_birth']).days + + # Keep track of evolution in individual's properties + progression_properties = initial_properties.copy() + progression_properties.update(i) + + else: + # Progress properties of individual, even if this event is a death + progression_properties.update(i) + + #print(progression_properties) + # Update footprint + if 'appt_footprint' in i and i['appt_footprint'] != 'Counter()': + footprint = i['appt_footprint'] + if 'Counter' in footprint: + footprint = footprint[len("Counter("):-1] + apply = eval(footprint, eval_env) + ind_Counter[i['level']].update(Counter(apply)) + + if 'is_alive' in i and i['is_alive'] is False: + print("Death", i) + print("-------Total footprint", ind_Counter) + break + + + # Compute final properties of individual + key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] + key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days + key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + key_last_event.update({'total_footprint': ind_Counter}) + + #print("-------Total footprint", ind_Counter) + #for key, value in key_first_event.items(): + # if 'rt_' in key or 'alive' in key: + # print(f"{key}: {value}") + #print(#) + #for key, value in key_last_event.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + # print(f"{key}: {value}") + + #print(key_first_event) + #print(key_last_event) + print(initial_rt_event_properties) + properties = key_first_event | key_last_event + record.append(properties) + for key, value in properties.items(): + #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: + print(f"{key}: {value}") + + df = pd.DataFrame(record) + df.to_csv("raw_data.csv", index=False) + + print(df) + print(initial_rt_event_properties) exit(-1) + #print(i) + #dict = {} #for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: # dict[i] = [] diff --git a/src/scripts/analysis_data_generation/scenario_generate_chains.py b/src/scripts/analysis_data_generation/scenario_generate_chains.py index 6bdcd02d90..79df3f55b6 100644 --- a/src/scripts/analysis_data_generation/scenario_generate_chains.py +++ b/src/scripts/analysis_data_generation/scenario_generate_chains.py @@ -22,18 +22,42 @@ from tlo.methods.fullmodel import fullmodel from tlo.methods.scenario_switcher import ImprovedHealthSystemAndCareSeekingScenarioSwitcher from tlo.scenario import BaseScenario - +from tlo.methods import ( + alri, + cardio_metabolic_disorders, + care_of_women_during_pregnancy, + contraception, + demography, + depression, + diarrhoea, + enhanced_lifestyle, + epi, + healthburden, + healthseekingbehaviour, + healthsystem, + hiv, + rti, + labour, + malaria, + newborn_outcomes, + postnatal_supervisor, + pregnancy_supervisor, + stunting, + symptommanager, + tb, + wasting, +) class GenerateDataChains(BaseScenario): def __init__(self): super().__init__() self.seed = 0 self.start_date = Date(2010, 1, 1) - self.end_date = self.start_date + pd.DateOffset(months=1) - self.pop_size = 120 + self.end_date = self.start_date + pd.DateOffset(months=13) + self.pop_size = 1000 self._scenarios = self._get_scenarios() self.number_of_draws = len(self._scenarios) - self.runs_per_draw = 1 + self.runs_per_draw = 50 self.generate_event_chains = True def log_configuration(self): @@ -51,10 +75,23 @@ def log_configuration(self): } def modules(self): - return ( - fullmodel(resourcefilepath=self.resources) - + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] - ) + # MODIFY + # Here instead of running full module + return [demography.Demography(resourcefilepath=self.resources), + enhanced_lifestyle.Lifestyle(resourcefilepath=self.resources), + healthburden.HealthBurden(resourcefilepath=self.resources), + symptommanager.SymptomManager(resourcefilepath=self.resources, spurious_symptoms=False), + rti.RTI(resourcefilepath=self.resources), + healthseekingbehaviour.HealthSeekingBehaviour(resourcefilepath=self.resources), + #simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=self.resources, + mode_appt_constraints=1, + cons_availability='all')] + + # return ( + # fullmodel(resourcefilepath=self.resources) + # + [ImprovedHealthSystemAndCareSeekingScenarioSwitcher(resourcefilepath=self.resources)] + # ) def draw_parameters(self, draw_number, rng): if draw_number < self.number_of_draws: diff --git a/src/tlo/events.py b/src/tlo/events.py index 00a6fe4e7d..ba8024f621 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -11,7 +11,7 @@ import pandas as pd -FACTOR_POP_DICT = 5000 +from tlo.util import FACTOR_POP_DICT logger = logging.getLogger(__name__) @@ -132,7 +132,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - if debug_chains: + if self.sim.debug_generate_event_chains: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target @@ -142,6 +142,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: + # This will be a population-wide event. In order to find individuals for which this led to # a meaningful change, make a copy of the pop dataframe before the event has occurred. df_before = self.sim.population.props.copy() @@ -174,7 +175,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links[self.target] = str(link_info) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target @@ -194,7 +195,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> chain_links = self.compare_population_dataframe(df_before, df_after) # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - if debug_chains: + if self.sim.debug_generate_event_chains: # Or print entire rows change = df_before.compare(df_after) if not change.empty: @@ -233,7 +234,6 @@ def run(self): # Create empty logger for entire pop pop_dict = {i: '' for i in range(FACTOR_POP_DICT)} # Always include all possible individuals - pop_dict.update(chain_links) # Log chain_links here diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index d657e9d3a0..bdf597fba4 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -8,10 +8,9 @@ from tlo import Date, logging from tlo.events import Event from tlo.population import Population - +from tlo.util import FACTOR_POP_DICT import pandas as pd -FACTOR_POP_DICT = 5000 if TYPE_CHECKING: @@ -219,19 +218,21 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # Save row for comparison after event has occurred row_before = self.sim.population.props.loc[abs(self.target)].copy().fillna(-99999) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'Before' - try: - row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) - row['level'] = self.facility_info.level - except: - row['appt_footprint'] = 'N/A' - row['level'] = 'N/A' - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'Before' + + try: + row['appt_footprint'] = str(self.EXPECTED_APPT_FOOTPRINT) + row['level'] = self.facility_info.level + except: + row['appt_footprint'] = 'N/A' + row['level'] = 'N/A' + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) else: # Once this has been removed from Chronic Syndrome mock module, make this a Runtime Error @@ -280,15 +281,16 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> chain_links = {self.target : str(link_info)} - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.sim.population.props.loc[[abs(self.target)]] - row['person_ID'] = self.target - row['event'] = str(self) - row['event_date'] = self.sim.date - row['when'] = 'After' - row['appt_footprint'] = record_footprint - row['level'] = record_level - self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) + if self.sim.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.sim.population.props.loc[[abs(self.target)]] + row['person_ID'] = self.target + row['event'] = str(self) + row['event_date'] = self.sim.date + row['when'] = 'After' + row['appt_footprint'] = record_footprint + row['level'] = record_level + self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) return chain_links diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 3642365976..1ca2749af7 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2776,7 +2776,7 @@ class RTIPollingEvent(RegularEvent, PopulationScopeEventMixin): def __init__(self, module): """Schedule to take place every month """ - super().__init__(module, frequency=DateOffset(months=1000)) + super().__init__(module, frequency=DateOffset(months=1000)) # Single polling event p = module.parameters # Parameters which transition the model between states self.base_1m_prob_rti = (p['base_rate_injrti'] / 12) @@ -2864,10 +2864,12 @@ def apply(self, population): .when('.between(70,79)', self.rr_injrti_age7079), Predictor('li_ex_alc').when(True, self.rr_injrti_excessalcohol) ) - if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: - pred = 1.0 - else: - pred = eq.predict(df.loc[rt_current_non_ind]) + #if self.sim.generate_event_chains is True and self.sim.generate_event_chains_overwrite_epi is True: + pred = 1.0 + #else: + # pred = eq.predict(df.loc[rt_current_non_ind]) + + random_draw_in_rti = self.module.rng.random_sample(size=len(rt_current_non_ind)) selected_for_rti = rt_current_non_ind[pred > random_draw_in_rti] @@ -4852,6 +4854,7 @@ def __init__(self, module, person_id): self.treated_code = 'none' def apply(self, person_id, squeeze_factor): + self._number_of_times_this_event_has_run += 1 df = self.sim.population.props rng = self.module.rng @@ -4900,10 +4903,12 @@ def apply(self, person_id, squeeze_factor): # injury is being treated in this surgery # find untreated injury codes that are treated with major surgery relevant_codes = np.intersect1d(injuries_to_be_treated, surgically_treated_codes) + # check that the person sent here has an appropriate code(s) assert len(relevant_codes) > 0 # choose a code at random self.treated_code = rng.choice(relevant_codes) + if request_outcome: # check the people sent here hasn't died due to rti, have had their injuries diagnosed and been through # RTI_Med @@ -4990,7 +4995,9 @@ def apply(self, person_id, squeeze_factor): # ------------------------------------- Perm disability from amputation ------------------------------------ codes = ['782', '782a', '782b', '782c', '783', '882', '883', '884'] + if self.treated_code in codes: + # Track whether they are permanently disabled df.at[person_id, 'rt_perm_disability'] = True # Find the column and code where the permanent injury is stored diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index d9ba62c43a..bb766562a0 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -11,8 +11,9 @@ from typing import Optional from typing import TYPE_CHECKING, Optional import pandas as pd - +import tlo.population import numpy as np +from tlo.util import FACTOR_POP_DICT try: import dill @@ -40,8 +41,6 @@ logger_chains = logging.getLogger("tlo.methods.event") logger_chains.setLevel(logging.INFO) -FACTOR_POP_DICT = 5000 - class SimulationPreviouslyInitialisedError(Exception): """Exception raised when trying to initialise an already initialised simulation.""" @@ -113,12 +112,15 @@ def __init__( self.generate_event_chains_overwrite_epi = None self.generate_event_chains_modules_of_interest = [] self.generate_event_chains_ignore_events = [] + self.debug_generate_event_chains = False self.end_date = None self.output_file = None self.population: Optional[Population] = None - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains: Optinoal[Population] = None + + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains: Optional[Population] = None self.show_progress_bar = show_progress_bar self.resourcefilepath = resourcefilepath @@ -288,8 +290,9 @@ def make_initial_population(self, *, n: int) -> None: data=f"{module.name}.initialise_population() {time.time() - start1} s", ) - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + self.event_chains = pd.DataFrame(columns= list(self.population.props.columns)+['person_ID'] + ['event'] + ['event_date'] + ['when'] + ['appt_footprint'] + ['level']) # When logging events for each individual to reconstruct chains, only the changes in individual properties will be logged. # At the start of the simulation + when a new individual is born, we therefore want to store all of their properties at the start. @@ -329,7 +332,7 @@ def initialise(self, *, end_date: Date) -> None: self.generate_event_chains_overwrite_epi = True # For now keep these fixed, eventually they will be input from user self.generate_event_chains_modules_of_interest = [self.modules] - self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] + self.generate_event_chains_ignore_events = ['AgeUpdateEvent','HealthSystemScheduler', 'SimplifiedBirthsPoll','DirectBirth', 'HealthSeekingBehaviourPoll', 'LifestyleEvent'] #['TbActiveCasePollGenerateData','HivPollingEventForDataGeneration','SimplifiedBirthsPoll', 'AgeUpdateEvent', 'HealthSystemScheduler'] else: # If not using to print chains, cannot ignore epi self.generate_event_chains_overwrite_epi = False @@ -418,8 +421,9 @@ def run_simulation_to(self, *, to_date: Date) -> None: self.fire_single_event(event, date) self.date = to_date - # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. - self.event_chains.to_csv('output.csv', index=False) + if self.debug_generate_event_chains: + # TO BE REMOVED: this is currently only used for debugging, will be removed from final PR. + self.event_chains.to_csv('output.csv', index=False) if self.show_progress_bar: progress_bar.stop() @@ -492,13 +496,14 @@ def do_birth(self, mother_id: int) -> int: data = pop_dict, description='Links forming chains of events for simulated individuals') - # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. - row = self.population.props.iloc[[child_id]] - row['person_ID'] = child_id - row['event'] = 'Birth' - row['event_date'] = self.date - row['when'] = 'After' - self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) + if self.debug_generate_event_chains: + # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. + row = self.population.props.iloc[[child_id]] + row['person_ID'] = child_id + row['event'] = 'Birth' + row['event_date'] = self.date + row['when'] = 'After' + self.event_chains = pd.concat([self.event_chains, row], ignore_index=True) return child_id diff --git a/src/tlo/util.py b/src/tlo/util.py index 168b1d41a1..f8dc67d471 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -12,6 +12,7 @@ # Default mother_id value, assigned to individuals initialised as adults at the start of the simulation. DEFAULT_MOTHER_ID = -1e7 +FACTOR_POP_DICT = 1000 def create_age_range_lookup(min_age: int, max_age: int, range_size: int = 5) -> (list, Dict[int, str]): From 0dd862f2a9b485a33933e185e3c59ad64ed33ed9 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Tue, 26 Nov 2024 15:28:30 +0000 Subject: [PATCH 20/21] Change label of person when iterating --- .../analysis_extract_data.py | 68 ++++++++++++------- 1 file changed, 43 insertions(+), 25 deletions(-) diff --git a/src/scripts/analysis_data_generation/analysis_extract_data.py b/src/scripts/analysis_data_generation/analysis_extract_data.py index 6eb6408830..4c8e7d8197 100644 --- a/src/scripts/analysis_data_generation/analysis_extract_data.py +++ b/src/scripts/analysis_data_generation/analysis_extract_data.py @@ -41,7 +41,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No 'nan': float('nan'), # Include NaN for eval (can also use pd.NA if preferred) } - initial_properties_of_interest = ['rt_inj_severity','rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sex','li_urban', 'li_wealth', 'li_ex_alc', 'li_exposed_to_campaign_alcohol_reduction', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] + initial_properties_of_interest = ['rt_MAIS_military_score','rt_ISS_score','rt_disability','rt_polytrauma','rt_injury_1','rt_injury_2','rt_injury_3','rt_injury_4','rt_injury_5','rt_injury_6', 'rt_imm_death','sy_injury','sy_severe_trauma','sex','li_urban', 'li_wealth', 'li_mar_stat', 'li_in_ed', 'li_ed_lev'] # Will be added through computation: age at time of RTI @@ -54,13 +54,15 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No record = [] - for i in range(0,num_individuals): + for p in range(0,num_individuals): + + print("At person = ", p) individual_event_chains = extract_results( results_folder, module='tlo.simulation', key='event_chains', - column=str(i), + column=str(p), do_scaling=False ) @@ -69,7 +71,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No for r in range(0,num_runs): - print("AT RUN = ", r) + initial_properties = {} progression_properties = {} @@ -78,7 +80,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No first_event = {} last_event = {} properties = {} - + average_disability = 0 + prev_disability_incurred = 0 #ind_Counter = Counter() ind_Counter = {'0': Counter(), '1a': Counter(), '1b' : Counter(), '2' : Counter()} @@ -95,7 +98,7 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # print(value) initial_properties = list_for_individual[0] - print(initial_properties) + # print(initial_properties) # Initialise first event by gathering parameters of interest from initial_properties first_event = {key: initial_properties[key] for key in initial_properties_of_interest if key in initial_properties} @@ -103,8 +106,8 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No progression_properties = {} for i in list_for_individual: if 'event' in i: - print("") - print(i) + #print("") + #print(i) if 'RTIPolling' in i['event']: #print("I'm in polling event") #print(i) @@ -126,10 +129,26 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No # Keep track of evolution in individual's properties progression_properties = initial_properties.copy() progression_properties.update(i) + + # dalys incurred + if 'rt_disability' in i: + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + #print('At polling event, ', prev_disability_incurred, prev_date) else: # Progress properties of individual, even if this event is a death progression_properties.update(i) + + # If disability has changed as a result of this, recalculate + if 'rt_disability' in i and i['rt_disability'] != prev_disability_incurred: + dt_in_prev_disability = (i['event_date'] - prev_date).days + average_disability += prev_disability_incurred*dt_in_prev_disability + # Update variables + prev_disability_incurred = i['rt_disability'] + prev_date = i['event_date'] + + #print(progression_properties) # Update footprint @@ -141,34 +160,33 @@ def apply(results_folder: Path, output_folder: Path, resourcefilepath: Path = No ind_Counter[i['level']].update(Counter(apply)) if 'is_alive' in i and i['is_alive'] is False: - print("Death", i) - print("-------Total footprint", ind_Counter) + #print("Death", i) + #print("-------Total footprint", ind_Counter) break # Compute final properties of individual key_last_event['is_alive_after_RTI'] = progression_properties['is_alive'] key_last_event['duration_days'] = (progression_properties['event_date'] - polling_event['rt_date_inj']).days - key_last_event['rt_disability_final'] = progression_properties['rt_disability'] + if not key_first_event['rt_imm_death'] and key_last_event['duration_days']> 0.0: + key_last_event['rt_disability_average'] = average_disability/key_last_event['duration_days'] + else: + key_last_event['rt_disability_average'] = 0.0 + key_last_event['rt_disability_permanent'] = progression_properties['rt_disability'] key_last_event.update({'total_footprint': ind_Counter}) - - #print("-------Total footprint", ind_Counter) - #for key, value in key_first_event.items(): - # if 'rt_' in key or 'alive' in key: - # print(f"{key}: {value}") - #print(#) - #for key, value in key_last_event.items(): - #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - # print(f"{key}: {value}") - #print(key_first_event) - #print(key_last_event) - print(initial_rt_event_properties) + #print("Average disability", key_last_event['rt_disability_average']) + properties = key_first_event | key_last_event + + if not key_first_event['rt_imm_death'] and ((properties['rt_disability_average']-properties['rt_disability'])/properties['rt_disability'] > 1e-4): + print("Error in computed average for individual ", p, r ) + record.append(properties) - for key, value in properties.items(): + #for key, value in properties.items(): #if 'rt_' in key or 'alive' in key or 'event_date' in key or 'footprint' in key: - print(f"{key}: {value}") + #print(f"{key}: {value}") + # print("Initial event properties", initial_rt_event_properties) df = pd.DataFrame(record) df.to_csv("raw_data.csv", index=False) From 84f826322ba13f6fa1631d639944c2bac50667f6 Mon Sep 17 00:00:00 2001 From: Margherita Molaro <48129834+marghe-molaro@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:55:03 +0000 Subject: [PATCH 21/21] Correctly retrieve event name --- src/tlo/events.py | 12 ++++++------ src/tlo/methods/hsi_event.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tlo/events.py b/src/tlo/events.py index ba8024f621..f67b54458a 100644 --- a/src/tlo/events.py +++ b/src/tlo/events.py @@ -97,7 +97,7 @@ def compare_population_dataframe(self,df_before, df_after): # First add event info link_info = { 'person_ID': idx, - 'event': str(self), + 'event': type(self).__name__, 'event_date': self.sim.date, } @@ -136,7 +136,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series, pd.DataFrame # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'Before' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -164,7 +164,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> link_info = { #'person_ID' : self.target, 'person_ID' : self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, } # Store (if any) property changes as a result of the event for this individual @@ -179,7 +179,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> # Print entire row row = self.sim.population.props.loc[[abs(self.target)]] # Use abs to avoid potentil issue with direct births row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True) @@ -202,13 +202,13 @@ def store_chains_to_do_after_event(self, print_chains, row_before, df_before) -> indices = change.index new_rows_before = df_before.loc[indices] new_rows_before['person_ID'] = new_rows_before.index - new_rows_before['event'] = self + new_rows_before['event'] = type(self).__name__ new_rows_before['event_date'] = self.sim.date new_rows_before['when'] = 'Before' new_rows_after = df_after.loc[indices] new_rows_after['person_ID'] = new_rows_after.index - new_rows_after['event'] = self + new_rows_after['event'] = type(self).__name__ new_rows_after['event_date'] = self.sim.date new_rows_after['when'] = 'After' diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index f267181b56..978b26d7c5 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -222,7 +222,7 @@ def store_chains_to_do_before_event(self) -> tuple[bool, pd.Series]: # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ #str(self.event_name) row['event_date'] = self.sim.date row['when'] = 'Before' @@ -268,7 +268,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> link_info = { 'person_ID': self.target, - 'event' : str(self), + 'event' : type(self).__name__, 'event_date' : self.sim.date, 'appt_footprint' : record_footprint, 'level' : record_level, @@ -285,7 +285,7 @@ def store_chains_to_do_after_event(self, print_chains, row_before, footprint) -> # TO BE REMOVED This is currently just used for debugging. Will be removed from final version of PR. row = self.sim.population.props.loc[[abs(self.target)]] row['person_ID'] = self.target - row['event'] = str(self) + row['event'] = type(self).__name__ row['event_date'] = self.sim.date row['when'] = 'After' row['appt_footprint'] = record_footprint