Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate chain of events for individuals #1468

Draft
wants to merge 25 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
dbff470
Investigate analysis of events at sim level
marghe-molaro Apr 3, 2024
bf64628
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Sep 17, 2024
05098f7
Final data-printing set-up
marghe-molaro Sep 30, 2024
16c071c
Print event chains
marghe-molaro Oct 2, 2024
ba81487
Add chains in mode 2 too and clean up in simulation
marghe-molaro Oct 2, 2024
0474624
Merged with master, and moved all logging into event module to keep t…
marghe-molaro Oct 2, 2024
b1c907c
Fix issue with tests by ensuring standard Polling and infection is ma…
marghe-molaro Oct 7, 2024
cfb4264
Switch iloc for loc
marghe-molaro Oct 7, 2024
e0327de
Change syntax of if statement
marghe-molaro Oct 7, 2024
fceee02
Change syntax of if statement and print string of event
marghe-molaro Oct 9, 2024
eaeae62
Focus on rti and print footprint
marghe-molaro Oct 10, 2024
c7bd9d0
Only store change in individual properties, not entire property row. …
marghe-molaro Oct 11, 2024
769aaec
Style fixes
marghe-molaro Oct 11, 2024
757cee3
Include printing of individual properties at the beginning and at bir…
marghe-molaro Oct 13, 2024
22a5e44
Log everything to simulation, as events logger doesn't seem to be vis…
marghe-molaro Oct 16, 2024
7faa817
Consider all modules included as of interest
marghe-molaro Oct 18, 2024
7232f97
Remove pop-wide HSI warning and make epi default even when printing c…
marghe-molaro Oct 18, 2024
98a8832
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Oct 18, 2024
a6def2d
Style fix
marghe-molaro Oct 18, 2024
ecea532
Remove data generation test, which wasn't really a test
marghe-molaro Oct 18, 2024
ae7a44c
Change dict of properties to string in logging, and add analysis files
marghe-molaro Oct 23, 2024
16299a2
Include debugging option, final set-up of scenario to print data, ana…
marghe-molaro Nov 25, 2024
0dd862f
Change label of person when iterating
marghe-molaro Nov 26, 2024
0e7dc99
Merge branch 'master' into molaro/harvest-training-data
marghe-molaro Dec 9, 2024
84f8263
Correctly retrieve event name
marghe-molaro Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions src/tlo/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
if TYPE_CHECKING:
from tlo import Simulation

import pandas as pd


class Priority(Enum):
"""Enumeration for the Priority, which is used in sorting the events in the simulation queue."""
Expand Down Expand Up @@ -63,8 +65,54 @@ def apply(self, target):

def run(self):
"""Make the event happen."""

print_chains = False
df_before = []

if self.sim.generate_event_chains:
# Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
marghe-molaro marked this conversation as resolved.
Show resolved Hide resolved
print_chains = True
if self.target != self.sim.population:
row = self.sim.population.props.iloc[[self.target]]
marghe-molaro marked this conversation as resolved.
Show resolved Hide resolved
row['person_ID'] = self.target
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't think needed?

row['event'] = self
marghe-molaro marked this conversation as resolved.
Show resolved Hide resolved
row['event_date'] = self.sim.date
row['when'] = 'Before'
self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
else:
df_before = self.sim.population.props.copy()


self.apply(self.target)
self.post_apply_hook()

if print_chains:
if self.target != self.sim.population:
row = self.sim.population.props.iloc[[self.target]]
row['person_ID'] = self.target
row['event'] = self
row['event_date'] = self.sim.date
row['when'] = 'After'
self.sim.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that it's faster to not do these kinds of pandas operations very often, but instead to collect the data in python native structures (sets, dicts, lists, tuples) and then assemble them into a data-frame at the end.

else:
df_after = self.sim.population.props.copy()
change = df_before.compare(df_after)
if ~change.empty:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if ~change.empty:
if not change.empty:

indices = change.index
new_rows_before = df_before.loc[indices]
new_rows_before['person_ID'] = new_rows_before.index
new_rows_before['event'] = self
new_rows_before['event_date'] = self.sim.date
new_rows_before['when'] = 'Before'
new_rows_after = df_after.loc[indices]
new_rows_after['person_ID'] = new_rows_after.index
new_rows_after['event'] = self
new_rows_after['event_date'] = self.sim.date
new_rows_after['when'] = 'After'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not store only the changes?


self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
self.sim.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)


class RegularEvent(Event):
Expand Down
7 changes: 4 additions & 3 deletions src/tlo/methods/demography.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,10 @@ def initialise_simulation(self, sim):
# Launch the repeating event that will store statistics about the population structure
sim.schedule_event(DemographyLoggingEvent(self), sim.date)

# Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
self.other_death_poll = OtherDeathPoll(self)
sim.schedule_event(self.other_death_poll, sim.date)
if sim.generate_event_chains is False:
# Create (and store pointer to) the OtherDeathPoll and schedule first occurrence immediately
self.other_death_poll = OtherDeathPoll(self)
sim.schedule_event(self.other_death_poll, sim.date)
Comment on lines +318 to +321
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unsure why this change is included; perhaps done for something in debugging?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is required if we want to train disease-specific emulators: when we are trying to capture the effect of a single disease, we don't want other causes of death to "interfere" by cutting a life short. That includes the causes grouped as "Others", which we never explicitly include as a disease module when running a sim because they are always included by default.

If we want to use this not for emu training but other purposes then it could become important to not exclude this.


# Log the initial population scaling-factor (to the logger of this module and that of `tlo.methods.population`)
for _logger in (logger, logger_scale_factor):
Expand Down
3 changes: 2 additions & 1 deletion src/tlo/methods/healthsystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2427,7 +2427,8 @@ def process_events_mode_2(self, hold_over: List[HSIEventQueueItem]) -> None:

# Expected appt footprint before running event
_appt_footprint_before_running = event.EXPECTED_APPT_FOOTPRINT
# Run event & get actual footprint

# Run the HSI event (allowing it to return an updated appt_footprint)
actual_appt_footprint = event.run(squeeze_factor=squeeze_factor)

# Check if the HSI event returned updated_appt_footprint, and if so adjust original_call
Expand Down
67 changes: 55 additions & 12 deletions src/tlo/methods/hiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -631,11 +631,12 @@ def initialise_population(self, population):
df.loc[df.is_alive, "hv_date_treated"] = pd.NaT
df.loc[df.is_alive, "hv_date_last_ART"] = pd.NaT

# Launch sub-routines for allocating the right number of people into each category
self.initialise_baseline_prevalence(population) # allocate baseline prevalence
if self.sim.generate_event_chains is False:
# Launch sub-routines for allocating the right number of people into each category
self.initialise_baseline_prevalence(population) # allocate baseline prevalence

self.initialise_baseline_art(population) # allocate baseline art coverage
self.initialise_baseline_tested(population) # allocate baseline testing coverage
self.initialise_baseline_art(population) # allocate baseline art coverage
self.initialise_baseline_tested(population) # allocate baseline testing coverage
marghe-molaro marked this conversation as resolved.
Show resolved Hide resolved

def initialise_baseline_prevalence(self, population):
"""
Expand Down Expand Up @@ -905,10 +906,16 @@ def initialise_simulation(self, sim):
df = sim.population.props
p = self.parameters

# 1) Schedule the Main HIV Regular Polling Event
sim.schedule_event(
HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
)
if self.sim.generate_event_chains:
print("Should be generating data")
sim.schedule_event(
HivPollingEventForDataGeneration(self), sim.date + DateOffset(days=0)
)
else:
# 1) Schedule the Main HIV Regular Polling Event
sim.schedule_event(
HivRegularPollingEvent(self), sim.date + DateOffset(days=0)
)

# 2) Schedule the Logging Event
sim.schedule_event(HivLoggingEvent(self), sim.date + DateOffset(years=1))
Expand Down Expand Up @@ -1662,6 +1669,37 @@ def do_at_generic_first_appt(
# Main Polling Event
# ---------------------------------------------------------------------------

class HivPollingEventForDataGeneration(RegularEvent, PopulationScopeEventMixin):
    """The HIV polling event used for data generation.

    Schedules an ``HivInfectionEvent`` for every person who is alive and not
    currently HIV-infected, on a randomly drawn date counted from the current
    simulation date.
    """

    def __init__(self, module):
        # NOTE(review): the frequency is 120 years, presumably so that the poll
        # fires only once within a simulation - confirm.
        super().__init__(module, frequency=DateOffset(years=120))

    def apply(self, population):
        df = population.props

        # Everyone who is alive and not infected is treated as susceptible.
        susceptible = df.is_alive & ~df.hv_inf
        susc_idx = df.loc[susceptible].index
        print("Number of individuals susceptible", len(susc_idx))

        # Upper bound (exclusive) on the delay, in days: whole calendar years
        # between now and the end year, plus one, at 365 days each.
        # NOTE(review): because of the "+1" the drawn date may fall after
        # sim.end_date - confirm that this is intended.
        max_delay_days = 365 * (int(self.sim.end_date.year - self.sim.date.year) + 1)

        # One infection event per susceptible person.
        for person_id in susc_idx:
            delay = pd.DateOffset(days=self.module.rng.randint(0, max_delay_days))
            self.sim.schedule_event(
                HivInfectionEvent(self.module, person_id), self.sim.date + delay
            )

class HivRegularPollingEvent(RegularEvent, PopulationScopeEventMixin):
""" The HIV Regular Polling Events
Expand All @@ -1683,6 +1721,7 @@ def apply(self, population):
fraction_of_year_between_polls = self.frequency.months / 12
beta = p["beta"] * fraction_of_year_between_polls


# ----------------------------------- HORIZONTAL TRANSMISSION -----------------------------------
def horizontal_transmission(to_sex, from_sex):
# Count current number of alive 15-80 year-olds at risk of transmission
Expand Down Expand Up @@ -1758,6 +1797,7 @@ def horizontal_transmission(to_sex, from_sex):
HivInfectionEvent(self.module, idx), date_of_infection
)


# ----------------------------------- SPONTANEOUS TESTING -----------------------------------
def spontaneous_testing(current_year):

Expand Down Expand Up @@ -1861,11 +1901,12 @@ def vmmc_for_child():
priority=0,
)

# Horizontal transmission: Male --> Female
horizontal_transmission(from_sex="M", to_sex="F")
if self.sim.generate_event_chains is False:
# Horizontal transmission: Male --> Female
horizontal_transmission(from_sex="M", to_sex="F")

# Horizontal transmission: Female --> Male
horizontal_transmission(from_sex="F", to_sex="M")
# Horizontal transmission: Female --> Male
horizontal_transmission(from_sex="F", to_sex="M")

# testing
# if year later than 2020, set testing rates to those reported in 2020
Expand All @@ -1882,6 +1923,8 @@ def vmmc_for_child():
vmmc_for_child()




# ---------------------------------------------------------------------------
# Natural History Events
# ---------------------------------------------------------------------------
Expand Down
48 changes: 48 additions & 0 deletions src/tlo/methods/hsi_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
from tlo.events import Event
from tlo.population import Population

import pandas as pd


if TYPE_CHECKING:
from tlo import Module, Simulation
from tlo.methods.healthsystem import HealthSystem
Expand Down Expand Up @@ -187,9 +190,54 @@ def _run_after_hsi_event(self) -> None:

def run(self, squeeze_factor):
"""Make the event happen."""

print_chains = False
df_before = []

if self.sim.generate_event_chains:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

obviously would be nice to factorise-out this logic, which repeats in events.py

(Shame we don't have HSI_Event inheriting from Event, and we'd get it for free. We used to.. but it was changed at some point for a reason I can no longer remember.)

# Only print event if it belongs to modules of interest and if it is not in the list of events to ignore
if (self.module in self.sim.generate_event_chains_modules_of_interest) and all(sub not in str(self) for sub in self.sim.generate_event_chains_ignore_events):
print_chains = True
if self.target != self.sim.population:
row = self.sim.population.props.iloc[[self.target]]
row['person_ID'] = self.target
row['event'] = self
row['event_date'] = self.sim.date
row['when'] = 'Before'
self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
else:
df_before = self.sim.population.props.copy()

updated_appt_footprint = self.apply(self.target, squeeze_factor)
self.post_apply_hook()
self._run_after_hsi_event()

if print_chains:
if self.target != self.sim.population:
row = self.sim.population.props.iloc[[self.target]]
row['person_ID'] = self.target
row['event'] = self
row['event_date'] = self.sim.date
row['when'] = 'After'
self.event_chains = pd.concat([self.sim.event_chains, row], ignore_index=True)
else:
df_after = self.sim.population.props.copy()
change = df_before.compare(df_after)
if ~change.empty:
indices = change.index
new_rows_before = df_before.loc[indices]
new_rows_before['person_ID'] = new_rows_before.index
new_rows_before['event'] = self
new_rows_before['event_date'] = self.sim.date
new_rows_before['when'] = 'Before'
new_rows_after = df_after.loc[indices]
new_rows_after['person_ID'] = new_rows_after.index
new_rows_after['event'] = self
new_rows_after['event_date'] = self.sim.date
new_rows_after['when'] = 'After'

self.event_chains = pd.concat([self.sim.event_chains,new_rows_before], ignore_index=True)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rather than building up the enormous thing in memory in the format of a data frame, I have the feeling that it's more efficient to put out to a logger bit by bit.

self.event_chains = pd.concat([self.sim.event_chains,new_rows_after], ignore_index=True)
return updated_appt_footprint

def get_consumables(
Expand Down
99 changes: 75 additions & 24 deletions src/tlo/methods/tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -833,28 +833,29 @@ def initialise_population(self, population):
df["tb_date_ipt"] = pd.NaT

# # ------------------ infection status ------------------ #
# WHO estimates of active TB for 2010 to get infected initial population
# don't need to scale or include treated proportion as no-one on treatment yet
inc_estimates = p["who_incidence_estimates"]
incidence_year = (inc_estimates.loc[
(inc_estimates.year == self.sim.date.year), "incidence_per_100k"
].values[0]) / 100_000

incidence_year = incidence_year * p["scaling_factor_WHO"]

self.assign_active_tb(
population,
strain="ds",
incidence=incidence_year)

self.assign_active_tb(
population,
strain="mdr",
incidence=incidence_year * p['prop_mdr2010'])

self.send_for_screening_general(
population
) # send some baseline population for screening
if self.sim.generate_event_chains is False:
# WHO estimates of active TB for 2010 to get infected initial population
# don't need to scale or include treated proportion as no-one on treatment yet
inc_estimates = p["who_incidence_estimates"]
incidence_year = (inc_estimates.loc[
(inc_estimates.year == self.sim.date.year), "incidence_per_100k"
].values[0]) / 100_000

incidence_year = incidence_year * p["scaling_factor_WHO"]

self.assign_active_tb(
population,
strain="ds",
incidence=incidence_year)

self.assign_active_tb(
population,
strain="mdr",
incidence=incidence_year * p['prop_mdr2010'])

self.send_for_screening_general(
population
) # send some baseline population for screening

def initialise_simulation(self, sim):
"""
Expand All @@ -867,7 +868,11 @@ def initialise_simulation(self, sim):
sim.schedule_event(TbActiveEvent(self), sim.date)
sim.schedule_event(TbRegularEvents(self), sim.date)
sim.schedule_event(TbSelfCureEvent(self), sim.date)
sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))

if sim.generate_event_chains is False:
sim.schedule_event(TbActiveCasePoll(self), sim.date + DateOffset(years=1))
else:
sim.schedule_event(TbActiveCasePollGenerateData(self), sim.date + DateOffset(days=0))

# 2) log at the end of the year
# Optional: Schedule the scale-up of programs
Expand Down Expand Up @@ -1366,6 +1371,53 @@ def is_subset(col_for_set, col_for_subset):
# # TB infection event
# # ---------------------------------------------------------------------------

class TbActiveCasePollGenerateData(RegularEvent, PopulationScopeEventMixin):
    """The Tb Regular Poll Event for Data Generation for assigning active infections

    * selects everyone who is alive and does not currently have active tb
    * assigns the first half of them (in index order) the "ds" strain and the
      remainder the "mdr" strain
    * schedules the onset of active tb for each of them on a randomly drawn
      date counted from the current simulation date
    """

    def __init__(self, module):
        # NOTE(review): the frequency is 120 years, presumably so that the poll
        # fires only once within a simulation - confirm.
        super().__init__(module, frequency=DateOffset(years=120))

    def apply(self, population):
        df = population.props
        now = self.sim.date
        rng = self.module.rng

        # Everyone who is alive and whose tb status is anything other than
        # "active" is selected.
        candidates = df.loc[df.is_alive & (df.tb_inf != "active")].index

        # Split the two strains equally among the selected population.
        middle_index = len(candidates) // 2
        strain_groups = (
            ("ds", candidates[:middle_index]),
            ("mdr", candidates[middle_index:]),
        )

        # Upper bound (exclusive) on the delay, in days: whole calendar years
        # between now and the end year, plus one, at 365 days each. Both
        # strains use the same horizon, measured from the current date.
        max_delay_days = 365 * (int(self.sim.end_date.year - now.year) + 1)

        # Schedule onset of active tb. This is equivalent to the "Onset", so it
        # does not matter how long after now the strain was decided.
        for strain, person_ids in strain_groups:
            for person_id in person_ids:
                date_progression = now + pd.DateOffset(
                    days=rng.randint(0, max_delay_days)
                )
                # set date of active tb - properties will be updated at
                # TbActiveEvent poll daily
                df.at[person_id, "tb_scheduled_date_active"] = date_progression
                df.at[person_id, "tb_strain"] = strain


class TbActiveCasePoll(RegularEvent, PopulationScopeEventMixin):
"""The Tb Regular Poll Event for assigning active infections
Expand Down Expand Up @@ -1439,7 +1491,6 @@ def apply(self, population):

self.module.update_parameters_for_program_scaleup()


class TbActiveEvent(RegularEvent, PopulationScopeEventMixin):
"""
* check for those with dates of active tb onset within last time-period
Expand Down
Loading
Loading