Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Age distributions #205

Merged
merged 30 commits into from
Jan 21, 2024
Merged
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,21 @@ All notable changes to the codebase are documented in this file. Changes that ma
:local:
:depth: 1

Version 0.1.2 (2024-01-15)
--------------------------
- Functionality for converting birth & fertility data to callable parameters within SciPy distributions
- Read in age distributions for people initializations
- *GitHub info*: PR `205 <https://github.com/amath-idm/stisim/pull/205>`_


Version 0.1.1 (2024-01-11)
Version 0.1.1 (2024-01-12)
--------------------------
- Functionality for converting birth & fertility data to a callable parameter within SciPy distributions
- *GitHub info*: PR `203 <https://github.com/amath-idm/stisim/pull/203>`_
- Improving performance of MultiRNG
- Now factoring the timestep, ``dt``, into transmission calculations
- *GitHub info*: PRs `204 <https://github.com/amath-idm/stisim/pull/204>`_


Version 0.1.1 (2024-01-10)
Version 0.1.0 (2023-12-10)
--------------------------
- Allows SciPy distributions to be used as parameters
- Optionally use multiple random number streams and other tricks to maintain coherence between simulations
Expand Down
56 changes: 28 additions & 28 deletions stisim/demographics.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ def standardize_birth_data(self):
return birth_rate

def init_results(self, sim):
self.results += ss.Result(self.name, 'new', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'new', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int, scale=False)
return

def update(self, sim):
Expand All @@ -94,6 +94,7 @@ def add_births(self, sim):
# Add n_new births to each state in the sim
n_new = self.get_births(sim)
new_uids = sim.people.grow(n_new)
sim.people.age[new_uids] = 0
return new_uids

def update_results(self, n_new, sim):
Expand All @@ -102,7 +103,7 @@ def update_results(self, n_new, sim):
def finalize(self, sim):
super().finalize(sim)
self.results['cumulative'] = np.cumsum(self.results['new'])
self.results['cbr'] = np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
self.results['cbr'] = 1000*np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this use units instead of 1000?



class background_deaths(DemographicModule):
Expand Down Expand Up @@ -202,9 +203,9 @@ def standardize_death_data(self):
return death_rate

def init_results(self, sim):
self.results += ss.Result(self.name, 'new', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'cmr', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'new', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'cmr', sim.npts, dtype=int, scale=False)
return

def update(self, sim):
Expand All @@ -223,8 +224,9 @@ def update_results(self, n_deaths, sim):
self.results['new'][sim.ti] = n_deaths

def finalize(self, sim):
super().finalize(sim)
self.results['cumulative'] = np.cumsum(self.results['new'])
self.results['cmr'] = np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
self.results['cmr'] = 1000*np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again here, units?



class Pregnancy(DemographicModule):
Expand All @@ -247,9 +249,9 @@ def __init__(self, pars=None, metadata=None):
'dur_postpartum': 0.5, # Make this a distribution?
'fertility_rate': 0, # Usually this will be provided in CSV format
'rel_fertility': 1,
'maternal_death_rate': 0.15,
'maternal_death_rate': 0,
'sex_ratio': 0.5, # Ratio of babies born female
'units': 1e-3, # Assumes fertility rates are per 1000. If using percentages, switch this to 1
'units': 1e-3, # ???
}, self.pars)

# Process metadata. Defaults here are the labels used by UN data
Expand Down Expand Up @@ -287,25 +289,22 @@ def make_fertility_prob_fn(module, sim, uids):
val_label = module.metadata.data_cols['value']

available_years = module.pars.fertility_rate[year_label].unique()
year_ind = sc.findnearest(available_years, sim.year)
year_ind = sc.findnearest(available_years, sim.year-module.pars.dur_pregnancy)
nearest_year = available_years[year_ind]

df = module.pars.fertility_rate.loc[module.pars.fertility_rate[year_label] == nearest_year]
conception_arr = df[val_label].values
conception_arr = np.append(conception_arr, 0) # Add zeros for those outside data range
df_arr = df[val_label].values # Pull out dataframe values
df_arr = np.append(df_arr, 0) # Add zeros for those outside data range

# Process age data
age_bins = df[age_label].unique()
age_bins = np.append(age_bins, 50)
age_inds = np.digitize(sim.people.age[uids], age_bins) - 1
age_inds[age_inds>=max(age_inds)] = -1 # This ensures women outside the data range will get a value of 0

# Make array of fertility rates - TODO, check indexing works
# Make array of fertility rates
fertility_rate = pd.Series(index=uids)
fertility_rate[uids] = conception_arr[age_inds]
fertility_rate[uids[sim.people.male[uids]]] = 0
fertility_rate[uids[(sim.people.age < 0)[uids]]] = 0
fertility_rate[uids[(sim.people.age > max(age_inds))[uids]]] = 0
fertility_rate[uids] = df_arr[age_inds]

# Scale from rate to probability. Consider an exponential here.
fertility_prob = fertility_rate * (module.pars.units * module.pars.rel_fertility * sim.pars.dt)
Expand All @@ -330,8 +329,9 @@ def init_results(self, sim):
Still unclear whether this logic should live in the pregnancy module, the
individual disease modules, the connectors, or the sim.
"""
self.results += ss.Result(self.name, 'pregnancies', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'births', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'pregnancies', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'births', sim.npts, dtype=int, scale=True)
self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int, scale=False)
return

def update(self, sim):
Expand All @@ -356,7 +356,7 @@ def update_states(self, sim):
self.ti_delivery[deliveries] = sim.ti

# Check for new women emerging from post-partum
postpartum = ~self.pregnant & (self.ti_postpartum == sim.ti)
postpartum = ~self.pregnant & (self.ti_postpartum <= sim.ti)
self.postpartum[postpartum] = False
self.susceptible[postpartum] = True
self.ti_postpartum[postpartum] = sim.ti
Expand All @@ -370,14 +370,11 @@ def update_states(self, sim):
def make_pregnancies(self, sim):
"""
Select people to make pregnancy using incidence data
This should use ASFR data from https://population.un.org/wpp/Download/Standard/Fertility/
"""
# Abbreviate key variables
# Abbreviate
ppl = sim.people

# If incidence of pregnancy is non-zero, make some cases
# Think about how to deal with age/time-varying fertility
denom_conds = ppl.female & self.susceptible
denom_conds = ppl.female & self.susceptible & ppl.alive
inds_to_choose_from = ss.true(denom_conds)
uids = self.fertility_dist.filter(inds_to_choose_from)

Expand All @@ -390,10 +387,9 @@ def make_pregnancies(self, sim):

# Grow the arrays and set properties for the unborn agents
new_uids = sim.people.grow(len(new_slots))

sim.people.age[new_uids] = -self.pars.dur_pregnancy
sim.people.slot[new_uids] = new_slots # Before sampling female_dist
sim.people.female[new_uids] = self.sex_dist.rvs(uids)
sim.people.female[new_uids] = self.sex_dist.rvs(new_uids)

# Add connections to any vertical transmission layers
# Placeholder code to be moved / refactored. The maternal network may need to be
Expand Down Expand Up @@ -436,3 +432,7 @@ def update_results(self, sim):
self.results['pregnancies'][sim.ti] = np.count_nonzero(self.ti_pregnant == sim.ti)
self.results['births'][sim.ti] = np.count_nonzero(self.ti_delivery == sim.ti)
return

def finalize(self, sim):
super().finalize(sim)
self.results['cbr'] = 1000* np.divide(self.results['births'], sim.results['n_alive'], where=sim.results['n_alive']>0)
126 changes: 47 additions & 79 deletions stisim/diseases/disease.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,15 +155,15 @@ def init_results(self, sim):
Result for 'n_susceptible'
"""
for state in self._boolean_states:
self.results += ss.Result(self.name, f'n_{state.name}', sim.npts, dtype=int)
self.results += ss.Result(self.name, f'n_{state.name}', sim.npts, dtype=int, scale=True)
return

def finalize_results(self, sim):
"""
Finalize results
"""
# TODO - will probably need to account for rescaling outputs for the default results here
pass
super().finalize_results(sim)
return

def update_pre(self, sim):
"""
Expand Down Expand Up @@ -299,8 +299,8 @@ def init_results(self, sim):
Initialize results
"""
super().init_results(sim)
self.results += ss.Result(self.name, 'prevalence', sim.npts, dtype=float)
self.results += ss.Result(self.name, 'new_infections', sim.npts, dtype=int)
self.results += ss.Result(self.name, 'prevalence', sim.npts, dtype=float, scale=False)
self.results += ss.Result(self.name, 'new_infections', sim.npts, dtype=int, scale=True)
return

def update_pre(self, sim):
Expand All @@ -312,124 +312,92 @@ def update_pre(self, sim):
"""
pass

def _make_new_cases_singlerng(self, sim):
def _make_new_cases_singlerng(self, people):
# Not common-random-number-safe, but more efficient for when not using the multirng feature
new_cases = []
sources = []
for k, layer in sim.people.networks.items():
for k, layer in people.networks.items():
if k in self.pars['beta']:
contacts = layer.contacts
rel_trans = (self.infected & sim.people.alive) * self.rel_trans
rel_sus = (self.susceptible & sim.people.alive) * self.rel_sus
rel_trans = (self.infected & people.alive) * self.rel_trans
rel_sus = (self.susceptible & people.alive) * self.rel_sus
for a, b, beta in [[contacts.p1, contacts.p2, self.pars.beta[k][0]],
[contacts.p2, contacts.p1, self.pars.beta[k][1]]]:
if beta == 0:
continue
# probability of a->b transmission
p_transmit = rel_trans[a] * rel_sus[b] * contacts.beta * beta # TODO: Needs DT
p_transmit = rel_trans[a] * rel_sus[b] * contacts.beta * beta * people.dt
new_cases_bool = np.random.random(len(a)) < p_transmit # As this class is not common-random-number safe anyway, calling np.random is perfectly fine!
new_cases.append(b[new_cases_bool])
sources.append(a[new_cases_bool])
return np.concatenate(new_cases), np.concatenate(sources)

def _choose_new_cases_multirng(self, people):
def _make_new_cases_multirng(self, people):
'''
Common-random-number-safe transmission code works by computing the
probability of each _node_ acquiring a case rather than checking if each
_edge_ transmits.

Subsequent step uses a roulette wheel with slotted RNG to determine
infection source.
'''
n = len(people.uid) # TODO: possibly could be shortened to just the people who are alive
p_acq_node = np.zeros(n)

dfs = []
for lkey, layer in people.networks.items():
if lkey in self.pars['beta']:
contacts = layer.contacts
rel_trans = self.rel_trans * (self.infected & people.alive)
rel_sus = self.rel_sus * (self.susceptible & people.alive)

a_to_b = [contacts.p1, contacts.p2, self.pars.beta[lkey][0]]
b_to_a = [contacts.p2, contacts.p1, self.pars.beta[lkey][1]]
for a, b, beta in [a_to_b, b_to_a]: # Transmission from a --> b
a_to_b = ['p1', 'p2', self.pars.beta[lkey][0]]
b_to_a = ['p2', 'p1', self.pars.beta[lkey][1]]
for lbl_src, lbl_tgt, beta in [a_to_b, b_to_a]: # Transmission from a --> b
if beta == 0:
continue

# Accumulate acquisition probability for each person (node)
node_from_edge = np.ones((n, len(a)))
bi = people._uid_map[b] # Indices of b (rather than uid)
node_from_edge[bi, np.arange(len(a))] = 1 - rel_trans[a] * rel_sus[b] * contacts.beta * beta # TODO: Needs DT
p_acq_node = 1 - (1-p_acq_node) * node_from_edge.prod(axis=1) # (1-p1)*(1-p2)*...

a, b = contacts[lbl_src], contacts[lbl_tgt]
df = pd.DataFrame({'p1': a, 'p2': b})
df['p'] = (rel_trans[a] * rel_sus[b] * contacts.beta * beta * people.dt).values
df = df.loc[df['p']>0]
dfs.append(df)

# Slotted draw, need to find a long-term place for this logic
slots = people.slot[people.uid]
#p = np.full(np.max(slots)+1, 0, dtype=p_acq_node.dtype)
#p[slots] = p_acq_node
#new_cases_bool = sps.bernoulli.rvs(p=p).astype(bool)[slots]
new_cases_bool = sps.uniform.rvs(size=np.max(slots)+1)[slots] < p_acq_node
new_cases = people.uid[new_cases_bool]
return new_cases

def _determine_case_source_multirng(self, people, new_cases):
'''
Given the uids of new cases, determine which agents are the source of each case
'''
sources = np.zeros_like(new_cases)
df = pd.concat(dfs)
if len(df) == 0:
return [], []

p_acq_node = df.groupby('p2').apply(lambda x: 1-np.prod(1-x['p']))
uids = p_acq_node.index.values

# Slotted draw, need to find a long-term place for this logic
slots = people.slot[new_cases]
r = sps.uniform.rvs(size=np.max(slots)+1)[slots]

for i, uid in enumerate(new_cases):
p_acqs = []
possible_sources = []

for lkey, layer in people.networks.items():
if lkey in self.pars['beta']:
contacts = layer.contacts
a_to_b = [contacts.p1, contacts.p2, self.pars.beta[lkey][0]]
b_to_a = [contacts.p2, contacts.p1, self.pars.beta[lkey][1]]
for a, b, beta in [a_to_b, b_to_a]: # Transmission from a --> b
if beta == 0:
continue

inds = np.where(b == uid)[0]
if len(inds) == 0:
continue
neighbors = a[inds]

rel_trans = self.rel_trans[neighbors] * (self.infected[neighbors] & people.alive[neighbors])
rel_sus = self.rel_sus[uid] * (self.susceptible[uid] & people.alive[uid])
beta_combined = contacts.beta[inds] * beta

# Compute acquisition probabilities from neighbors --> uid
# TODO: Could remove zeros
p_acqs.append((rel_trans * rel_sus * beta_combined).__array__()) # Needs DT
possible_sources.append(neighbors.__array__())

# Concatenate across layers and directions (p1->p2 vs p2->p1)
p_acqs = np.concatenate(p_acqs)
possible_sources = np.concatenate(possible_sources)

if len(possible_sources) == 1: # Easy if only one possible source
sources[i] = possible_sources[0]
slots = people.slot[uids]
new_cases_bool = sps.uniform.rvs(size=np.max(slots)+1)[slots] < p_acq_node.values
new_cases = uids[new_cases_bool]

# Now choose infection source for new cases
def choose_source(df):
if len(df) == 1: # Easy if only one possible source
src_idx = 0
else:
# Roulette selection using slotted draw r associated with this new case
cumsum = p_acqs / p_acqs.sum()
source_idx = np.argmax(cumsum >= r[i])
sources[i] = possible_sources[source_idx]
return sources
cumsum = df['p'] / df['p'].sum()
src_idx = np.argmax(cumsum >= df['r'])
return df['p1'].iloc[src_idx]

df['r'] = sps.uniform.rvs(size=np.max(slots)+1)[slots]
sources = df.set_index('p2').loc[new_cases].groupby('p2').apply(choose_source)

return new_cases, sources[new_cases].values

def make_new_cases(self, sim):
""" Add new cases of module, through transmission, incidence, etc. """
if not ss.options.multirng:
# Determine new cases for singlerng
new_cases, sources = self._make_new_cases_singlerng(sim)
new_cases, sources = self._make_new_cases_singlerng(sim.people)
else:
# Determine new cases for multirng
new_cases = self._choose_new_cases_multirng(sim.people)
if len(new_cases):
# Now determine whom infected each case
sources = self._determine_case_source_multirng(sim.people, new_cases)
new_cases, sources = self._make_new_cases_multirng(sim.people)

if len(new_cases):
self.set_prognoses(sim, new_cases, sources)
Expand Down
9 changes: 9 additions & 0 deletions stisim/modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,18 @@ def initialize(self, sim):
return

def finalize(self, sim):
self.finalize_results(sim)
self.finalized = True
return

def finalize_results(self, sim):
"""
Finalize results
"""
# Scale results
for reskey, res in self.results.items():
if isinstance(res, ss.Result) and res.scale:
self.results[reskey] = self.results[reskey]*sim.pars.pop_scale
@property
def states(self):
"""
Expand Down
Loading
Loading