starsimhub · cliffckerr · Jan 21, 2024 · Jan 11, 2024 · Jan 11, 2024 · Jan 11, 2024
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -8,14 +8,21 @@ All notable changes to the codebase are documented in this file. Changes that ma
    :local:
    :depth: 1
 
+Version 0.1.2 (2024-01-15)
+--------------------------
+- Functionality for converting birth & fertility data to callable parameters within SciPy distributions
+- Read in age distributions for people initializations 
+- *GitHub info*: PR `205 <https://github.com/amath-idm/stisim/pull/205>`_
+
 
-Version 0.1.1 (2024-01-11)
+Version 0.1.1 (2024-01-12)
 --------------------------
-- Functionality for converting birth & fertility data to a callable parameter within SciPy distributions
-- *GitHub info*: PR `203 <https://github.com/amath-idm/stisim/pull/203>`_
+- Improving performance of MultiRNG
+- Now factoring the timestep, ``dt``, into transmission calculations
+- *GitHub info*: PRs `204 <https://github.com/amath-idm/stisim/pull/204>`_
 
 
-Version 0.1.1 (2024-01-10)
+Version 0.1.0 (2023-12-10)
 --------------------------
 - Allows SciPy distributions to be used as parameters
 - Optionally use multiple random number streams and other tricks to maintain coherence between simulations

diff --git a/stisim/demographics.py b/stisim/demographics.py
@@ -65,9 +65,9 @@ def standardize_birth_data(self):
         return birth_rate
 
     def init_results(self, sim):
-        self.results += ss.Result(self.name, 'new', sim.npts, dtype=int)
-        self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int)
-        self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int)
+        self.results += ss.Result(self.name, 'new', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int, scale=False)
         return
 
     def update(self, sim):
@@ -94,6 +94,7 @@ def add_births(self, sim):
         # Add n_new births to each state in the sim
         n_new = self.get_births(sim)
         new_uids = sim.people.grow(n_new)
+        sim.people.age[new_uids] = 0
         return new_uids
 
     def update_results(self, n_new, sim):
@@ -102,7 +103,7 @@ def update_results(self, n_new, sim):
     def finalize(self, sim):
         super().finalize(sim)
         self.results['cumulative'] = np.cumsum(self.results['new'])
-        self.results['cbr'] = np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
+        self.results['cbr'] = 1000*np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
 
 
 class background_deaths(DemographicModule):
@@ -202,9 +203,9 @@ def standardize_death_data(self):
         return death_rate
 
     def init_results(self, sim):
-        self.results += ss.Result(self.name, 'new', sim.npts, dtype=int)
-        self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int)
-        self.results += ss.Result(self.name, 'cmr', sim.npts, dtype=int)
+        self.results += ss.Result(self.name, 'new', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'cumulative', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'cmr', sim.npts, dtype=int, scale=False)
         return
 
     def update(self, sim):
@@ -223,8 +224,9 @@ def update_results(self, n_deaths, sim):
         self.results['new'][sim.ti] = n_deaths
 
     def finalize(self, sim):
+        super().finalize(sim)
         self.results['cumulative'] = np.cumsum(self.results['new'])
-        self.results['cmr'] = np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
+        self.results['cmr'] = 1000*np.divide(self.results['new'], sim.results['n_alive'], where=sim.results['n_alive']>0)
 
 
 class Pregnancy(DemographicModule):
@@ -247,9 +249,9 @@ def __init__(self, pars=None, metadata=None):
             'dur_postpartum': 0.5,  # Make this a distribution?
             'fertility_rate': 0,    # Usually this will be provided in CSV format
             'rel_fertility': 1,
-            'maternal_death_rate': 0.15,
+            'maternal_death_rate': 0,
             'sex_ratio': 0.5,       # Ratio of babies born female
-            'units': 1e-3,          # Assumes fertility rates are per 1000. If using percentages, switch this to 1
+            'units': 1e-3,             # ???
         }, self.pars)
 
         # Process metadata. Defaults here are the labels used by UN data
@@ -287,25 +289,22 @@ def make_fertility_prob_fn(module, sim, uids):
             val_label = module.metadata.data_cols['value']
 
             available_years = module.pars.fertility_rate[year_label].unique()
-            year_ind = sc.findnearest(available_years, sim.year)
+            year_ind = sc.findnearest(available_years, sim.year-module.pars.dur_pregnancy)
             nearest_year = available_years[year_ind]
 
             df = module.pars.fertility_rate.loc[module.pars.fertility_rate[year_label] == nearest_year]
-            conception_arr = df[val_label].values
-            conception_arr = np.append(conception_arr, 0)  # Add zeros for those outside data range
+            df_arr = df[val_label].values  # Pull out dataframe values
+            df_arr = np.append(df_arr, 0)  # Add zeros for those outside data range
 
             # Process age data
             age_bins = df[age_label].unique()
             age_bins = np.append(age_bins, 50)
             age_inds = np.digitize(sim.people.age[uids], age_bins) - 1
             age_inds[age_inds>=max(age_inds)] = -1  # This ensures women outside the data range will get a value of 0
 
-            # Make array of fertility rates - TODO, check indexing works
+            # Make array of fertility rates
             fertility_rate = pd.Series(index=uids)
-            fertility_rate[uids] = conception_arr[age_inds]
-            fertility_rate[uids[sim.people.male[uids]]] = 0
-            fertility_rate[uids[(sim.people.age < 0)[uids]]] = 0
-            fertility_rate[uids[(sim.people.age > max(age_inds))[uids]]] = 0
+            fertility_rate[uids] = df_arr[age_inds]
 
         # Scale from rate to probability. Consider an exponential here.
         fertility_prob = fertility_rate * (module.pars.units * module.pars.rel_fertility * sim.pars.dt)
@@ -330,8 +329,9 @@ def init_results(self, sim):
         Still unclear whether this logic should live in the pregnancy module, the
         individual disease modules, the connectors, or the sim.
         """
-        self.results += ss.Result(self.name, 'pregnancies', sim.npts, dtype=int)
-        self.results += ss.Result(self.name, 'births', sim.npts, dtype=int)
+        self.results += ss.Result(self.name, 'pregnancies', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'births', sim.npts, dtype=int, scale=True)
+        self.results += ss.Result(self.name, 'cbr', sim.npts, dtype=int, scale=False)
         return
 
     def update(self, sim):
@@ -356,7 +356,7 @@ def update_states(self, sim):
         self.ti_delivery[deliveries] = sim.ti
 
         # Check for new women emerging from post-partum
-        postpartum = ~self.pregnant & (self.ti_postpartum == sim.ti)
+        postpartum = ~self.pregnant & (self.ti_postpartum <= sim.ti)
         self.postpartum[postpartum] = False
         self.susceptible[postpartum] = True
         self.ti_postpartum[postpartum] = sim.ti
@@ -370,14 +370,11 @@ def update_states(self, sim):
     def make_pregnancies(self, sim):
         """
         Select people to make pregnancy using incidence data
-        This should use ASFR data from https://population.un.org/wpp/Download/Standard/Fertility/
         """
-        # Abbreviate key variables
+        # Abbreviate
         ppl = sim.people
 
-        # If incidence of pregnancy is non-zero, make some cases
-        # Think about how to deal with age/time-varying fertility
-        denom_conds = ppl.female & self.susceptible
+        denom_conds = ppl.female & self.susceptible & ppl.alive
         inds_to_choose_from = ss.true(denom_conds)
         uids = self.fertility_dist.filter(inds_to_choose_from)
 
@@ -390,10 +387,9 @@ def make_pregnancies(self, sim):
 
             # Grow the arrays and set properties for the unborn agents
             new_uids = sim.people.grow(len(new_slots))
-
             sim.people.age[new_uids] = -self.pars.dur_pregnancy
             sim.people.slot[new_uids] = new_slots # Before sampling female_dist
-            sim.people.female[new_uids] = self.sex_dist.rvs(uids)
+            sim.people.female[new_uids] = self.sex_dist.rvs(new_uids)
 
             # Add connections to any vertical transmission layers
             # Placeholder code to be moved / refactored. The maternal network may need to be
@@ -436,3 +432,7 @@ def update_results(self, sim):
         self.results['pregnancies'][sim.ti] = np.count_nonzero(self.ti_pregnant == sim.ti)
         self.results['births'][sim.ti] = np.count_nonzero(self.ti_delivery == sim.ti)
         return
+
+    def finalize(self, sim):
+        super().finalize(sim)
+        self.results['cbr'] = 1000* np.divide(self.results['births'], sim.results['n_alive'], where=sim.results['n_alive']>0)
diff --git a/stisim/diseases/disease.py b/stisim/diseases/disease.py
@@ -155,15 +155,15 @@ def init_results(self, sim):
         Result for 'n_susceptible'
         """
         for state in self._boolean_states:
-            self.results += ss.Result(self.name, f'n_{state.name}', sim.npts, dtype=int)
+            self.results += ss.Result(self.name, f'n_{state.name}', sim.npts, dtype=int, scale=True)
         return
 
     def finalize_results(self, sim):
         """
         Finalize results
         """
-        # TODO - will probably need to account for rescaling outputs for the default results here
-        pass
+        super().finalize_results(sim)
+        return
 
     def update_pre(self, sim):
         """
@@ -299,8 +299,8 @@ def init_results(self, sim):
         Initialize results
         """
         super().init_results(sim)
-        self.results += ss.Result(self.name, 'prevalence', sim.npts, dtype=float)
-        self.results += ss.Result(self.name, 'new_infections', sim.npts, dtype=int)
+        self.results += ss.Result(self.name, 'prevalence', sim.npts, dtype=float, scale=False)
+        self.results += ss.Result(self.name, 'new_infections', sim.npts, dtype=int, scale=True)
         return
 
     def update_pre(self, sim):
@@ -312,124 +312,92 @@ def update_pre(self, sim):
         """
         pass
 
-    def _make_new_cases_singlerng(self, sim):
+    def _make_new_cases_singlerng(self, people):
         # Not common-random-number-safe, but more efficient for when not using the multirng feature
         new_cases = []
         sources = []
-        for k, layer in sim.people.networks.items():
+        for k, layer in people.networks.items():
             if k in self.pars['beta']:
                 contacts = layer.contacts
-                rel_trans = (self.infected & sim.people.alive) * self.rel_trans
-                rel_sus = (self.susceptible & sim.people.alive) * self.rel_sus
+                rel_trans = (self.infected & people.alive) * self.rel_trans
+                rel_sus = (self.susceptible & people.alive) * self.rel_sus
                 for a, b, beta in [[contacts.p1, contacts.p2, self.pars.beta[k][0]],
                                    [contacts.p2, contacts.p1, self.pars.beta[k][1]]]:
                     if beta == 0:
                         continue
                     # probability of a->b transmission
-                    p_transmit = rel_trans[a] * rel_sus[b] * contacts.beta * beta # TODO: Needs DT
+                    p_transmit = rel_trans[a] * rel_sus[b] * contacts.beta * beta * people.dt
                     new_cases_bool = np.random.random(len(a)) < p_transmit # As this class is not common-random-number safe anyway, calling np.random is perfectly fine!
                     new_cases.append(b[new_cases_bool])
                     sources.append(a[new_cases_bool])
         return np.concatenate(new_cases), np.concatenate(sources)
 
-    def _choose_new_cases_multirng(self, people):
+    def _make_new_cases_multirng(self, people):
         '''
         Common-random-number-safe transmission code works by computing the
         probability of each _node_ acquiring a case rather than checking if each
         _edge_ transmits.
+
+        Subsequent step uses a roulette wheel with slotted RNG to determine
+        infection source.
         '''
         n = len(people.uid) # TODO: possibly could be shortened to just the people who are alive
         p_acq_node = np.zeros(n)
 
+        dfs = []
         for lkey, layer in people.networks.items():
             if lkey in self.pars['beta']:
                 contacts = layer.contacts
                 rel_trans = self.rel_trans * (self.infected & people.alive)
                 rel_sus = self.rel_sus * (self.susceptible & people.alive)
 
-                a_to_b = [contacts.p1, contacts.p2, self.pars.beta[lkey][0]]
-                b_to_a = [contacts.p2, contacts.p1, self.pars.beta[lkey][1]]
-                for a, b, beta in [a_to_b, b_to_a]: # Transmission from a --> b
+                a_to_b = ['p1', 'p2', self.pars.beta[lkey][0]]
+                b_to_a = ['p2', 'p1', self.pars.beta[lkey][1]]
+                for lbl_src, lbl_tgt, beta in [a_to_b, b_to_a]: # Transmission from a --> b
                     if beta == 0:
                         continue
-
-                    # Accumulate acquisition probability for each person (node)
-                    node_from_edge = np.ones((n, len(a)))
-                    bi = people._uid_map[b] # Indices of b (rather than uid)
-                    node_from_edge[bi, np.arange(len(a))] = 1 - rel_trans[a] * rel_sus[b] * contacts.beta * beta # TODO: Needs DT
-                    p_acq_node = 1 - (1-p_acq_node) * node_from_edge.prod(axis=1) # (1-p1)*(1-p2)*...
 
+                    a, b = contacts[lbl_src], contacts[lbl_tgt]
+                    df = pd.DataFrame({'p1': a, 'p2': b})
+                    df['p'] = (rel_trans[a] * rel_sus[b] * contacts.beta * beta * people.dt).values
+                    df = df.loc[df['p']>0]
+                    dfs.append(df)
 
-        # Slotted draw, need to find a long-term place for this logic
-        slots = people.slot[people.uid]
-        #p = np.full(np.max(slots)+1, 0, dtype=p_acq_node.dtype)
-        #p[slots] = p_acq_node
-        #new_cases_bool = sps.bernoulli.rvs(p=p).astype(bool)[slots]
-        new_cases_bool = sps.uniform.rvs(size=np.max(slots)+1)[slots] < p_acq_node
-        new_cases = people.uid[new_cases_bool]
-        return new_cases
-
-    def _determine_case_source_multirng(self, people, new_cases):
-        '''
-        Given the uids of new cases, determine which agents are the source of each case
-        '''
-        sources = np.zeros_like(new_cases)
+        df = pd.concat(dfs)
+        if len(df) == 0:
+            return [], []
+
+        p_acq_node = df.groupby('p2').apply(lambda x: 1-np.prod(1-x['p']))
+        uids = p_acq_node.index.values
 
         # Slotted draw, need to find a long-term place for this logic
-        slots = people.slot[new_cases]
-        r = sps.uniform.rvs(size=np.max(slots)+1)[slots]
-
-        for i, uid in enumerate(new_cases):
-            p_acqs = []
-            possible_sources = []
-
-            for lkey, layer in people.networks.items():
-                if lkey in self.pars['beta']:
-                    contacts = layer.contacts
-                    a_to_b = [contacts.p1, contacts.p2, self.pars.beta[lkey][0]]
-                    b_to_a = [contacts.p2, contacts.p1, self.pars.beta[lkey][1]]
-                    for a, b, beta in [a_to_b, b_to_a]: # Transmission from a --> b
-                        if beta == 0:
-                            continue
-
-                        inds = np.where(b == uid)[0]
-                        if len(inds) == 0:
-                            continue
-                        neighbors = a[inds]
-
-                        rel_trans = self.rel_trans[neighbors] * (self.infected[neighbors] & people.alive[neighbors])
-                        rel_sus = self.rel_sus[uid] * (self.susceptible[uid] & people.alive[uid])
-                        beta_combined = contacts.beta[inds] * beta
-
-                        # Compute acquisition probabilities from neighbors --> uid
-                        # TODO: Could remove zeros
-                        p_acqs.append((rel_trans * rel_sus * beta_combined).__array__()) # Needs DT
-                        possible_sources.append(neighbors.__array__())
-
-            # Concatenate across layers and directions (p1->p2 vs p2->p1)
-            p_acqs = np.concatenate(p_acqs)
-            possible_sources = np.concatenate(possible_sources)
-
-            if len(possible_sources) == 1: # Easy if only one possible source
-                sources[i] = possible_sources[0]
+        slots = people.slot[uids]
+        new_cases_bool = sps.uniform.rvs(size=np.max(slots)+1)[slots] < p_acq_node.values
+        new_cases = uids[new_cases_bool]
+
+        # Now choose infection source for new cases
+        def choose_source(df):
+            if len(df) == 1: # Easy if only one possible source
+                src_idx = 0
             else:
                 # Roulette selection using slotted draw r associated with this new case
-                cumsum = p_acqs / p_acqs.sum()
-                source_idx = np.argmax(cumsum >= r[i])
-                sources[i] = possible_sources[source_idx]
-        return sources
+                cumsum = df['p'] / df['p'].sum()
+                src_idx = np.argmax(cumsum >= df['r'])
+            return df['p1'].iloc[src_idx]
+
+        df['r'] = sps.uniform.rvs(size=np.max(slots)+1)[slots]
+        sources = df.set_index('p2').loc[new_cases].groupby('p2').apply(choose_source)
+
+        return new_cases, sources[new_cases].values
 
     def make_new_cases(self, sim):
         """ Add new cases of module, through transmission, incidence, etc. """
         if not ss.options.multirng:
             # Determine new cases for singlerng
-            new_cases, sources = self._make_new_cases_singlerng(sim)
+            new_cases, sources = self._make_new_cases_singlerng(sim.people)
         else:
             # Determine new cases for multirng
-            new_cases = self._choose_new_cases_multirng(sim.people)
-            if len(new_cases):
-                # Now determine whom infected each case
-                sources = self._determine_case_source_multirng(sim.people, new_cases)
+            new_cases, sources = self._make_new_cases_multirng(sim.people)
 
         if len(new_cases):
             self.set_prognoses(sim, new_cases, sources)

diff --git a/stisim/modules.py b/stisim/modules.py
@@ -67,9 +67,18 @@ def initialize(self, sim):
         return
 
     def finalize(self, sim):
+        self.finalize_results(sim)
         self.finalized = True
         return
 
+    def finalize_results(self, sim):
+        """
+        Finalize results
+        """
+        # Scale results
+        for reskey, res in self.results.items():
+            if isinstance(res, ss.Result) and res.scale:
+                self.results[reskey] = self.results[reskey]*sim.pars.pop_scale
     @property
     def states(self):
         """