-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathhealthburden.py
663 lines (559 loc) · 34.6 KB
/
healthburden.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
"""
This Module runs the counting of Life-years Lost, Life-years Lived with Disability,
and Disability-Adjusted Life-years (DALYS).
"""
from copy import copy
from pathlib import Path
from typing import Dict
import numpy as np
import pandas as pd
from tlo import Date, DateOffset, Module, Parameter, Types, logging
from tlo.events import PopulationScopeEventMixin, Priority, RegularEvent
from tlo.methods import Metadata
from tlo.methods.causes import (
Cause,
collect_causes_from_disease_modules,
create_mappers_from_causes_to_label,
get_gbd_causes_not_represented_in_disease_modules,
)
from tlo.methods.demography import age_at_date
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
class HealthBurden(Module):
"""
This module holds all the stuff to do with recording DALYS
"""
def __init__(self, name=None, resourcefilepath=None):
super().__init__(name)
self.resourcefilepath = resourcefilepath
# instance variables
self.multi_index_for_age_and_wealth_and_time = None
self.years_life_lost = None
self.years_life_lost_stacked_time = None
self.years_life_lost_stacked_age_and_time = None
self.years_lived_with_disability = None
self.recognised_modules_names = None
self.causes_of_disability = None
self._causes_of_yll = None
self._causes_of_dalys = None
self._years_written_to_log = []
INIT_DEPENDENCIES = {'Demography'}
ADDITIONAL_DEPENDENCIES = {'Lifestyle'}
# Declare Metadata
METADATA = {}
PARAMETERS = {
'DALY_Weight_Database': Parameter(
Types.DATA_FRAME, 'DALY Weight Database from GBD'),
'Age_Limit_For_YLL': Parameter(
Types.REAL, 'The age up to which deaths are recorded as having induced a lost of life years'),
'gbd_causes_of_disability': Parameter(
Types.LIST, 'List of the strings of causes of disability defined in the GBD data')
}
PROPERTIES = {}
def read_parameters(self, data_folder):
p = self.parameters
p['DALY_Weight_Database'] = pd.read_csv(Path(self.resourcefilepath) / 'ResourceFile_DALY_Weights.csv')
p['Age_Limit_For_YLL'] = 90.0 # Frontier life expectancy at birth
# https://cdn.who.int/media/docs/default-source/gho-documents/global-health-estimates/
# ghe2019_daly-methods.pdf?sfvrsn=31b25009_7
p['gbd_causes_of_disability'] = set(pd.read_csv(
Path(self.resourcefilepath) / 'gbd' / 'ResourceFile_CausesOfDALYS_GBD2019.csv', header=None)[0].values)
def initialise_population(self, population):
pass
def initialise_simulation(self, sim):
"""Do before simulation starts:
1) Prepare data storage structures
2) Collect the module that will use this HealthBurden module
3) Process the declarations of causes of disability made by the disease modules
4) Launch the DALY Logger to run every month, starting with the end of the first month of simulation
5) Schedule `Healthburden_WriteToLog` that will write to log annually (end of each year)
"""
# 1) Prepare data storage structures
# Create the sex/age_range/year multi-index for YLL and YLD storage dataframes
sex_index = ['M', 'F']
age_index = self.sim.modules['Demography'].AGE_RANGE_CATEGORIES
wealth_index = sim.modules['Lifestyle'].PROPERTIES['li_wealth'].categories
year_index = list(range(self.sim.start_date.year, self.sim.end_date.year + 1))
self.multi_index_for_age_and_wealth_and_time = pd.MultiIndex.from_product(
[sex_index, age_index, wealth_index, year_index], names=['sex', 'age_range', 'li_wealth', 'year'])
# Create the YLL and YLD storage data-frame (using sex/age_range/year multi-index)
self.years_life_lost = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time)
self.years_life_lost_stacked_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time)
self.years_life_lost_stacked_age_and_time = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time)
self.years_lived_with_disability = pd.DataFrame(index=self.multi_index_for_age_and_wealth_and_time)
# 2) Collect the module that will use this HealthBurden module
self.recognised_modules_names = [
m.name for m in self.sim.modules.values() if Metadata.USES_HEALTHBURDEN in m.METADATA
]
# Check that all registered disease modules have the report_daly_values() function
for module_name in self.recognised_modules_names:
assert getattr(self.sim.modules[module_name], 'report_daly_values', None) and \
callable(self.sim.modules[module_name].report_daly_values), 'A module that declares use of ' \
'HealthBurden module must have a ' \
'callable function "report_daly_values"'
# 3) Process the declarations of causes of disability and DALYS made by the disease modules
self.process_causes_of_disability()
self.process_causes_of_dalys()
# 4) Launch the DALY Logger to run every month, starting with the end of the first month of simulation
sim.schedule_event(Get_Current_DALYS(self), sim.date + DateOffset(months=1))
# 5) Schedule `Healthburden_WriteToLog` that will write to log annually
last_day_of_the_year = Date(sim.date.year, 12, 31)
sim.schedule_event(Healthburden_WriteToLog(self), last_day_of_the_year)
def process_causes_of_disability(self):
"""
1) Collect causes of disability that are reported by each disease module
2) Define the "Other" tlo_cause of disability (corresponding to those gbd_causes that are not represented by
the disease modules in this sim.)
3) Output to the log mappers for causes of disability to the label
"""
# 1) Collect causes of disability that are reported by each disease module
self.causes_of_disability = collect_causes_from_disease_modules(
all_modules=self.sim.modules.values(),
collect='CAUSES_OF_DISABILITY',
acceptable_causes=set(self.parameters['gbd_causes_of_disability'])
)
# 2) Define the "Other" tlo_cause of disability
self.causes_of_disability['Other'] = Cause(
label='Other',
gbd_causes=get_gbd_causes_not_represented_in_disease_modules(
causes=self.causes_of_disability,
gbd_causes=set(self.parameters['gbd_causes_of_disability'])
)
)
# 3) Output to the log mappers for causes of disability
mapper_from_tlo_causes, mapper_from_gbd_causes = create_mappers_from_causes_to_label(
causes=self.causes_of_disability,
all_gbd_causes=set(self.parameters['gbd_causes_of_disability'])
)
logger.info(
key='disability_mapper_from_tlo_cause_to_common_label',
data=mapper_from_tlo_causes
)
logger.info(
key='disability_mapper_from_gbd_cause_to_common_label',
data=mapper_from_gbd_causes
)
def process_causes_of_dalys(self):
"""
1) Collect causes of DALYS (i.e., death _and_ disability) that are reported by each disease module
2) Define the "Other" tlo_cause of DALYS (corresponding to those gbd_causes that are not represented by
the disease modules in this sim.)
3) Output to the log mappers for causes of disability to the label
"""
...
# 1) Collect causes of death and disability that are reported by each disease module,
# merging the gbd_causes declared for deaths or disabilities under the same label,
def merge_dicts_of_causes(d1: Dict, d2: Dict) -> Dict:
"""Combine two dictionaries of the form {tlo_cause_name: Cause}, merging the values of `Cause.gbd_causes`
where the values of `Cause.label` are common, attaching to the first key in d1 that uses that label."""
labels_seen = dict() # Look-up of the form {label: tlo_cause_name} for the tlo_cause_name associated
# (first) with a label.
merged_causes = dict() # Dict that will build-up as {tlo_cause_name: Cause} and be returned
for d in (d1, d2):
for _tlo_cause_name, _cause in d.items():
if _cause.label not in labels_seen:
# If label is not already included, add this cause to the merged dict
merged_causes[_tlo_cause_name] = copy(_cause) # Use copy to avoid the merged dict being linked
# to the passed arguments
labels_seen[_cause.label] = _tlo_cause_name
else:
# If label is already included, merge the gbd_causes into the cause defined.
tlo_cause_name_to_merge_into = labels_seen[_cause.label]
merged_causes[tlo_cause_name_to_merge_into].gbd_causes = \
merged_causes[tlo_cause_name_to_merge_into].gbd_causes.union(_cause.gbd_causes)
return merged_causes
causes_of_death = collect_causes_from_disease_modules(
all_modules=self.sim.modules.values(),
collect='CAUSES_OF_DEATH',
acceptable_causes=self.sim.modules['Demography'].gbd_causes_of_death)
causes_of_disability = collect_causes_from_disease_modules(
all_modules=self.sim.modules.values(),
collect='CAUSES_OF_DISABILITY',
acceptable_causes=set(self.parameters['gbd_causes_of_disability']))
causes_of_death_and_disability = merge_dicts_of_causes(
causes_of_death,
causes_of_disability,
)
# N.B. In the GBD definitions, MANY things which disable but don't kill; but NO things that kill but which
# don't also disable (because things that kill cause DALYS that way.)
assert set(self.parameters['gbd_causes_of_disability']).issuperset(
self.sim.modules['Demography'].gbd_causes_of_death)
# 2) Define the "Other" cause
all_gbd_causes_of_death_and_disability = set(self.parameters['gbd_causes_of_disability']).union(
self.sim.modules['Demography'].gbd_causes_of_death
)
causes_of_death_and_disability['Other'] = Cause(
label='Other',
gbd_causes=get_gbd_causes_not_represented_in_disease_modules(
causes=causes_of_death_and_disability,
gbd_causes=all_gbd_causes_of_death_and_disability
)
)
# 3) Output to the log mappers for causes of DALYs
mapper_from_tlo_causes, mapper_from_gbd_causes = create_mappers_from_causes_to_label(
causes=causes_of_death_and_disability,
all_gbd_causes=all_gbd_causes_of_death_and_disability
)
logger.info(
key='daly_mapper_from_tlo_cause_to_common_label',
data=mapper_from_tlo_causes
)
logger.info(
key='daly_mapper_from_gbd_cause_to_common_label',
data=mapper_from_gbd_causes
)
# store all possible causes of YLL and DALYS
self._causes_of_yll = set(causes_of_death.keys()).union({'Other'})
self._causes_of_dalys = set(mapper_from_tlo_causes.values())
def on_birth(self, mother_id, child_id):
pass
def on_simulation_end(self):
"""Write to the log anything that has not already been logged (i.e., if simulation terminating mid-way through
a year when the WriteToLog event has not run."""
self.write_to_log(year=self.sim.date.year)
def get_dalys(self, yld: pd.DataFrame, yll: pd.DataFrame) -> pd.DataFrame:
"""Returns pd.DataFrame of DALYS that is the sum of the 'Years Lived with Disability' (`yld`) and the 'Years
of Life Lost' (`yll`), under their common 'labels'. (i.e. multiple causes of yld and of yll may occur on the
same labels, and these labels unite the causes across yll and yld.)
"""
# Put YLD under common label
yld_with_label = yld.rename(
columns={
c: self.causes_of_disability[c].label for c in yld.columns
if c in self.causes_of_disability
}
)
# Put YLL under common label
yll_with_label = yll.rename(
columns={
c: self.sim.modules['Demography'].causes_of_death[c].label for c in yll
if c in self.sim.modules['Demography'].causes_of_death
}
)
# Join together and add, setting the index to be any columns that are not the causes_of_dalys (e.g. year, sex,
# age_range, wealth)
tots = pd.concat(
[
yld_with_label.set_index(sorted([i for i in yld_with_label.columns if i not in self._causes_of_dalys])),
yll_with_label.set_index(sorted([i for i in yll_with_label.columns if i not in self._causes_of_dalys])),
],
axis=1,
)
return tots.groupby(tots.columns, axis=1).sum().reset_index()
def get_daly_weight(self, sequlae_code):
"""
This can be used to look up the DALY weight for a particular condition identified by the 'sequela code'
Sequela code for particular conditions can be looked-up in ResourceFile_DALY_Weights.csv
:param sequela_code:
:return: the daly weight associated with that sequela code
"""
w = self.parameters['DALY_Weight_Database']
daly_wt = w.loc[w['TLO_Sequela_Code'] == sequlae_code, 'disability weight'].values[0]
# Check that the sequela code was found
assert (not pd.isnull(daly_wt))
# Check that the value is within bounds [0,1]
assert (daly_wt >= 0) & (daly_wt <= 1)
return daly_wt
def report_live_years_lost(self, sex=None, wealth=None, date_of_birth=None, age_range=None, cause_of_death=None):
"""
Calculate and store the period for which there is 'years of lost life' when someone dies (assuming that the
person has died on today's date in the simulation).
:param sex: sex of the person that had died
:param wealth: the value 'li_wealth' for the person at the time of death
:param date_of_birth: date_of_birth of the person that has died
:param age_range: The age-range for the person at the time of death
:param cause_of_death: title for the column in YLL dataframe (of form <ModuleName>_<Cause>)
"""
def _format_for_multi_index(_yll: pd.Series):
"""Returns pd.Series which is the same as in the argument `_yll` except that the multi-index has been
expanded to include sex and li_wealth and rearranged so that it matched the expected multi-index format
(sex/age_range/li_wealth/year)."""
return pd.DataFrame(_yll)\
.assign(sex=sex, li_wealth=wealth)\
.set_index(['sex', 'li_wealth'], append=True)\
.reorder_levels(['sex', 'age_range', 'li_wealth', 'year'])[_yll.name]
assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
# date from which years of life are lost
date_of_death = self.sim.date
# Get the years of life lost split out by year and age-group: Not Stacked by time... so counting years of life
# lost up to the earliest of the age_limit or end of simulation.
yll = self.decompose_yll_by_age_and_time(start_date=date_of_death,
end_date=min(
self.sim.end_date,
(date_of_birth +
pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']))
),
date_of_birth=date_of_birth
)['person_years'].pipe(_format_for_multi_index)
# Get the years of live lost "stacked by time", whereby all the life-years lost up to the age_limit are ascribed
# to the year of death.
yll_stacked_by_time = \
self.decompose_yll_by_age_and_time(
start_date=date_of_death,
end_date=(
date_of_birth + pd.DateOffset(years=self.parameters['Age_Limit_For_YLL']) - pd.DateOffset(days=1)),
date_of_birth=date_of_birth
).groupby(level=1).sum()\
.assign(year=date_of_death.year)\
.set_index(['year'], append=True)['person_years']\
.pipe(_format_for_multi_index)
# Get the years of live lost "stacked by age and time", whereby all the life-years lost up to the age_limit are
# ascribed to the age of death and to the year of death. This is computed by collapsing the age-dimension of
# `yll_stacked_by_time` onto the age(-range) of death.
age_range_to_stack_to = age_range
yll_stacked_by_age_and_time = pd.DataFrame(yll_stacked_by_time.groupby(level=[0, 2, 3]).sum())\
.assign(age_range=age_range_to_stack_to)\
.set_index(['age_range'], append=True)['person_years']\
.reorder_levels(['sex', 'age_range', 'li_wealth', 'year'])
# Add the years-of-life-lost from this death to the overall YLL dataframe keeping track
if cause_of_death not in self.years_life_lost.columns:
# cause has not been added to the LifeYearsLost dataframe, so make a new columns
self.years_life_lost[cause_of_death] = 0.0
self.years_life_lost_stacked_time[cause_of_death] = 0.0
self.years_life_lost_stacked_age_and_time[cause_of_death] = 0.0
# Add the life-years-lost from this death to the running total in LifeYearsLost dataframe
self.years_life_lost[cause_of_death] = self.years_life_lost[cause_of_death].add(
yll, fill_value=0)
self.years_life_lost_stacked_time[cause_of_death] = self.years_life_lost_stacked_time[cause_of_death].add(
yll_stacked_by_time, fill_value=0)
self.years_life_lost_stacked_age_and_time[cause_of_death] = \
self.years_life_lost_stacked_age_and_time[cause_of_death].add(yll_stacked_by_age_and_time, fill_value=0)
# Check that the index of the YLL dataframe is not changed
assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
def decompose_yll_by_age_and_time(self, start_date, end_date, date_of_birth):
"""
This helper function will decompose a period of years of lost life into time-spent in each age group in each
calendar year
:return: a dataframe (X) of the person-time (in years) spent by age-group and time-period
"""
df = pd.DataFrame()
# Get all the days between start and end (inclusively)
df['days'] = pd.date_range(start=start_date, end=end_date, freq='D')
df['year'] = df['days'].dt.year
# Get the age (in whole years) that this person will be on each day.
# N.B. This is a slight approximation as it doesn't make allowance for leap-years.
df['age_in_years'] = age_at_date(df['days'], date_of_birth).astype(int)
age_range_lookup = self.sim.modules['Demography'].AGE_RANGE_LOOKUP # get the age_range_lookup from demography
df['age_range'] = df['age_in_years'].map(age_range_lookup)
period = pd.DataFrame(df.groupby(by=['year', 'age_range'])['days'].count())
period['person_years'] = (period['days'] / 365).clip(lower=0.0, upper=1.0)
period.drop(columns=['days'], axis=1, inplace=True)
return period
def write_to_log(self, year: int):
"""Write to the log the YLL, YLD and DALYS for a specific year.
N.B. This is called at the end of the simulation as well as at the end of each year, so we need to check that
the year is not being written to the log more than once."""
if year in self._years_written_to_log:
return # Skip if the year has already been logged.
def summarise_results_for_this_year(df, level=[0, 1]) -> pd.DataFrame:
"""Return pd.DataFrame that gives the summary of the `df` for the `year` by certain levels in the df's
multi-index. The `level` argument gives a list of levels to use in `groupby`: e.g., level=[0,1] gives a
summary of sex/age-group; and level=[2] gives a summary only by wealth category."""
return df.loc[(slice(None), slice(None), slice(None), year)] \
.groupby(level=level) \
.sum() \
.reset_index() \
.assign(year=year)
def log_df_line_by_line(key, description, df, force_cols=None) -> None:
"""Log each line of a dataframe to `logger.info`. Each row of the dataframe is one logged entry.
`force_cols` is the names of the colums that must be included in each logging line (As the parsing of the
log requires the name of the format of each row to be uniform.)."""
df[sorted(set(force_cols) - set(df.columns))] = 0.0 # Force the addition of any missing causes
df = df[sorted(df.columns)] # sort the columns so that they are always in same order
for _, row in df.iterrows():
logger.info(
key=key,
data=row.to_dict(),
description=description,
)
# Check that the format of the internal storage is as expected.
self.check_multi_index()
# 1) Log the Years Lived With Disability (YLD) (by the 'causes of disability' declared by disease modules).
log_df_line_by_line(
key='yld_by_causes_of_disability',
description='Years lived with disability by the declared cause_of_disability, '
'broken down by year, sex, age-group',
df=(yld := summarise_results_for_this_year(self.years_lived_with_disability)),
force_cols=sorted(set(self.causes_of_disability.keys())),
)
# 2) Log the Years of Live Lost (YLL) (by the 'causes of death' declared by disease modules).
log_df_line_by_line(
key='yll_by_causes_of_death',
description='Years of life lost by the declared cause_of_death, '
'broken down by year, sex, age-group. '
'No stacking: i.e., each year of life lost is ascribed to the'
' age and year that the person would have lived.',
df=(yll := summarise_results_for_this_year(self.years_life_lost)),
force_cols=self._causes_of_yll,
)
log_df_line_by_line(
key='yll_by_causes_of_death_stacked',
description='Years of life lost by the declared cause_of_death, '
'broken down by year, sex, age-group. '
'Stacking by time: i.e., every year of life lost is ascribed to'
' the year of the death, but each is ascribed to the age that '
'the person would have lived, .',
df=(yll_stacked_by_time := summarise_results_for_this_year(self.years_life_lost_stacked_time)),
force_cols=self._causes_of_yll,
)
log_df_line_by_line(
key='yll_by_causes_of_death_stacked_by_age_and_time',
description='Years of life lost by the declared cause_of_death, '
'broken down by year, sex, age-group. '
'Stacking by age and time: i.e., all the year of life lost '
'are ascribed to the age of the death and the year of the death.',
df=(yll_stacked_by_age_and_time := summarise_results_for_this_year(
self.years_life_lost_stacked_age_and_time)),
force_cols=self._causes_of_yll,
)
# 3) Log total DALYS recorded (YLD + LYL) (by the labels declared)
log_df_line_by_line(
key='dalys',
description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability'
', broken down by year, sex, age-group. '
'No stacking: i.e., each year of life lost is ascribed to the'
' age and year that the person would have lived.',
df=self.get_dalys(yld=yld, yll=yll),
force_cols=self._causes_of_dalys,
)
log_df_line_by_line(
key='dalys_stacked',
description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability'
', broken down by year, sex, age-group. '
'Stacking by time: i.e., every year of life lost is ascribed to'
' the year of the death, but each is ascribed to the age that '
'the person would have lived, .',
df=self.get_dalys(yld=yld, yll=yll_stacked_by_time),
force_cols=self._causes_of_dalys,
)
log_df_line_by_line(
key='dalys_stacked_by_age_and_time',
description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability'
', broken down by year, sex, age-group. '
'Stacking by age and time: i.e., all the year of life lost '
'are ascribed to the age of the death and the year of the death.',
df=self.get_dalys(yld=yld, yll=yll_stacked_by_age_and_time),
force_cols=self._causes_of_dalys,
)
# 4) Log total DALYS (Stacked by Age and Time), broken down by wealth only (with the YLL stacked by age and
# time)
yld_by_wealth = summarise_results_for_this_year(
self.years_lived_with_disability, level=2
)
yll_by_wealth = summarise_results_for_this_year(
self.years_life_lost_stacked_age_and_time, level=2
)
log_df_line_by_line(
key='dalys_by_wealth_stacked_by_age_and_time',
description='DALYS, by the labels are that are declared for each cause_of_death and cause_of_disability'
', broken down by year and wealth category.'
'Stacking by age and time: i.e., all the year of life lost '
'are ascribed to the age of the death and the year of the death.',
df=self.get_dalys(yld=yld_by_wealth, yll=yll_by_wealth),
force_cols=self._causes_of_dalys,
)
self._years_written_to_log += [year]
def check_multi_index(self):
"""Check that the multi-index of the dataframes are as expected"""
assert self.years_life_lost.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_life_lost_stacked_age_and_time.index.equals(self.multi_index_for_age_and_wealth_and_time)
assert self.years_lived_with_disability.index.equals(self.multi_index_for_age_and_wealth_and_time)
class Get_Current_DALYS(RegularEvent, PopulationScopeEventMixin):
"""
This event runs every months and asks each disease module to report the average disability
weight for each living person during the previous month. It reconciles this with reports from other disease modules
to ensure that no person has a total weight greater than one.
A known (small) limitation of this is that persons who died during the previous month do not contribute any YLD.
"""
def __init__(self, module):
super().__init__(module, frequency=DateOffset(months=1))
def apply(self, population):
# Running the DALY Logger
# Do nothing if no disease modules are registered or no causes of disability are registered
if (not self.module.recognised_modules_names) or (not self.module.causes_of_disability):
return
# Get the population dataframe
df = self.sim.population.props
idx_alive = df.loc[df.is_alive].index
# 1) Ask each disease module to log the DALYS for the previous month
dalys_from_each_disease_module = list()
for disease_module_name in self.module.recognised_modules_names:
disease_module = self.sim.modules[disease_module_name]
declared_causes_of_disability_module = disease_module.CAUSES_OF_DISABILITY.keys()
if declared_causes_of_disability_module:
# if some causes of disability are declared, collect the disability reported by this disease module:
dalys_from_disease_module = disease_module.report_daly_values()
# Check type is in acceptable form and make into dataframe if not already
assert type(dalys_from_disease_module) in (pd.Series, pd.DataFrame)
if isinstance(dalys_from_disease_module, pd.Series):
# if a pd.Series is returned, it implies there is only one cause of disability registered by module:
assert 1 == len(declared_causes_of_disability_module), \
"pd.Series returned but number of causes of disability declared is not equal to one."
# name the returned pd.Series as the only cause of disability that is defined by the module
dalys_from_disease_module.name = list(declared_causes_of_disability_module)[0]
# convert to pd.DataFrame
dalys_from_disease_module = pd.DataFrame(dalys_from_disease_module)
# Perform checks on what has been returned
assert set(dalys_from_disease_module.columns) == set(declared_causes_of_disability_module)
assert set(dalys_from_disease_module.index) == set(idx_alive)
assert not pd.isnull(dalys_from_disease_module).any().any()
assert ((dalys_from_disease_module >= 0) & (dalys_from_disease_module <= 1)).all().all()
assert (dalys_from_disease_module.sum(axis=1) <= 1).all()
# Append to list of dalys reported by each module
dalys_from_each_disease_module.append(dalys_from_disease_module)
# 2) Combine into a single dataframe (each column of this dataframe gives the reports from each module), and
# add together dalys reported by different modules that have the same cause (i.e., add together columns with
# the same name).
disease_specific_daly_values_this_month = pd.concat(
dalys_from_each_disease_module, axis=1).groupby(axis=1, level=0).sum()
# 3) Rescale the DALY weights
# Create a scaling-factor (if total DALYS for one person is more than 1, all DALYS weights are scaled so that
# their sum equals one).
scaling_factor = (disease_specific_daly_values_this_month.sum(axis=1).clip(lower=0, upper=1) /
disease_specific_daly_values_this_month.sum(axis=1)).fillna(1.0)
disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.multiply(scaling_factor,
axis=0)
assert ((disease_specific_daly_values_this_month.sum(axis=1) - 1.0) < 1e-6).all()
# Multiply 1/12 as these weights are for one month only
disease_specific_daly_values_this_month = disease_specific_daly_values_this_month * (1 / 12)
# 4) Summarise the results for this month wrt sex/age/wealth
# - merge in age/wealth/sex information
disease_specific_daly_values_this_month = disease_specific_daly_values_this_month.merge(
df.loc[idx_alive, ['sex', 'li_wealth', 'age_range']], left_index=True, right_index=True, how='left')
# - sum of daly_weight, by sex/age/wealth
disability_monthly_summary = pd.DataFrame(
disease_specific_daly_values_this_month.groupby(['sex', 'age_range', 'li_wealth']).sum().fillna(0))
# - add the year into the multi-index
disability_monthly_summary['year'] = self.sim.date.year
disability_monthly_summary.set_index('year', append=True, inplace=True)
disability_monthly_summary = disability_monthly_summary.reorder_levels(
['sex', 'age_range', 'li_wealth', 'year'])
# 5) Add the monthly summary to the overall dataframe for YearsLivedWithDisability
dalys_to_add = disability_monthly_summary.sum().sum() # for checking
dalys_current = self.module.years_lived_with_disability.sum().sum() # for checking
# (Nb. this will add columns that are not otherwise present and add values to columns where they are.)
combined = self.module.years_lived_with_disability.combine(
disability_monthly_summary,
fill_value=0.0,
func=np.add,
overwrite=False)
# Merge into a dataframe with the correct multi-index (the multi-index from combine is subtly different)
self.module.years_lived_with_disability = \
pd.DataFrame(index=self.module.multi_index_for_age_and_wealth_and_time)\
.merge(combined, left_index=True, right_index=True, how='left')
# Check multi-index is in check and that the addition of DALYS has worked
assert self.module.years_lived_with_disability.index.equals(self.module.multi_index_for_age_and_wealth_and_time)
assert abs(self.module.years_lived_with_disability.sum().sum() - (dalys_to_add + dalys_current)) < 1e-5
self.module.check_multi_index()
class Healthburden_WriteToLog(RegularEvent, PopulationScopeEventMixin):
""" This event runs every year, as the last event on the last day of the year, and writes to the log the YLD, YLL
and DALYS accrued in that year."""
def __init__(self, module):
super().__init__(module, frequency=DateOffset(years=1), priority=Priority.END_OF_DAY)
def apply(self, population):
self.module.write_to_log(year=self.sim.date.year)