diff --git a/.github/workflows/run-on-comment.yml b/.github/workflows/run-on-comment.yml index 3fdc74b53d..0edf974935 100644 --- a/.github/workflows/run-on-comment.yml +++ b/.github/workflows/run-on-comment.yml @@ -126,7 +126,7 @@ jobs: - name: Get comment-bot token if: always() && steps.has_permissions.outputs.result == 'true' id: get_comment_bot_token - uses: peter-murray/workflow-application-token-action@dc0413987a085fa17d19df9e47d4677cf81ffef3 + uses: peter-murray/workflow-application-token-action@8e4e6fbf6fcc8a272781d97597969d21b3812974 with: application_id: ${{ secrets.application-id }} application_private_key: ${{ secrets.application-private-key }} diff --git a/.github/workflows/tests-unpinned.yml b/.github/workflows/tests-unpinned.yml new file mode 100644 index 0000000000..d776644eb9 --- /dev/null +++ b/.github/workflows/tests-unpinned.yml @@ -0,0 +1,32 @@ +name: Tests with unpinned dependencies + +on: + schedule: + - cron: 0 0 15 * * + +jobs: + test: + name: Run tests + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [3.x] + fail-fast: false + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + with: + lfs: true + - name: Cache tox + uses: actions/cache@v4 + with: + path: .tox + key: tox-${{hashFiles('pyproject.toml') }} + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install tox + run: python -m pip install tox + - name: Run tests + run: tox -v -e py3-latest -- pytest -n auto -vv tests --skip-slow diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 283a53594a..5d39c44840 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,6 +10,8 @@ on: - requirements/** - resources/** - src/tlo/** + - src/scripts/profiling/scale_run.py + - src/scripts/profiling/shared.py - tests/** - pyproject.toml - tox.ini @@ -44,9 +46,10 @@ jobs: name: Set matrix run: | set -e + shopt -s globstar # Find all test files and generate their list in JSON format VAR_FILES="{\"include\":[" - for file in tests/test_*.py; do + for file in tests/**/test_*.py; do VAR_FILES="${VAR_FILES}{\"file\":\"${file}\"}," done VAR_FILES="${VAR_FILES}]}" @@ -75,4 +78,12 @@ jobs: tox --version - name: Test with tox run: | - tox -v -e py311,report -- pytest --cov --cov-report=term-missing -vv "${{ matrix.file }}" + tox -v -e py311 -- pytest --show-capture=no -vv "${{ matrix.file }}" --junit-xml="${{ matrix.file }}.results.xml" + - name: Generate test report + if: always() + uses: pmeier/pytest-results-action@fc6576eced1f411ea48ab10e917d9cfce2960e29 + with: + path: ${{ matrix.file }}.results.xml + summary: true + display-options: fEX + title: Results for ${{ matrix.file }} diff --git a/.gitignore b/.gitignore index cb11b17ff2..6805a8cd3d 100644 --- a/.gitignore +++ b/.gitignore @@ -109,9 +109,6 @@ venv.bak/ # PyCharm .idea/ -# TLO .rst files -docs/reference/tlo*.rst - # TLO configuration tlo.conf @@ -124,3 +121,12 @@ profiling_results/ # ignore _version.py file generated by setuptools_scm src/**/_version.py + +# Generated TLO docs files +docs/_*.rst +docs/_*.html +docs/hsi_events.csv +docs/parameters.rst +docs/reference/modules.rst +docs/reference/tlo*.rst +docs/resources/**/*.rst diff --git a/CITATION.cff b/CITATION.cff index d6849dece4..07d4c8801c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -113,6 +113,7 @@ authors: family-names: Janoušková orcid: https://orcid.org/0000-0002-4104-0119 affiliation: University College London + website: 
https://profiles.ucl.ac.uk/90260 - given-names: Rachel family-names: Murray-Watson affiliation: Imperial College London diff --git a/README.md b/README.md index eadcbd2c60..ce6afead33 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
-Thanzi La Onze +Thanzi la Onse

Thanzi la Onse model

@@ -24,7 +24,7 @@ The __Thanzi la Onse model (TLOmodel)__ is a part of the [Thanzi la Onse][thanzi TLOmodel is developed in a collaboration between: - [Kamuzu University of Health Sciences][kuhes-link] -- [MRC Centre for Global Infectioous Disease Analysis][mrc-gida-link], [Imperial College London][imperial-link] +- [MRC Centre for Global Infectious Disease Analysis][mrc-gida-link], [Imperial College London][imperial-link] - [Institute for Global Health][igh-link], [University College London][ucl-link] - [Centre for Advanced Research Computing][arc-link], [University College London][ucl-link] - [Centre for Health Economics][che-link], [University of York][york-link] diff --git a/contributors.yaml b/contributors.yaml index 75cd14f1d9..601baf176a 100644 --- a/contributors.yaml +++ b/contributors.yaml @@ -195,6 +195,7 @@ family-names: Janoušková orcid: "https://orcid.org/0000-0002-4104-0119" affiliation: "University College London" + website: "https://profiles.ucl.ac.uk/90260" github-username: EvaJanouskova contributions: - Epidemiology and modelling diff --git a/docs/conf.py b/docs/conf.py index 2b1c453203..f738017398 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,6 @@ 'sphinx.ext.ifconfig', 'sphinx.ext.napoleon', 'sphinx.ext.todo', - 'sphinx.ext.viewcode', 'rawfiles' ] @@ -56,7 +55,6 @@ html_theme = 'sphinx_rtd_theme' html_use_smartypants = True -html_last_updated_fmt = '%b %d, %Y' html_split_index = False html_show_copyright = False html_sidebars = { @@ -106,6 +104,9 @@ 'exclude-members': '__dict__, name, rng, sim' # , read_parameters', } +# Include both class level and __init__ docstring content in class documentation +autoclass_content = 'both' + # The checker can't see private repos linkcheck_ignore = ['^https://github.com/UCL/TLOmodel.*', 'https://www.who.int/bulletin/volumes/88/8/09-068213/en/nn'] diff --git a/docs/index.rst b/docs/index.rst index 9de9d148d1..47d4857290 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -52,6 +52,7 @@ Contents azure_batch reference/index resources/index + parameters learning publications contributors diff --git a/docs/publications.bib b/docs/publications.bib new file mode 100644 index 0000000000..6948b618e3 --- /dev/null +++ b/docs/publications.bib @@ -0,0 +1,427 @@ + +@misc{mohan_framework_2024, + title = {Theory of {Change} {Framework} for {Economic} {Evaluation} {Using} {Health} {System} {Models}}, + url = {https://pure.york.ac.uk/portal/en/publications/theory-of-change-framework-for-economic-evaluation-using-health-s}, + abstract = {All-disease health systems models (HSMs) represent the new frontier of economic evaluation to help guide sector-wide resource allocation, allowing for decision analysis in the context of interacting health system capacity constraints. Although there are frameworks for how health systems and their relationship with health outcomes may be characterised, there is a gap in the literature in providing a comprehensive list of health system components and a template for impact pathways from health system components to health outcomes to consider when designing, using and communicating HSMs for economic evaluation. This paper provides a conceptual framework to serve as a theoretical underpinning for the design and use of HSMs developed for economic evaluation. 
The framework builds upon previous literature as well as our experience developing the Thanzi La Onse (TLO) Model for Malawi.}, + publisher = {York Research Database}, + author = {Mohan, Sakshi and Revill, Paul and Chalkley, Martin and Colbourn, Tim and Mangal, Tara and Molaro, Margherita and Nkhoma, Dominic and She, Bingling and Walker, Simon and Phillips, Andrew and Hallet, Timothy and Sculpher, Mark}, + month = nov, + year = {2024}, + keywords = {Theoretical frameworks}, +} + +@inproceedings{mohan_potential_2024, + address = {AUT}, + title = {The {Potential} {Impact} of {Investments} in {Supply} {Chain} {Strengthening} ({Retrospective} analysis)}, + url = {https://doi.org/10.15124/yao-7b1g-n044}, + doi = {10.15124/yao-7b1g-n044}, + abstract = {Supply chain strengthening (SCS) is a key component in the overall strategy of countries to move towards universal health coverage. Estimating the health benefit of investments in such health system strengthening (HSS) interventions has been challenging because these benefits are mediated through their impact on the delivery of a wide range of healthcare interventions, creating a problem of attribution. We overcome this challenge by simulating the impact of SCS within the Thanzi La Onse (TLO) model, an individual-based simulation of health care needs and service delivery for Malawi, drawing upon demographic, epidemiological and routine healthcare system data (on facilities, staff and consumables). In this study, we combine the results of a previous inferential analysis on the factors associated with consumable availability at health facilities in Malawi with the TLO model to estimate the potential for health impact of SCS interventions in the country. We do this by first predicting the expected change in consumable availability by making a positive change to these factors using previously fitted multi-level regression models of consumable availability. We then run the TLO model with these improved consumable availability estimates. The difference in the DALYs accrued by the simulated population under the baseline availability of consumables and that under improved consumable availability estimates gives us the potential for health impact of SCS interventions which would influence these factors. Countries regularly need to make decisions on allocating resources across a range of health interventions (including service delivery and HSS). Crucial to guide these decisions is a value-for-money (VfM) assessment comparing these interventions. Our analysis offers the first step in estimating the VfM of a sample of SCS interventions and can guide Malawi in its evaluation of alternative health sector investments.}, + language = {en}, + urldate = {2024-11-18}, + booktitle = {European {Health} {Economics} {Association} ({EuHEA}) conference 2024}, + publisher = {White Rose Research Repository}, + author = {Mohan, Sakshi}, + month = nov, + year = {2024}, + keywords = {Analyses using the model}, +} + +@misc{nkhoma_thanzi_2024, + title = {Thanzi {La} {Mawa} ({TLM}) datasets: health worker time and motion, patient exit interview and follow-up, and health facility resources, perceptions and quality in {Malawi}}, + copyright = {© 2024, Posted by Cold Spring Harbor Laboratory. 
This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + shorttitle = {Thanzi {La} {Mawa} ({TLM}) datasets}, + url = {https://www.medrxiv.org/content/10.1101/2024.11.14.24317330v1}, + doi = {10.1101/2024.11.14.24317330}, + abstract = {The Thanzi La Mawa (TLM) study aims to enhance understanding of healthcare delivery and resource allocation in Malawi by capturing real-world data across a range of health facilities. To inform the Thanzi La Onse (TLO) model, which is the first comprehensive health system model developed for any country, this study uses a cross-sectional, mixed-methods approach to collect data on healthcare worker productivity, patient experiences, facility resources, and care quality. The TLM dataset includes information from 29 health facilities sampled across Malawi, covering facility audits, patient exit interviews, follow-ups, time and motion studies, and healthcare worker interviews, conducted from January to May 2024. +Through these data collection tools, the TLM study gathers insights into critical areas such as time allocation of health workers, healthcare resource availability, patient satisfaction, and overall service quality. This data is crucial for enhancing the TLO model’s capacity to answer complex policy questions related to health resource allocation in Malawi. The study also offers a structured framework that other countries in East, Central, and Southern Africa can adopt to improve their healthcare systems. +By documenting methods and protocols, this paper provides valuable guidance for researchers and policymakers interested in healthcare system evaluation and improvement. Given the formal adoption of the TLO model in Malawi, the TLM dataset serves as a foundation for ongoing analyses into quality of care, healthcare workforce efficiency, and patient outcomes. This study seeks to support informed decision-making and future implementation of comprehensive healthcare system models in similar settings.}, + language = {en}, + urldate = {2024-11-18}, + publisher = {medRxiv}, + author = {Nkhoma, Dominic and Chitsulo, Precious and Mulwafu, Watipaso and Mnjowe, Emmanuel and Tafesse, Wiktoria and Mohan, Sakshi and Hallet, Timothy B. and Collins, Joseph H. and Revill, Paul and Chalkley, Martin and Mwapasa, Victor and Mfutso-Bengo, Joseph and Colbourn, Tim}, + month = nov, + year = {2024}, + note = {ISSN: 2431-7330 +Pages: 2024.11.14.24317330}, + keywords = {Data Collection - Protocol and Analyses}, +} + +@article{rao_using_2024, + title = {Using economic analysis to inform health resource allocation: lessons from {Malawi}}, + volume = {3}, + issn = {2731-7501}, + shorttitle = {Using economic analysis to inform health resource allocation}, + doi = {10.1007/s44250-024-00115-4}, + abstract = {Despite making remarkable strides in improving health outcomes, Malawi faces concerns about sustaining the progress achieved due to limited fiscal space and donor dependency. The imperative for efficient health spending becomes evident, necessitating strategic allocation of resources to areas with the greatest impact on mortality and morbidity. Health benefits packages hold promise in supporting efficient resource allocation. However, despite defining these packages over the last two decades, their development and implementation have posed significant challenges for Malawi. 
In response, the Malawian government, in collaboration with the Thanzi la Onse Programme, has developed a set of tools and frameworks, primarily based on cost-effectiveness analysis, to guide the design of health benefits packages likely to achieve national health objectives. This review provides an overview of these tools and frameworks, accompanied by other related analyses, aiming to better align health financing with health benefits package prioritization. The paper is organized around five key policy questions facing decision-makers: (i) What interventions should the health system deliver? (ii) How should resources be allocated geographically? (iii) How should investments in health system inputs be prioritized? (iv) How should equity considerations be incorporated into resource allocation decisions? and (v) How should evidence generation be prioritized to support resource allocation decisions (guiding research)? The tools and frameworks presented here are intended to be compatible for use in diverse and often complex healthcare systems across Africa, supporting the health resource allocation process as countries pursue Universal Health Coverage.}, + language = {eng}, + number = {1}, + journal = {Discover Health Systems}, + author = {Rao, Megha and Nkhoma, Dominic and Mohan, Sakshi and Twea, Pakwanja and Chilima, Benson and Mfutso-Bengo, Joseph and Ochalek, Jessica and Hallett, Timothy B. and Phillips, Andrew N. and McGuire, Finn and Woods, Beth and Walker, Simon and Sculpher, Mark and Revill, Paul}, + year = {2024}, + pmid = {39022531}, + pmcid = {PMC11249770}, + keywords = {Theoretical Frameworks}, + pages = {48}, +} + +@article{hallett_estimates_2024, + title = {Estimates of resource use in the public-sector health-care system and the effect of strengthening health-care services in {Malawi} during 2015–19: a modelling study ({Thanzi} {La} {Onse})}, + issn = {2214-109X}, + shorttitle = {Estimates of resource use in the public-sector health-care system and the effect of strengthening health-care services in {Malawi} during 2015–19}, + url = {https://www.sciencedirect.com/science/article/pii/S2214109X24004133}, + doi = {10.1016/S2214-109X(24)00413-3}, + abstract = {Background +In all health-care systems, decisions need to be made regarding allocation of available resources. Evidence is needed for these decisions, especially in low-income countries. We aimed to estimate how health-care resources provided by the public sector were used in Malawi during 2015–19 and to estimate the effects of strengthening health-care services. +Methods +For this modelling study, we used the Thanzi La Onse model, an individual-based simulation model. The scope of the model was health care provided by the public sector in Malawi during 2015–19. Health-care services were delivered during health-care system interaction (HSI) events, which we characterised as occurring at a particular facility level and requiring a particular number of appointments. We developed mechanistic models for the causes of death and disability that were estimated to account for approximately 81\% of deaths and approximately 72\% of disability-adjusted life-years (DALYs) in Malawi during 2015–19, according to the Global Burden of Disease (GBD) estimates; we computed DALYs incurred in the population as the sum of years of life lost and years lived with disability. The disease models could interact with one another and with the underlying properties of each person. 
Each person in the Thanzi La Onse model had specific properties (eg, sex, district of residence, wealth percentile, smoking status, and BMI, among others), for which we measured distribution and evolution over time using demographic and health survey data. We also estimated the effect of different types of health-care system improvement. +Findings +We estimated that the public-sector health-care system in Malawi averted 41·2 million DALYs (95\% UI 38·6–43·8) during 2015–19, approximately half of the 84·3 million DALYs (81·5–86·9) that the population would otherwise have incurred. DALYs averted were heavily skewed to children aged 0–4 years due to services averting DALYs that would be caused by acute lower respiratory tract infection, HIV or AIDS, malaria, or neonatal disorders. DALYs averted among adults were mostly attributed to HIV or AIDS and tuberculosis. Under a scenario whereby each appointment took the time expected and health-care workers did not work for longer than contracted, the health-care system in Malawi during 2015–19 would have averted only 19·1 million DALYs (95\% UI 17·1–22·4), suggesting that approximately 21·3 million DALYS (20·0–23·6) of total effect were derived through overwork of health-care workers. If people becoming ill immediately accessed care, all referrals were successfully completed, diagnostic accuracy of health-care workers was as good as possible, and consumables (ie, medicines) were always available, 28·2\% (95\% UI 25·7–30·9) more DALYS (ie, 12·2 million DALYs [95\% UI 10·9–13·8]) could be averted. +Interpretation +The health-care system in Malawi provides substantial health gains with scarce resources. Strengthening interventions could potentially increase these gains, so should be a priority for investigation and investment. An individual-based simulation model of health-care service delivery is valuable for health-care system planning and strengthening. +Funding +The Wellcome Trust, UK Research and Innovation, the UK Medical Research Council, and Community Jameel.}, + urldate = {2024-11-14}, + journal = {The Lancet Global Health}, + author = {Hallett, Timothy B and Mangal, Tara D and Tamuri, Asif U and Arinaminpathy, Nimalan and Cambiano, Valentina and Chalkley, Martin and Collins, Joseph H and Cooper, Jonathan and Gillman, Matthew S and Giordano, Mosè and Graham, Matthew M and Graham, William and Hawryluk, Iwona and Janoušková, Eva and Jewell, Britta L and Lin, Ines Li and Manning Smith, Robert and Manthalu, Gerald and Mnjowe, Emmanuel and Mohan, Sakshi and Molaro, Margherita and Ng'ambi, Wingston and Nkhoma, Dominic and Piatek, Stefan and Revill, Paul and Rodger, Alison and Salmanidou, Dimitra and She, Bingling and Smit, Mikaela and Twea, Pakwanja D and Colbourn, Tim and Mfutso-Bengo, Joseph and Phillips, Andrew N}, + month = nov, + year = {2024}, + keywords = {Overview of the model}, +} + +@article{tafesse_faith-based_2021, + title = {Faith-based provision of sexual and reproductive healthcare in {Malawi}}, + volume = {282}, + issn = {02779536}, + url = {https://journals.scholarsportal.info/details/02779536/v282icomplete/nfp_fposarhim.xml}, + doi = {10.1016/j.socscimed.2021.113997}, + abstract = {Abstract Faith-based organisations constitute the second largest healthcare providers in Sub-Saharan Africa but their religious values might be in conflict with providing some sexual and reproductive health services. 
We undertake regression analysis on data detailing client-provider interactions from a facility census in Malawi and examine whether religious ownership of facilities is associated with the degree of adherence to family planning guidelines. We find that faith-based organisations offer fewer services related to the investigation and prevention of sexually transmitted infections (STIs) and the promotion of condom use. The estimates are robust to several sensitivity checks on the impact of client selection. Given the prevalence of faith-based facilities in Sub-Saharan Africa, our results suggest that populations across the region may be at risk from inadequate sexual and reproductive healthcare provision which could exacerbate the incidence of STIs, such as HIV/AIDS, and unplanned pregnancies. Highlights Investigates whether faith-based facilities provide fewer sexual health services. Uses data on client-provider interactions from a facility-level census from Malawi. Faith-based providers are less likely to investigate STIs and promote condoms. Results are robust to matching and are not driven by client selection.}, + number = {Complete}, + urldate = {2024-11-07}, + journal = {Social Science \& Medicine}, + author = {Tafesse, Wiktoria and Chalkley, Martin}, + year = {2021}, + note = {Publisher: Elsevier}, + keywords = {Faith-based providers, Healthcare, Healthcare provision, Least developed country, Ownership, Sexual and reproductive health}, +} + +@misc{li_lin_impact_2024, + address = {Rochester, NY}, + type = {{SSRN} {Scholarly} {Paper}}, + title = {The {Impact} and {Cost}-{Effectiveness} of {Pulse} {Oximetry} and {Oxygen} on {Acute} {Lower} {Respiratory} {Infection} {Outcomes} in {Children} {Under}-5 in {Malawi}: {A} {Modelling} {Study}}, + shorttitle = {The {Impact} and {Cost}-{Effectiveness} of {Pulse} {Oximetry} and {Oxygen} on {Acute} {Lower} {Respiratory} {Infection} {Outcomes} in {Children} {Under}-5 in {Malawi}}, + url = {https://papers.ssrn.com/abstract=4947417}, + doi = {10.2139/ssrn.4947417}, + abstract = {Background: Acute Lower Respiratory Infections (ALRI) are the leading cause of post-neonatal death in children under-5 globally. The impact, costs, and cost-effectiveness of routine pulse oximetry and oxygen on ALRI outcomes at scale remain unquantified. Methods: We evaluate the impact and cost-effectiveness of scaling up pulse oximetry and oxygen on childhood ALRI outcomes in Malawi using a new and detailed individual-based model, together with a comprehensive costing assessment for 2024 that includes both capital and operational expenditures. We model 15 scenarios ranging from no pulse oximetry or oxygen (null scenario) to high coverage (90\% pulse oximetry usage, and 80\% oxygen availability) across the health system. Cost-effectiveness results are presented in incremental cost-effectiveness ratio (ICER) and incremental net health benefit (INHB) using Malawi-specific cost-effectiveness threshold of \$80 per Disability-Adjusted Life Year (DALY) averted. Findings: The cost-effective strategy is the full scale-up of both pulse oximetry and oxygen to 90\% usage rate and 80\% availability, respectively. This combination results in 71\% of hypoxaemic ALRI cases accessing oxygen, averting 73,100 DALYs in the first year of implementation and 29\% of potential ALRI deaths, at an ICER of \$34 per DALY averted and \$894 per death averted. The INHB is 42,200 net DALYs averted. 
Interpretation: Pulse oximetry and oxygen are complementary cost-effective interventions in Malawi, where health expenditure is low, and should be scaled-up in parallel. Funding: UKRI, Wellcome Trust, DFID, EU, CHAI, Unitaid.Declaration of Interest: Besides funding from the Wellcome Trust and UK Research and Innovation going towards authors’ institutions, some authors took on private projects, outside the submitted work. ILL declares receiving consulting fees from ICDDR-B for her work for the Lancet Commission on Medical Oxygen Security related to this study. TC declares consulting fees donated to his institution from the Global Fund for related work, personal consulting fees from the UN Economic Commission for Africa, and non-paid work chairing a Trial Steering Committee for a trial of adolescent mental health interventions in Nepal. ANP declares receiving consulting fees from the Bill \& Melinda Gates Foundation. All other authors declare no competing interests.Ethical Approval: The Thanzi La Onse project received ethical approval from the College of Medicine Malawi Research Ethics Committee (COMREC, P.10/19/2820) in Malawi. Only anonymized secondary data are used in the Thanzi La Onse model including in the ALRI model used in this paper; therefore, individual informed consent was not required.}, + language = {en}, + urldate = {2024-11-10}, + publisher = {Social Science Research Network}, + author = {Li Lin, Ines and McCollum, Eric D. and Buckley, Eric Thomas and Cambiano, Valentina and Collins, Joseph H. and Graham, Matthew M. and Janoušková, Eva and King, Carina and Lufesi, Norman and Mangal, Tara Danielle and Mfutso-Bengo, Joseph Matthew and Mohan, Sakshi and Molaro, Margherita and Nkhoma, Dominic and Nsona, Humphreys and Rothkopf, Alexander and She, Bingling and Smith, Lisa and Tamuri, Asif U. and Revill, Paul and Phillips, Andrew N. and Hallett, Timothy B. and Colbourn, Tim}, + month = sep, + year = {2024}, + keywords = {Analyses using the model, Malawi, acute lower respiratory infections, cost-effectiveness, oxygen, pulse oximetry}, +} + +@article{tafesse_difference_2024, + title = {The difference in clinical knowledge between staff employed at faith-based and public facilities in {Malawi}}, + volume = {11}, + issn = {2167-2415}, + url = {https://cjgh.org/articles/10.15566/cjgh.v11i1.853}, + doi = {10.15566/cjgh.v11i1.853}, + abstract = {A peer-reviewed, scholarly, and multidisciplinary journal on global health policy and practice, promoting evidence-based and thoughtful analysis on effective and innovative approaches to global health from an integrated Christian perspective. 
The Journal publishes evidence-based research and Christian reflection addressing the biological, social, environmental, psychological, and spiritual determinants of health in a global context.\  The broad scope of the journal facilitates actionable learning and capacity building in development contexts within a scholarly framework.\  Topics include: Community and Public Health (Health Promotion/Prevention, Nutrition and Food Security, Environmental Health, Maternal and Child Health, Community Development) Health Care Services (Primary Health Care, Surgical Service, Disaster and Emergency, Rehabilitative services, Mental Health, Palliative Care) Organization (Administration and Finance, Policy and Advocacy, Workforce) Mission and Health (Theology, Outreach, Transformational Development) Conditions of Special Interest (HIV/AIDS, Non-Communicable Disease, Neglected Tropical Diseases)}, + language = {en-US}, + number = {1}, + urldate = {2024-11-07}, + journal = {Christian Journal for Global Health}, + author = {Tafesse, Wiktoria and Chalkley, Martin}, + month = feb, + year = {2024}, + keywords = {Healthcare provision}, +} + +@article{ngambi_cross-sectional_2020, + title = {A cross-sectional study on factors associated with health seeking behaviour of {Malawians} aged 15+ years in 2016}, + volume = {32}, + copyright = {Copyright (c) 2021}, + issn = {1995-7262}, + url = {https://www.ajol.info/index.php/mmj/article/view/202965}, + abstract = {IntroductionHealth seeking behaviour (HSB) refers to actions taken by individuals who are ill in order to find appropriate remedy. Most studies on HSB have only examined one symptom or covered only a specific geographical location within a country. In this study, we used a representative sample of adults to explore the factors associated with HSB in response to 30 symptoms reported by adult Malawians in 2016.MethodsWe used the 2016 Malawi Integrated Household Survey dataset. We fitted a multilevel logistic regression model of likelihood of ‘seeking care at a health facility’ using a forward step-wise selection method, with age, sex and reported symptoms entered as a priori variables. We calculated the odds ratios (ORs) and their associated 95\% confidence intervals (95\% CI). We set the level of statistical significance at P \< 0.05.Results Of 6909 adults included in the survey, 1907 (29\%) reported symptoms during the 2 weeks preceding the survey. Of these, 937 (57\%) sought care at a health facility. Adults in urban areas were more likely to seek health care at a health facility than those in rural areas (AOR = 1.65, 95\% CI: 1.19–2.30, P = 0.003). Females had a higher likelihood of seeking care from health facilities than males (AOR = 1.26, 95\% CI: 1.03–1.59, P = 0.029). Being of higher wealth status was associated with a higher likelihood of seeking care from a health facility (AOR = 1.58, 95\% CI: 1.16–2.16, P = 0.004). 
Having fever and eye problems were associated with higher likelihood of seeking care at a health facility, while having headache, stomach ache and respiratory tract infections were associated with lower likelihood of seeking care at a health facility.ConclusionThis study has shown that there is a need to understand and address individual, socioeconomic and geographical barriers to health seeking to increase access and appropriate use of health care and fast-track progress towards Universal Health Coverage among the adult population.}, + language = {en}, + number = {4}, + urldate = {2024-11-06}, + journal = {Malawi Medical Journal}, + author = {Ng'ambi, Wingston and Mangal, Tara and Phillips, Andrew and Colbourn, Tim and Nkhoma, Dominic and Bengo, Joseph Mfutso- and Revill, Paul and Hallett, Timothy B.}, + year = {2020}, + note = {Number: 4}, + keywords = {Health inequality, Healthcare seeking behaviour, Malawi, health seeking behaviour, integrated household survey}, + pages = {205--212}, +} + +@article{ngambi_socio-demographic_2022, + title = {Socio-demographic factors associated with early antenatal care visits among pregnant women in {Malawi}: 2004–2016}, + volume = {17}, + issn = {1932-6203}, + shorttitle = {Socio-demographic factors associated with early antenatal care visits among pregnant women in {Malawi}}, + url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0263650}, + doi = {10.1371/journal.pone.0263650}, + abstract = {Introduction In 2016, the WHO published recommendations increasing the number of recommended antenatal care (ANC) visits per pregnancy from four to eight. Prior to the implementation of this policy, coverage of four ANC visits has been suboptimal in many low-income settings. In this study we explore socio-demographic factors associated with early initiation of first ANC contact and attending at least four ANC visits (“ANC4+”) in Malawi using the Malawi Demographic and Health Survey (MDHS) data collected between 2004 and 2016, prior to the implementation of new recommendations. Methods We combined data from the 2004–5, 2010 and 2015–16 MDHS using Stata version 16. Participants included all women surveyed between the ages of 15–49 who had given birth in the five years preceding the survey. We conducted weighted univariate, bivariate and multivariable logistic regression analysis of the effects of each of the predictor variables on the binary endpoint of the woman attending at least four ANC visits and having the first ANC attendance within or before the four months of pregnancy (ANC4+). To determine whether a factor was included in the model, the likelihood ratio test was used with a statistical significance of P{\textless} 0.05 as the threshold. Results We evaluated data collected in surveys in 2004/5, 2010 and 2015/6 from 26386 women who had given birth in the five years before being surveyed. The median gestational age, in months, at the time of presenting for the first ANC visit was 5 (inter quartile range: 4–6). The proportion of women initiating ANC4+ increased from 21.3\% in 2004–5 to 38.8\% in 2015–16. From multivariate analysis, there was increasing trend in ANC4+ from women aged 20–24 years (adjusted odds ratio (aOR) = 1.27, 95\%CI:1.05–1.53, P = 0.01) to women aged 45–49 years (aOR = 1.91, 95\%CI:1.18–3.09, P = 0.008) compared to those aged 15–19 years. Women from richest socio-economic position ((aOR = 1.32, 95\%CI:1.12–1.58, P{\textless}0.001) were more likely to demonstrate ANC4+ than those from low socio-economic position. 
Additionally, women who had completed secondary (aOR = 1.24, 95\%CI:1.02–1.51, P = 0.03) and tertiary (aOR = 2.64, 95\%CI:1.65–4.22, P{\textless}0.001) education were more likely to report having ANC4+ than those with no formal education. Conversely increasing parity was associated with a reduction in likelihood of ANC4+ with women who had previously delivered 2–3 (aOR = 0.74, 95\%CI:0.63–0.86, P{\textless}0.001), 4–5 (aOR = 0.65, 95\%CI:0.53–0.80, P{\textless}0.001) or greater than 6 (aOR = 0.61, 95\%CI: 0.47–0.79, {\textless}0.001) children being less likely to demonstrate ANC4+. Conclusion The proportion of women reporting ANC4+ and of key ANC interventions in Malawi have increased significantly since 2004. However, we found that most women did not access the recommended number of ANC visits in Malawi, prior to the 2016 WHO policy change which may mean that women are less likely to undertake the 2016 WHO recommendation of 8 contacts per pregnancy. Additionally, our results highlighted significant variation in coverage according to key socio-demographic variables which should be considered when devising national strategies to ensure that all women access the appropriate frequency of ANC visits during their pregnancy.}, + language = {en}, + number = {2}, + urldate = {2024-11-06}, + journal = {PLOS ONE}, + author = {Ng'ambi, Wingston Felix and Collins, Joseph H. and Colbourn, Tim and Mangal, Tara and Phillips, Andrew and Kachale, Fannie and Mfutso-Bengo, Joseph and Revill, Paul and Hallett, Timothy B.}, + month = feb, + year = {2022}, + note = {Publisher: Public Library of Science}, + keywords = {Age groups, Antenatal care, Children, Educational attainment, HIV, Healthcare seeking behaviour, Low income countries, Malawi, Pregnancy}, + pages = {e0263650}, +} + +@misc{hawryluk_potential_2020, + title = {The potential impact of including pre-school aged children in the praziquantel mass-drug administration programmes on the {S}.haematobium infections in {Malawi}: a modelling study}, + copyright = {© 2020, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/}, + shorttitle = {The potential impact of including pre-school aged children in the praziquantel mass-drug administration programmes on the {S}.haematobium infections in {Malawi}}, + url = {https://www.medrxiv.org/content/10.1101/2020.12.09.20246652v1}, + doi = {10.1101/2020.12.09.20246652}, + abstract = {Background Mass drug administration (MDA) of praziquantel is an intervention used in the treatment and prevention of schistosomiasis. In Malawi, MDA happens annually across high-risk districts and covers around 80\% of school aged children and 50\% of adults. The current formulation of praziquantel is not approved for use in the preventive chemotherapy for children under 5 years old, known as pre-school aged children (PSAC). However, a new formulation for PSAC will be available by 2022. A comprehensive analysis of the potential additional benefits of including PSAC in the MDA will be critical to guide policy-makers. +Methods We developed a new individual-based stochastic transmission model of Schistosoma haematobium for the 6 highest prevalence districts of Malawi. 
The model was used to evaluate the benefits of including PSAC in the MDA campaigns, with respect to the prevalence of high-intensity infections ({\textgreater} 500 eggs per ml of urine) and reaching the elimination target, meaning the prevalence of high-intensity infections under 5\% in all sentinel sites. The impact of different MDA frequencies and coverages is quantified by prevalence of high-intensity infection and number of rounds needed to decrease that prevalence below 1\%. +Results Including PSAC in the MDA campaigns can reduce the time needed to achieve the elimination target for S. haematobium infections in Malawi by one year. The modelling suggests that in the case of a lower threshold of high-intensity infection, currently set by WHO to 500 eggs per ml of urine, including PSAC in the preventive chemotherapy programmes for 5 years can reduce the number of the high-intensity infection case years for pre-school aged children by up to 9.1 years per 100 children. +Conclusions Regularly treating PSAC in the MDA is likely to lead to overall better health of children as well as a decrease in the severe morbidities caused by persistent schistosomiasis infections and bring forward the date of elimination. Moreover, mass administration of praziquantel to PSAC will decrease the prevalence among the SAC, who are at the most risk of infection.}, + language = {en}, + urldate = {2024-11-06}, + publisher = {medRxiv}, + author = {Hawryluk, Iwona and Mangal, Tara and Nguluwe, Andrew and Kambalame, Chikonzero and Banda, Stanley and Magaleta, Memory and Juziwelo, Lazarus and Hallett, Timothy B.}, + month = dec, + year = {2020}, + note = {Pages: 2020.12.09.20246652}, + keywords = {Analyses using the model}, +} + +@misc{molaro_potential_2024, + title = {The potential impact of declining development assistance for healthcare on population health: projections for {Malawi}}, + copyright = {© 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + shorttitle = {The potential impact of declining development assistance for healthcare on population health}, + url = {https://www.medrxiv.org/content/10.1101/2024.10.11.24315287v1}, + doi = {10.1101/2024.10.11.24315287}, + abstract = {Development assistance for health (DAH) to Malawi will likely decrease as a fraction of GDP in the next few decades. Given the country’s significant reliance on DAH for the delivery of its healthcare services, estimating the impact that this could have on health projections for the country is particularly urgent. We use the Malawi-specific, individual-based “all diseases – whole health-system” Thanzi La Onse model to estimate the impact this could have on health system capacities, proxied by the availability of human resources for health, and consequently on population health outcomes. We estimate that the projected changes in DAH could result in a 7-15.8\% increase in disability-adjusted life years compared to a scenario where health spending as a percentage of GDP remains unchanged. This could cause a reversal of gains achieved to date in many areas of health, although progress against HIV/AIDS appears to be less vulnerable. The burden due to non-communicable diseases, on the other hand, is found to increase irrespective of yearly growth in health expenditure, if assuming current reach and scope of interventions. 
Finally, we find that greater health expenditure will improve population health outcomes, but at a diminishing rate.}, + language = {en}, + urldate = {2024-11-06}, + publisher = {medRxiv}, + author = {Molaro, Margherita and Revill, Paul and Chalkley, Martin and Mohan, Sakshi and Mangal, Tara and Colbourn, Tim and Collins, Joseph H. and Graham, Matthew M. and Graham, William and Janoušková, Eva and Manthalu, Gerald and Mnjowe, Emmanuel and Mulwafu, Watipaso and Murray-Watson, Rachel and Twea, Pakwanja D. and Phillips, Andrew N. and She, Bingling and Tamuri, Asif U. and Nkhoma, Dominic and Mfutso-Bengo, Joseph and Hallett, Timothy B.}, + month = oct, + year = {2024}, + note = {Pages: 2024.10.11.24315287}, + keywords = {Analyses using the model}, +} + +@article{she_changes_2024, + title = {The changes in health service utilisation in {Malawi} during the {COVID}-19 pandemic}, + volume = {19}, + issn = {1932-6203}, + url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0290823}, + doi = {10.1371/journal.pone.0290823}, + abstract = {Introduction The COVID-19 pandemic and the restriction policies implemented by the Government of Malawi may have disrupted routine health service utilisation. We aimed to find evidence for such disruptions and quantify any changes by service type and level of health care. Methods We extracted nationwide routine health service usage data for 2015–2021 from the electronic health information management systems in Malawi. Two datasets were prepared: unadjusted and adjusted; for the latter, unreported monthly data entries for a facility were filled in through systematic rules based on reported mean values of that facility or facility type and considering both reporting rates and comparability with published data. Using statistical descriptive methods, we first described the patterns of service utilisation in pre-pandemic years (2015–2019). We then tested for evidence of departures from this routine pattern, i.e., service volume delivered being below recent average by more than two standard deviations was viewed as a substantial reduction, and calculated the cumulative net differences of service volume during the pandemic period (2020–2021), in aggregate and within each specific facility. Results Evidence of disruptions were found: from April 2020 to December 2021, services delivered of several types were reduced across primary and secondary levels of care–including inpatient care (-20.03\% less total interactions in that period compared to the recent average), immunisation (-17.61\%), malnutrition treatment (-34.5\%), accidents and emergency services (-16.03\%), HIV (human immunodeficiency viruses) tests (-27.34\%), antiretroviral therapy (ART) initiations for adults (-33.52\%), and ART treatment for paediatrics (-41.32\%). Reductions of service volume were greatest in the first wave of the pandemic during April-August 2020, and whereas some service types rebounded quickly (e.g., outpatient visits from -17.7\% to +3.23\%), many others persisted at lower level through 2021 (e.g., under-five malnutrition treatment from -15.24\% to -42.23\%). The total reduced service volume between April 2020 and December 2021 was 8 066 956 (-10.23\%), equating to 444 units per 1000 persons. 
Conclusion We have found substantial evidence for reductions in health service delivered in Malawi during the COVID-19 pandemic which may have potential health consequences, the effect of which should inform how decisions are taken in the future to maximise the resilience of healthcare system during similar events.}, + language = {en}, + number = {1}, + urldate = {2024-11-06}, + journal = {PLOS ONE}, + author = {She, Bingling and Mangal, Tara D. and Adjabeng, Anna Y. and Colbourn, Tim and Collins, Joseph H. and Janoušková, Eva and Lin, Ines Li and Mnjowe, Emmanuel and Mohan, Sakshi and Molaro, Margherita and Phillips, Andrew N. and Revill, Paul and Smith, Robert Manning and Twea, Pakwanja D. and Nkhoma, Dominic and Manthalu, Gerald and Hallett, Timothy B.}, + month = jan, + year = {2024}, + note = {Publisher: Public Library of Science}, + keywords = {Analyses using the model, Antiretroviral therapy, COVID 19, HIV, HIV vaccines, Health care facilities, Malawi, Pandemics, Virus testing}, + pages = {e0290823}, +} + +@article{mangal_potential_2021, + title = {Potential impact of intervention strategies on {COVID}-19 transmission in {Malawi}: a mathematical modelling study}, + volume = {11}, + copyright = {© Author(s) (or their employer(s)) 2021. Re-use permitted under CC BY. Published by BMJ.. https://creativecommons.org/licenses/by/4.0/This is an open access article distributed in accordance with the Creative Commons Attribution 4.0 Unported (CC BY 4.0) license, which permits others to copy, redistribute, remix, transform and build upon this work for any purpose, provided the original work is properly cited, a link to the licence is given, and indication of whether changes were made. See: https://creativecommons.org/licenses/by/4.0/.}, + issn = {2044-6055, 2044-6055}, + shorttitle = {Potential impact of intervention strategies on {COVID}-19 transmission in {Malawi}}, + url = {https://bmjopen.bmj.com/content/11/7/e045196}, + doi = {10.1136/bmjopen-2020-045196}, + abstract = {Background COVID-19 mitigation strategies have been challenging to implement in resource-limited settings due to the potential for widespread disruption to social and economic well-being. Here we predict the clinical severity of COVID-19 in Malawi, quantifying the potential impact of intervention strategies and increases in health system capacity. +Methods The infection fatality ratios (IFR) were predicted by adjusting reported IFR for China, accounting for demography, the current prevalence of comorbidities and health system capacity. These estimates were input into an age-structured deterministic model, which simulated the epidemic trajectory with non-pharmaceutical interventions and increases in health system capacity. +Findings The predicted population-level IFR in Malawi, adjusted for age and comorbidity prevalence, is lower than that estimated for China (0.26\%, 95\% uncertainty interval (UI) 0.12\%–0.69\%, compared with 0.60\%, 95\% CI 0.4\% to 1.3\% in China); however, the health system constraints increase the predicted IFR to 0.83\%, 95\% UI 0.49\%–1.39\%. The interventions implemented in January 2021 could potentially avert 54 400 deaths (95\% UI 26 900–97 300) over the course of the epidemic compared with an unmitigated outbreak. Enhanced shielding of people aged ≥60 years could avert 40 200 further deaths (95\% UI 25 300–69 700) and halve intensive care unit admissions at the peak of the outbreak. 
A novel therapeutic agent which reduces mortality by 0.65 and 0.8 for severe and critical cases, respectively, in combination with increasing hospital capacity, could reduce projected mortality to 2.5 deaths per 1000 population (95\% UI 1.9–3.6). +Conclusion We find the interventions currently used in Malawi are unlikely to effectively prevent SARS-CoV-2 transmission but will have a significant impact on mortality. Increases in health system capacity and the introduction of novel therapeutics are likely to further reduce the projected numbers of deaths.}, + language = {en}, + number = {7}, + urldate = {2024-11-06}, + journal = {BMJ Open}, + author = {Mangal, Tara and Whittaker, Charlie and Nkhoma, Dominic and Ng'ambi, Wingston and Watson, Oliver and Walker, Patrick and Ghani, Azra and Revill, Paul and Colbourn, Timothy and Phillips, Andrew and Hallett, Timothy and Mfutso-Bengo, Joseph}, + month = jul, + year = {2021}, + pmid = {34301651}, + note = {Publisher: British Medical Journal Publishing Group +Section: Epidemiology}, + keywords = {Analyses using the model, COVID-19, epidemiology, infection control, public health}, + pages = {e045196}, +} + +@article{she_health_2024, + title = {Health workforce needs in {Malawi}: analysis of the {Thanzi} {La} {Onse} integrated epidemiological model of care}, + volume = {22}, + issn = {1478-4491}, + shorttitle = {Health workforce needs in {Malawi}}, + url = {https://doi.org/10.1186/s12960-024-00949-2}, + doi = {10.1186/s12960-024-00949-2}, + abstract = {To make the best use of health resources, it is crucial to understand the healthcare needs of a population—including how needs will evolve and respond to changing epidemiological context and patient behaviour—and how this compares to the capabilities to deliver healthcare with the existing workforce. Existing approaches to planning either rely on using observed healthcare demand from a fixed historical period or using models to estimate healthcare needs within a narrow domain (e.g., a specific disease area or health programme). A new data-grounded modelling method is proposed by which healthcare needs and the capabilities of the healthcare workforce can be compared and analysed under a range of scenarios: in particular, when there is much greater propensity for healthcare seeking.}, + number = {1}, + urldate = {2024-11-06}, + journal = {Human Resources for Health}, + author = {She, Bingling and Mangal, Tara D. and Prust, Margaret L. and Heung, Stephanie and Chalkley, Martin and Colbourn, Tim and Collins, Joseph H. and Graham, Matthew M. and Jewell, Britta and Joshi, Purava and Li Lin, Ines and Mnjowe, Emmanuel and Mohan, Sakshi and Molaro, Margherita and Phillips, Andrew N. and Revill, Paul and Smith, Robert Manning and Tamuri, Asif U. and Twea, Pakwanja D. 
and Manthalu, Gerald and Mfutso-Bengo, Joseph and Hallett, Timothy B.}, + month = sep, + year = {2024}, + keywords = {Analyses using the model, Health care needs, Health services, Health system interactions, Healthcare workforce, Model design}, + pages = {66}, +} + +@article{mohan_factors_2024, + title = {Factors associated with medical consumable availability in level 1 facilities in {Malawi}: a secondary analysis of a facility census}, + volume = {12}, + issn = {2214-109X}, + shorttitle = {Factors associated with medical consumable availability in level 1 facilities in {Malawi}}, + url = {https://www.sciencedirect.com/science/article/pii/S2214109X24000950}, + doi = {10.1016/S2214-109X(24)00095-0}, + abstract = {Background +Medical consumable stock-outs negatively affect health outcomes not only by impeding or delaying the effective delivery of services but also by discouraging patients from seeking care. Consequently, supply chain strengthening is being adopted as a key component of national health strategies. However, evidence on the factors associated with increased consumable availability is limited. +Methods +In this study, we used the 2018–19 Harmonised Health Facility Assessment data from Malawi to identify the factors associated with the availability of consumables in level 1 facilities, ie, rural hospitals or health centres with a small number of beds and a sparsely equipped operating room for minor procedures. We estimate a multilevel logistic regression model with a binary outcome variable representing consumable availability (of 130 consumables across 940 facilities) and explanatory variables chosen based on current evidence. Further subgroup analyses are carried out to assess the presence of effect modification by level of care, facility ownership, and a categorisation of consumables by public health or disease programme, Malawi's Essential Medicine List classification, whether the consumable is a drug or not, and level of average national availability. +Findings +Our results suggest that the following characteristics had a positive association with consumable availability—level 1b facilities or community hospitals had 64\% (odds ratio [OR] 1·64, 95\% CI 1·37–1·97) higher odds of consumable availability than level 1a facilities or health centres, Christian Health Association of Malawi and private-for-profit ownership had 63\% (1·63, 1·40–1·89) and 49\% (1·49, 1·24–1·80) higher odds respectively than government-owned facilities, the availability of a computer had 46\% (1·46, 1·32–1·62) higher odds than in its absence, pharmacists managing drug orders had 85\% (1·85, 1·40–2·44) higher odds than a drug store clerk, proximity to the corresponding regional administrative office (facilities greater than 75 km away had 21\% lower odds [0·79, 0·63–0·98] than facilities within 10 km of the district health office), and having three drug order fulfilments in the 3 months before the survey had 14\% (1·14, 1·02–1·27) higher odds than one fulfilment in 3 months. Further, consumables categorised as vital in Malawi's Essential Medicine List performed considerably better with 235\% (OR 3·35, 95\% CI 1·60–7·05) higher odds than other essential or non-essential consumables and drugs performed worse with 79\% (0·21, 0·08–0·51) lower odds than other medical consumables in terms of availability across facilities. +Interpretation +Our results provide evidence on the areas of intervention with potential to improve consumable availability. 
Further exploration of the health and resource consequences of the strategies discussed will be useful in guiding investments into supply chain strengthening. +Funding +UK Research and Innovation as part of the Global Challenges Research Fund (Thanzi La Onse; reference MR/P028004/1), the Wellcome Trust (Thanzi La Mawa; reference 223120/Z/21/Z), the UK Medical Research Council, the UK Department for International Development, and the EU (reference MR/R015600/1).}, + number = {6}, + urldate = {2024-11-06}, + journal = {The Lancet Global Health}, + author = {Mohan, Sakshi and Mangal, Tara D and Colbourn, Tim and Chalkley, Martin and Chimwaza, Chikhulupiliro and Collins, Joseph H and Graham, Matthew M and Janoušková, Eva and Jewell, Britta and Kadewere, Godfrey and Li Lin, Ines and Manthalu, Gerald and Mfutso-Bengo, Joseph and Mnjowe, Emmanuel and Molaro, Margherita and Nkhoma, Dominic and Revill, Paul and She, Bingling and Manning Smith, Robert and Tafesse, Wiktoria and Tamuri, Asif U and Twea, Pakwanja and Phillips, Andrew N and Hallett, Timothy B}, + month = jun, + year = {2024}, + keywords = {Analyses using the model}, + pages = {e1027--e1037}, +} + +@article{ngambi_factors_2020, + title = {Factors associated with healthcare seeking behaviour for children in {Malawi}: 2016}, + volume = {25}, + copyright = {© 2020 John Wiley \& Sons Ltd}, + issn = {1365-3156}, + shorttitle = {Factors associated with healthcare seeking behaviour for children in {Malawi}}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/tmi.13499}, + doi = {10.1111/tmi.13499}, + abstract = {Objective To characterise health seeking behaviour (HSB) and determine its predictors amongst children in Malawi in 2016. Methods We used the 2016 Malawi Integrated Household Survey data set. The outcome of interest was HSB, defined as seeking care at a health facility amongst people who reported one or more of a list of possible symptoms given on the questionnaire in the past two weeks. We fitted a multivariate logistic regression model of HSB using a forward step-wise selection method, with age, sex and symptoms entered as a priori variables. Results Of 5350 children, 1666 (32\%) had symptoms in the past two weeks. Of the 1666, 1008 (61\%) sought care at health facility. The children aged 5–14 years were less likely to be taken to health facilities for health care than those aged 0–4 years. Having fever vs. not having fever and having a skin problem vs. not having skin problem were associated with increased likelihood of HSB. Having a headache vs. not having a headache was associated with lower likelihood of accessing care at health facilities (AOR = 0.50, 95\% CI: 0.26–0.96, P = 0.04). Children from urban areas were more likely to be taken to health facilities for health care (AOR = 1.81, 95\% CI: 1.17–2.85, P = 0.008), as were children from households with a high wealth status (AOR = 1.86, 95\% CI: 1.25–2.78, P = 0.02). 
Conclusion There is a need to understand and address individual, socio-economic and geographical barriers to health seeking to increase access and use of health care and fast-track progress towards Universal Health Coverage.}, + language = {en}, + number = {12}, + urldate = {2024-11-06}, + journal = {Tropical Medicine \& International Health}, + author = {Ng'ambi, Wingston and Mangal, Tara and Phillips, Andrew and Colbourn, Tim and Mfutso-Bengo, Joseph and Revill, Paul and Hallett, Timothy B.}, + year = {2020}, + note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/tmi.13499}, + keywords = {Healthcare seeking behaviour, Malawi, determinants of health, healthcare seeking behaviour}, + pages = {1486--1495}, +} + +@article{manning_smith_estimating_2022, + title = {Estimating the health burden of road traffic injuries in {Malawi} using an individual-based model}, + volume = {9}, + issn = {2197-1714}, + url = {https://doi.org/10.1186/s40621-022-00386-6}, + doi = {10.1186/s40621-022-00386-6}, + abstract = {Road traffic injuries are a significant cause of death and disability globally. However, in some countries the exact health burden caused by road traffic injuries is unknown. In Malawi, there is no central reporting mechanism for road traffic injuries and so the exact extent of the health burden caused by road traffic injuries is hard to determine. A limited number of models predict the incidence of mortality due to road traffic injury in Malawi. These estimates vary greatly, owing to differences in assumptions, and so the health burden caused on the population by road traffic injuries remains unclear.}, + number = {1}, + urldate = {2024-11-06}, + journal = {Injury Epidemiology}, + author = {Manning Smith, Robert and Cambiano, Valentina and Colbourn, Tim and Collins, Joseph H. and Graham, Matthew and Jewell, Britta and Li Lin, Ines and Mangal, Tara D. and Manthalu, Gerald and Mfutso-Bengo, Joseph and Mnjowe, Emmanuel and Mohan, Sakshi and Ng’ambi, Wingston and Phillips, Andrew N. and Revill, Paul and She, Bingling and Sundet, Mads and Tamuri, Asif and Twea, Pakwanja D. and Hallet, Timothy B.}, + month = jul, + year = {2022}, + keywords = {Analyses using the model, Health burden, Individual-based model, Malawi, Road traffic injuries}, + pages = {21}, +} + +@article{mangal_assessing_2024, + title = {Assessing the effect of health system resources on {HIV} and tuberculosis programmes in {Malawi}: a modelling study}, + volume = {12}, + issn = {2214-109X}, + shorttitle = {Assessing the effect of health system resources on {HIV} and tuberculosis programmes in {Malawi}}, + url = {https://www.sciencedirect.com/science/article/pii/S2214109X24002596}, + doi = {10.1016/S2214-109X(24)00259-6}, + abstract = {Background +Malawi is progressing towards UNAIDS and WHO End TB Strategy targets to eliminate HIV/AIDS and tuberculosis. We aimed to assess the prospective effect of achieving these goals on the health and health system of the country and the influence of consumable constraints. +Methods +In this modelling study, we used the Thanzi la Onse (Health for All) model, which is an individual-based multi-disease simulation model that simulates HIV and tuberculosis transmission, alongside other diseases (eg, malaria, non-communicable diseases, and maternal diseases), and gates access to essential medicines according to empirical estimates of availability. 
The model integrates dynamic disease modelling with health system engagement behaviour, health system use, and capabilities (ie, personnel and consumables). We used 2018 data on the availability of HIV and tuberculosis consumables (for testing, treatment, and prevention) across all facility levels of the country to model three scenarios of HIV and tuberculosis programme scale-up from Jan 1, 2023, to Dec 31, 2033: a baseline scenario, when coverage remains static using existing consumable constraints; a constrained scenario, in which prioritised interventions are scaled up with fixed consumable constraints; and an unconstrained scenario, in which prioritised interventions are scaled up with maximum availability of all consumables related to HIV and tuberculosis care. +Findings +With uninterrupted medical supplies, in Malawi, we projected HIV and tuberculosis incidence to decrease to 26 (95\% uncertainty interval [UI] 19–35) cases and 55 (23–74) cases per 100 000 person-years by 2033 (from 152 [98–195] cases and 123 [99–160] cases per 100 000 person-years in 2023), respectively, with programme scale-up, averting a total of 12·21 million (95\% UI 11·39–14·16) disability-adjusted life-years. However, the effect was compromised by restricted access to key medicines, resulting in approximately 58 700 additional deaths (33 400 [95\% UI 22 000–41 000] due to AIDS and 25 300 [19 300–30 400] due to tuberculosis) compared with the unconstrained scenario. Between 2023 and 2033, eliminating HIV treatment stockouts could avert an estimated 12 100 deaths compared with the baseline scenario, and improved access to tuberculosis prevention medications could prevent 5600 deaths in addition to those achieved through programme scale-up alone. With programme scale-up under the constrained scenario, consumable stockouts are projected to require an estimated 14·3 million extra patient-facing hours between 2023 and 2033, mostly from clinical or nursing staff, compared with the unconstrained scenario. In 2033, with enhanced screening, 188 000 (81\%) of 232 900 individuals projected to present with active tuberculosis could start tuberculosis treatment within 2 weeks of initial presentation if all required consumables were available, but only 8600 (57\%) of 15 100 presenting under the baseline scenario. +Interpretation +Ignoring frailties in the health-care system, in particular the potential non-availability of consumables, in projections of HIV and tuberculosis programme scale-up might risk overestimating potential health impacts and underestimating required health system resources. Simultaneous health system strengthening alongside programme scale-up is crucial, and should yield greater benefits to population health while mitigating the strain on a heavily constrained health-care system. 
+Funding +Wellcome and UK Research and Innovation as part of the Global Challenges Research Fund.}, + number = {10}, + urldate = {2024-11-06}, + journal = {The Lancet Global Health}, + author = {Mangal, Tara D and Mohan, Sakshi and Colbourn, Timothy and Collins, Joseph H and Graham, Mathew and Jahn, Andreas and Janoušková, Eva and Lin, Ines Li and Smith, Robert Manning and Mnjowe, Emmanuel and Molaro, Margherita and Mwenyenkulu, Tisungane E and Nkhoma, Dominic and She, Bingling and Tamuri, Asif and Revill, Paul and Phillips, Andrew N and Mfutso-Bengo, Joseph and Hallett, Timothy B}, + month = oct, + year = {2024}, + keywords = {Analyses using the model}, + pages = {e1638--e1648}, +} + +@article{molaro_new_2024, + title = {A new approach to {Health} {Benefits} {Package} design: an application of the {Thanzi} {La} {Onse} model in {Malawi}}, + volume = {20}, + issn = {1553-7358}, + shorttitle = {A new approach to {Health} {Benefits} {Package} design}, + url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1012462}, + doi = {10.1371/journal.pcbi.1012462}, + abstract = {An efficient allocation of limited resources in low-income settings offers the opportunity to improve population-health outcomes given the available health system capacity. Efforts to achieve this are often framed through the lens of “health benefits packages” (HBPs), which seek to establish which services the public healthcare system should include in its provision. Analytic approaches widely used to weigh evidence in support of different interventions and inform the broader HBP deliberative process however have limitations. In this work, we propose the individual-based Thanzi La Onse (TLO) model as a uniquely-tailored tool to assist in the evaluation of Malawi-specific HBPs while addressing these limitations. By mechanistically modelling—and calibrating to extensive, country-specific data—the incidence of disease, health-seeking behaviour, and the capacity of the healthcare system to meet the demand for care under realistic constraints on human resources for health available, we were able to simulate the health gains achievable under a number of plausible HBP strategies for the country. We found that the HBP emerging from a linear constrained optimisation analysis (LCOA) achieved the largest health gain—∼8\% reduction in disability adjusted life years (DALYs) between 2023 and 2042 compared to the benchmark scenario—by concentrating resources on high-impact treatments. This HBP however incurred a relative excess in DALYs in the first few years of its implementation. Other feasible approaches to prioritisation were assessed, including service prioritisation based on patient characteristics, rather than service type. Unlike the LCOA-based HBP, this approach achieved consistent health gains relative to the benchmark scenario on a year- to-year basis, and a 5\% reduction in DALYs over the whole period, which suggests an approach based upon patient characteristics might prove beneficial in the future.}, + language = {en}, + number = {9}, + urldate = {2024-11-06}, + journal = {PLOS Computational Biology}, + author = {Molaro, Margherita and Mohan, Sakshi and She, Bingling and Chalkley, Martin and Colbourn, Tim and Collins, Joseph H. and Connolly, Emilia and Graham, Matthew M. and Janoušková, Eva and Lin, Ines Li and Manthalu, Gerald and Mnjowe, Emmanuel and Nkhoma, Dominic and Twea, Pakwanja D. and Phillips, Andrew N. and Revill, Paul and Tamuri, Asif U. and Mfutso-Bengo, Joseph and Mangal, Tara D. 
and Hallett, Timothy B.}, + month = sep, + year = {2024}, + note = {Publisher: Public Library of Science}, + keywords = {Analyses using the model, Child and adolescent health policy, Epidemiology, HIV, Health care facilities, Health care policy, Health systems strengthening, Malawi, Medical risk factors}, + pages = {e1012462}, +} + +@misc{mangal_decade_2024, + title = {A {Decade} of {Progress} in {HIV}, {Malaria}, and {Tuberculosis} {Initiatives} in {Malawi}}, + copyright = {© 2024, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution 4.0 International), CC BY 4.0, as described at http://creativecommons.org/licenses/by/4.0/}, + url = {https://www.medrxiv.org/content/10.1101/2024.10.08.24315077v1}, + doi = {10.1101/2024.10.08.24315077}, + abstract = {Objective Huge investments in HIV, TB, and malaria (HTM) control in Malawi have greatly reduced disease burden. However, the joint impact of these services across multiple health domains and the health system resources required to deliver them are not fully understood. +Methods An integrated epidemiological and health system model was used to assess the impact of HTM programmes in Malawi from 2010 to 2019, incorporating interacting disease dynamics, intervention effects, and health system usage. Four scenarios were examined, comparing actual programme delivery with hypothetical scenarios excluding programmes individually and collectively. +Findings From 2010-2019, HTM programmes were estimated to have prevented 1.08 million deaths and 74.89 million DALYs. An additional 15,600 deaths from other causes were also prevented. Life expectancy increased by 13.0 years for males and 16.9 years for females.The HTM programmes accounted for 24.2\% of all health system interactions, including 157.0 million screening/diagnostic tests and 23.2 million treatment appointments. Accounting for the anticipated health deterioration without HTM services, only 41.55 million additional healthcare worker hours were required (17.1\% of total healthcare worker time) to achieve these gains. The HTM programme eliminated the need for 123 million primary care appointments, offset by a net increase in inpatient care demand (9.4 million bed-days) that would have been necessary in its absence. +Conclusions HTM programmes have greatly increased life expectancy, providing direct and spillover effects on health. These investments have alleviated the burden on inpatient and emergency care, which requires more intensive healthcare provider involvement.}, + language = {en}, + urldate = {2024-11-06}, + publisher = {medRxiv}, + author = {Mangal, Tara Danielle and Molaro, Margherita and Nkhoma, Dominic and Colbourn, Timothy and Collins, Joseph H. and Janoušková, Eva and Graham, Matthew M. and Lin, Ines Li and Mnjowe, Emmanuel and Mwenyenkulu, Tisungane E. and Mohan, Sakshi and She, Bingling and Tamuri, Asif U. and Twea, Pakwanja D. and Winskill, Peter and Phillips, Andrew and Mfutso-Bengo, Joseph and Hallett, Timothy B.}, + month = oct, + year = {2024}, + note = {Pages: 2024.10.08.24315077}, + keywords = {Analyses using the model}, +} + +@article{colbourn_modeling_2023, + title = {Modeling {Contraception} and {Pregnancy} in {Malawi}: {A} {Thanzi} {La} {Onse} {Mathematical} {Modeling} {Study}}, + volume = {54}, + copyright = {© 2023 The Authors. 
Studies in Family Planning published by Wiley Periodicals LLC on behalf of Population Council.}, + issn = {1728-4465}, + shorttitle = {Modeling {Contraception} and {Pregnancy} in {Malawi}}, + url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/sifp.12255}, + doi = {10.1111/sifp.12255}, + abstract = {Malawi has high unmet need for contraception with a costed national plan to increase contraception use. Estimating how such investments might impact future population size in Malawi can help policymakers understand effects and value of policies to increase contraception uptake. We developed a new model of contraception and pregnancy using individual-level data capturing complexities of contraception initiation, switching, discontinuation, and failure by contraception method, accounting for differences by individual characteristics. We modeled contraception scale-up via a population campaign to increase initiation of contraception (Pop) and a postpartum family planning intervention (PPFP). We calibrated the model without new interventions to the UN World Population Prospects 2019 medium variant projection of births for Malawi. Without interventions Malawi's population passes 60 million in 2084; with Pop and PPFP interventions. it peaks below 35 million by 2100. We compare contraception coverage and costs, by method, with and without interventions, from 2023 to 2050. We estimate investments in contraception scale-up correspond to only 0.9 percent of total health expenditure per capita though could result in dramatic reductions of current pressures of very rapid population growth on health services, schools, land, and society, helping Malawi achieve national and global health and development goals.}, + language = {en}, + number = {4}, + urldate = {2024-11-06}, + journal = {Studies in Family Planning}, + author = {Colbourn, Tim and Janoušková, Eva and Li Lin, Ines and Collins, Joseph and Connolly, Emilia and Graham, Matt and Jewel, Britta and Kachale, Fannie and Mangal, Tara and Manthalu, Gerald and Mfutso-Bengo, Joseph and Mnjowe, Emmanuel and Mohan, Sakshi and Molaro, Margherita and Ng'ambi, Wingston and Nkhoma, Dominic and Revill, Paul and She, Bingling and Manning Smith, Robert and Twea, Pakwanja and Tamuri, Asif and Phillips, Andrew and Hallett, Timothy B.}, + year = {2023}, + note = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1111/sifp.12255}, + keywords = {Analyses using the model}, + pages = {585--607}, +} diff --git a/docs/publications.rst b/docs/publications.rst index 77ae7ef93a..388567208b 100644 --- a/docs/publications.rst +++ b/docs/publications.rst @@ -1,52 +1,10 @@ - ============= Publications ============= These are the publications that have been generated either in the course of the model's development or its application. 
+:download:`Download a BibTeX file for all publications <./publications.bib>` -Overview of the Model -====================== - -* `A Healthcare Service Delivery and Epidemiological Model for Investigating Resource Allocation for Health: The Thanzi La Onse Model `_ - - -Analyses Using The Model -======================== - -* `The Changes in Health Service Utilisation in Malawi During the COVID-19 Pandemic `_ - -* `Modeling Contraception and Pregnancy in Malawi: A Thanzi La Onse Mathematical Modeling Study `_ - -* `Factors Associated with Consumable Stock-Outs in Malawi: Evidence from a Facility Census `_ - -* `The Effects of Health System Frailties on the Projected Impact of the HIV and TB Programmes in Malawi `_ - -* `Estimating the health burden of road traffic injuries in Malawi using an individual-based model `_ - -* `The potential impact of intervention strategies on COVID-19 transmission in Malawi: A mathematical modelling study. `_ - -* `The potential impact of including pre-school aged children in the praziquantel mass-drug administration programmes on the S.haematobium infections in Malawi: a modelling study `_ - - -Healthcare Seeking Behaviour -============================ - -* `Socio-demographic factors associated with early antenatal care visits among pregnant women in Malawi: 2004–2016 `_ - -* `Factors associated with healthcare seeking behaviour for children in Malawi: 2016. `_ - -* `A cross-sectional study on factors associated with health seeking behaviour of Malawians aged 15+ years in 2016. `_ - - - - - - - - - - - - +.. raw:: html + :file: _publications_list.html diff --git a/docs/requirements.txt b/docs/requirements.txt index e891488c86..68751f2178 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,6 @@ sphinx>=1.3 sphinx-rtd-theme +pybtex pyyaml +requests tabulate diff --git a/docs/tlo_parameters.py b/docs/tlo_parameters.py new file mode 100644 index 0000000000..6fb38d102b --- /dev/null +++ b/docs/tlo_parameters.py @@ -0,0 +1,321 @@ +"""Create listings of model parameters in tabular format""" + +import argparse +from collections import defaultdict +from collections.abc import Iterable +from functools import partial +from pathlib import Path +from typing import TypeAlias, get_args +import numpy +import pandas + +import tlo +from tlo import Date, Module, Simulation +from tlo.methods import fullmodel +from tlo.analysis.utils import get_parameters_for_status_quo + + +_TYPE_TO_DESCRIPTION = { + bool: "Boolean", + pandas.Categorical: "Categorical", + pandas.DataFrame: "Dataframe", + pandas.Timestamp: "Date", + defaultdict: "Dictionary", + dict: "Dictionary", + int: "Integer", + numpy.int64: "Integer", + list: "List", + float: "Real", + numpy.float64: "Real", + pandas.Series: "Series", + set: "Set", + str: "String", +} + + +ScalarParameterValue: TypeAlias = float | int | bool | str | numpy.generic | Date +StructuredParameterValue: TypeAlias = ( + dict | list | tuple | set | pandas.Series | pandas.DataFrame +) +ParameterValue: TypeAlias = ( + ScalarParameterValue | pandas.Categorical | StructuredParameterValue +) + +_SCALAR_TYPES = get_args(ScalarParameterValue) + + +ModuleParameterTablesDict: TypeAlias = dict[str, dict[str, pandas.DataFrame]] +ModuleStructuredParametersDict: TypeAlias = dict[ + str, dict[str, pandas.DataFrame | dict[str, pandas.DataFrame]] +] + + +def structured_value_to_dataframe( + value: StructuredParameterValue, +) -> pandas.DataFrame | dict[str, pandas.DataFrame]: + if isinstance(value, (list, tuple, set)): + return 
pandas.DataFrame.from_records([value], index=["Value"]) + elif isinstance(value, pandas.Series): + return pandas.DataFrame(value) + elif isinstance(value, pandas.DataFrame): + return value + elif isinstance(value, dict): + if all(isinstance(v, _SCALAR_TYPES) for v in value.values()): + return pandas.DataFrame(value, index=["Value"]) + else: + return {k: structured_value_to_dataframe(v) for k, v in value.items()} + else: + raise ValueError( + f"Unrecognized structured value type {type(value)} for value {value}" + ) + + +def get_parameter_tables( + modules: Iterable[Module], + overriden_parameters: dict[str, dict[str, ParameterValue]], + excluded_modules: set[str], + excluded_parameters: dict[str, set[str]], + escape_characters: callable, + format_internal_link: callable, + max_inline_parameter_length: int = 10, +) -> tuple[ModuleParameterTablesDict, ModuleStructuredParametersDict]: + module_parameter_tables = {} + module_structured_parameters = {} + for module in sorted(modules, key=lambda m: m.name): + if module.name in excluded_modules: + continue + parameter_records = [] + module_structured_parameters[module.name] = {} + module_excluded_parameters = excluded_parameters.get(module.name, set()) + for parameter_name, parameter in module.PARAMETERS.items(): + if parameter_name in module_excluded_parameters: + continue + if ( + module.name in overriden_parameters + and parameter_name in overriden_parameters[module.name] + ): + value = overriden_parameters[module.name][parameter_name] + else: + value = module.parameters.get(parameter_name) + if value is None: + continue + record = { + "Name": escape_characters(parameter_name), + "Description": escape_characters(parameter.description), + "Type": _TYPE_TO_DESCRIPTION[type(value)], + } + if ( + isinstance(value, _SCALAR_TYPES) + or isinstance(value, (list, set, tuple)) + and len(value) < max_inline_parameter_length + ): + record["Value"] = str(value) + elif isinstance(value, pandas.Categorical): + assert len(value) == 1 + record["Value"] = str(value[0]) + else: + record["Value"] = format_internal_link( + "...", parameter_id(module.name, parameter_name) + ) + module_structured_parameters[module.name][parameter_name] = ( + structured_value_to_dataframe(value) + ) + parameter_records.append(record) + module_parameter_tables[module.name] = pandas.DataFrame.from_records( + parameter_records, + ) + return module_parameter_tables, module_structured_parameters + + +def parameter_id(module_name, parameter_name): + return f"{module_name}-{parameter_name}" + + +def dataframe_as_table(dataframe, rows_threshold=None, tablefmt="pipe"): + summarize = rows_threshold is not None and len(dataframe) > rows_threshold + if summarize: + original_rows = len(dataframe) + dataframe = dataframe[1:rows_threshold] + table_string = dataframe.to_markdown(index=False, tablefmt=tablefmt) + if summarize: + table_string += ( + f"\n\n*Only first {rows_threshold} rows of {original_rows} are shown.*\n" + ) + return table_string + + +def md_anchor_tag(id: str) -> str: + return f"" + + +def md_list_item(text: str, bullet: str = "-", indent_level: int = 0) -> str: + return " " * indent_level + f"{bullet} {text}\n" + + +def md_hyperlink(link_text: str, url: str) -> str: + return f"[{link_text}]({url})" + + +def md_internal_link_with_backlink_anchor( + link_text: str, id: str, suffix: str = "backlink" +): + return md_anchor_tag(f"{id}-{suffix}") + md_hyperlink(link_text, f"#{id}") + + +def rst_internal_link(link_text: str, id: str): + return f":ref:`{link_text}<{id}>`" + + +def 
escape_rst_markup_characters(text: str): + return text.replace("_", "\_").replace("*", "\*") + + +def md_anchor_and_backlink(id: str, suffix: str = "backlink"): + return md_anchor_tag(id) + md_hyperlink("↩", f"#{id}-{suffix}") + + +def md_table_of_contents(module_names): + return "\n".join( + [ + md_list_item( + md_internal_link_with_backlink_anchor(module_name, module_name.lower()) + ) + for module_name in module_names + ] + ) + + +def rst_table_of_contents(_module_names): + return ".. contents::\n :local:\n :depth: 1\n :backlinks: entry\n\n" + + +def md_header(text: str, level: int) -> str: + return ("#" * level if level > 0 else "%") + " " + text + "\n\n" + + +def rst_header(title: str, level: int = 0) -> str: + separator_character = '*=-^"'[level] + line = separator_character * len(title) + return (line + "\n" if level == 0 else "") + title + "\n" + line + "\n\n" + + +def md_module_header(module_name): + return md_header(f"{module_name} " + md_anchor_and_backlink(module_name.lower()), 1) + + +def rst_module_header(module_name): + return rst_header(module_name, 1) + + +def md_structured_parameter_header(parameter_name, module_name): + return md_header( + f"{parameter_name} " + + md_anchor_and_backlink(parameter_id(module_name, parameter_name)), + 2, + ) + + +def rst_structured_parameter_header(parameter_name, module_name): + return f".. _{parameter_id(module_name, parameter_name)}:\n\n" + rst_header( + parameter_name, 2 + ) + + +_formatters = { + ".md": { + "header": md_header, + "table_of_contents": md_table_of_contents, + "module_header": md_module_header, + "structured_parameter_header": md_structured_parameter_header, + "dataframe_as_table": partial(dataframe_as_table, tablefmt="pipe"), + "internal_link": md_internal_link_with_backlink_anchor, + "character_escaper": lambda x: x, + }, + ".rst": { + "header": rst_header, + "table_of_contents": rst_table_of_contents, + "module_header": rst_module_header, + "structured_parameter_header": rst_structured_parameter_header, + "dataframe_as_table": partial(dataframe_as_table, tablefmt="grid"), + "internal_link": rst_internal_link, + "character_escaper": escape_rst_markup_characters, + }, +} + + +def write_parameters_file( + output_file_path: Path, + module_parameter_tables: ModuleParameterTablesDict, + module_structured_parameters: ModuleStructuredParametersDict, + summarization_rows_threshold: int = 10, +) -> None: + formatter = _formatters[output_file_path.suffix] + with output_file_path.open("w") as output_file: + output_file.write(formatter["header"]("Parameters", 0)) + output_file.write("Default parameter values used in simulations.\n\n") + output_file.write( + formatter["table_of_contents"](module_parameter_tables.keys()) + ) + output_file.write("\n") + for module_name, parameter_table in module_parameter_tables.items(): + output_file.write(formatter["module_header"](module_name)) + output_file.write(formatter["dataframe_as_table"](parameter_table)) + output_file.write("\n\n") + for ( + parameter_name, + structured_parameter, + ) in module_structured_parameters[module_name].items(): + output_file.write( + formatter["structured_parameter_header"]( + parameter_name, module_name + ) + ) + if isinstance(structured_parameter, dict): + for key, dataframe in structured_parameter.items(): + output_file.write(formatter["header"](key, 3)) + output_file.write( + formatter["dataframe_as_table"]( + dataframe, summarization_rows_threshold + ) + ) + output_file.write("\n\n") + else: + output_file.write( + formatter["dataframe_as_table"]( + 
structured_parameter, summarization_rows_threshold + ) + ) + output_file.write("\n") + output_file.write("\n") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "resource_file_path", + type=Path, + default=Path(tlo.__file__).parent.parent.parent / "resources", + help="Path to resource directory", + ) + parser.add_argument( + "output_file_path", type=Path, help="Path to file to write tables to" + ) + args = parser.parse_args() + simulation = Simulation( + start_date=Date(2010, 1, 1), seed=1234, log_config={"suppress_stdout": True} + ) + status_quo_parameters = get_parameters_for_status_quo() + simulation.register(*fullmodel.fullmodel(args.resource_file_path)) + internal_link_formatter = _formatters[args.output_file_path.suffix]["internal_link"] + character_escaper = _formatters[args.output_file_path.suffix]["character_escaper"] + module_parameter_tables, module_structured_parameters = get_parameter_tables( + simulation.modules.values(), + status_quo_parameters, + {"HealthBurden", "Wasting"}, + {"Demography": {"gbd_causes_of_death_data"}, "Tb": {"who_incidence_estimates"}}, + character_escaper, + internal_link_formatter, + ) + write_parameters_file( + args.output_file_path, module_parameter_tables, module_structured_parameters + ) diff --git a/docs/tlo_publications.py b/docs/tlo_publications.py new file mode 100644 index 0000000000..f810f08e5e --- /dev/null +++ b/docs/tlo_publications.py @@ -0,0 +1,249 @@ +"""Create publications page from BibTeX database file.""" + +import argparse +import calendar +from collections import defaultdict +from pathlib import Path +from warnings import warn + +import pybtex.database +import requests +from pybtex.backends.html import Backend as HTMLBackend +from pybtex.style.formatting import toplevel +from pybtex.style.formatting.unsrt import Style as UnsrtStyle +from pybtex.style.formatting.unsrt import date as publication_date +from pybtex.style.names import BaseNameStyle, name_part +from pybtex.style.sorting import BaseSortingStyle +from pybtex.style.template import ( + FieldIsMissing, + field, + first_of, + href, + join, + node, + optional, + sentence, + tag, + words, +) + + +class InlineHTMLBackend(HTMLBackend): + """Backend for bibliography output as plain list suitable for inclusion in a HTML document.""" + + def write_prologue(self): + self.output("
    \n") + + def write_epilogue(self): + self.output("
\n") + + def write_entry(self, _key, _label, text): + self.output(f"
  • {text}
  • \n") + + +class DateSortingStyle(BaseSortingStyle): + """Sorting style for bibliography in reverse (newest first) publication date order.""" + + def sorting_key(self, entry): + months = list(calendar.month_name) + return ( + -int(entry.fields.get("year")), + -months.index(entry.fields.get("month", "")), + entry.fields.get("title", ""), + ) + + +class LastOnlyNameStyle(BaseNameStyle): + """Name style showing only last names and associated name particles.""" + + def format(self, person, _abbr=False): + return join[ + name_part(tie=True)[person.rich_prelast_names], + name_part[person.rich_last_names], + name_part(before=", ")[person.rich_lineage_names], + ] + + +@node +def summarized_names(children, context, role, summarize_limit=3, **kwargs): + """Return formatted names with et al. summarization when number exceeds specified limit.""" + + assert not children + + try: + persons = context["entry"].persons[role] + except KeyError: + raise FieldIsMissing(role, context["entry"]) + + name_style = LastOnlyNameStyle() + if len(persons) > summarize_limit: + return words[name_style.format(persons[0]), "et al."].format_data(context) + else: + formatted_names = [name_style.format(person) for person in persons] + return join(**kwargs)[formatted_names].format_data(context) + + +class SummarizedStyle(UnsrtStyle): + """ + Bibliography style showing summarized names, year, title and journal with expandable details. + + Not suitable for use with LaTeX backend due to use of details tags. + """ + + default_sorting_style = DateSortingStyle + + def _format_summarized_names(self, role): + return summarized_names(role, sep=", ", sep2=" and ", last_sep=", and ") + + def _format_label(self, label): + return tag("em")[f"{label}: "] + + def _format_details_as_table(self, details): + return tag("table")[ + toplevel[ + *( + tag("tr")[toplevel[tag("td")[tag("em")[key]], tag("td")[value]]] + for key, value in details.items() + ) + ] + ] + + def _get_summary_template(self, e, type_): + bibtex_type_to_venue_field = {"article": "journal", "misc": "publisher", "inproceedings": "booktitle"} + venue_field = bibtex_type_to_venue_field[type_] + url = first_of[ + optional[join["https://doi.org/", field("doi", raw=True)]], + optional[field("url", raw=True)], + "#", + ] + return href[ + url, + sentence(sep=". 
")[ + words[ + self._format_summarized_names("author"), + optional["(", field("year"), ")"], + ], + self.format_title(e, "title", as_sentence=False), + tag("em")[field(venue_field)], + ], + ] + + def _get_details_template(self, type_): + bibtex_type_to_label = {"article": "Journal article", "misc": "Pre-print", "inproceedings": "Conference paper"} + return self._format_details_as_table( + { + "Type": bibtex_type_to_label[type_], + "DOI": optional[field("doi")], + "Date": publication_date, + "Authors": self.format_names("author"), + "Abstract": field("abstract"), + } + ) + + def _get_summarized_template(self, e, type_): + summary_template = self._get_summary_template(e, type_) + details_template = self._get_details_template(type_) + return tag("details")[tag("summary")[summary_template], details_template] + + def get_article_template(self, e): + return self._get_summarized_template(e, "article") + + def get_misc_template(self, e): + return self._get_summarized_template(e, "misc") + + def get_inproceedings_template(self, e): + return self._get_summarized_template(e, "inproceedings") + + +def write_publications_list(stream, bibliography_data, section_names, backend, style): + """Write bibliography data with given backend and style to a stream splitting in to sections.""" + keys_by_section = defaultdict(list) + section_names = [name.lower() for name in section_names] + for key, entry in bibliography_data.entries.items(): + # Section names and keywords normalized to lower case to make matching case-insensitive + keywords = set(k.strip().lower() for k in entry.fields.get("keywords", "").split(",")) + section_names_in_keywords = keywords & set(section_names) + if len(section_names_in_keywords) == 1: + keys_by_section[section_names_in_keywords.pop()].append(key) + elif len(section_names_in_keywords) == 0: + msg = ( + f"BibTeX entry with key {key} does not have a keyword / tag corresponding to " + f"one of section names {section_names} and so will not be included in output." + ) + warn(msg, stacklevel=2) + else: + msg = ( + f"BibTeX entry with key {key} has multiple keywords / tags corresponding to " + f"section names {section_names} and so will not be included in output." + ) + warn(msg, stacklevel=2) + for section_name in section_names: + stream.write(f"

<h2>{section_name.capitalize()}</h2>
    \n") + formatted_bibliography = style.format_bibliography( + bibliography_data, keys_by_section[section_name] + ) + backend.write_to_stream(formatted_bibliography, stream) + stream.write("\n") + + +if __name__ == "__main__": + docs_directory = Path(__file__).parent + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + "--bib-file", + type=Path, + default=docs_directory / "publications.bib", + help="BibTeX file containing publication details", + ) + parser.add_argument( + "--output-file", + type=Path, + default=docs_directory / "_publications_list.html", + help="File to write publication list to in HTML format", + ) + parser.add_argument( + "--update-from-zotero", + action="store_true", + help="Update BibTeX file at path specified by --bib-file from Zotero group library", + ) + parser.add_argument( + "--zotero-group-id", + default="5746396", + help="Integer identifier for Zotero group library", + ) + args = parser.parse_args() + if args.update_from_zotero: + endpoint_url = f"https://api.zotero.org/groups/{args.zotero_group_id}/items" + # Zotero API requires maximum number of results to return (limit parameter) + # to be explicitly specified for export formats such as bibtex and allows a + # maximum value of 100 - if we exceed this number of publications will need + # to switch to making multiple requests with different start indices + response = requests.get( + endpoint_url, params={"format": "bibtex", "limit": "100"} + ) + if response.ok: + with open(args.bib_file, "w") as bib_file: + bib_file.write(response.text) + else: + msg = ( + f"Request to {endpoint_url} failed with status code " + f"{response.status_code} ({response.reason})" + ) + raise RuntimeError(msg) + with open(args.output_file, "w") as output_file: + write_publications_list( + stream=output_file, + bibliography_data=pybtex.database.parse_file(args.bib_file), + section_names=[ + "Overview of the model", + "Analyses using the model", + "Healthcare seeking behaviour", + "Healthcare provision", + "Data Collection - Protocol and Analyses", + "Theoretical Frameworks", + ], + backend=InlineHTMLBackend(), + style=SummarizedStyle(), + ) diff --git a/docs/tlo_resources.py b/docs/tlo_resources.py index 02ea407263..afc7f4fed4 100644 --- a/docs/tlo_resources.py +++ b/docs/tlo_resources.py @@ -99,7 +99,7 @@ def excel_to_rst_table(input_path: Path, output_path: Path) -> None: def generate_docs_pages_from_resource_files( resources_directory: Path, docs_directory: Path, - max_file_size_bytes: int = 2**20, + max_file_size_bytes: int = 2**15, ) -> None: root_output_directory = docs_directory / "resources" root_output_directory.mkdir(exist_ok=True) diff --git a/docs/write-ups/CareOfWomenDuringPregnancy.docx b/docs/write-ups/CareOfWomenDuringPregnancy.docx deleted file mode 100644 index 029ca768fc..0000000000 --- a/docs/write-ups/CareOfWomenDuringPregnancy.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ea2293d04b42ed60c719ff4864670138c886d499fb0ae730f12575cf2abbdc2 -size 760210 diff --git a/docs/write-ups/Epilepsy.docx b/docs/write-ups/Epilepsy.docx index fb8b66055b..344e6ad6fa 100644 --- a/docs/write-ups/Epilepsy.docx +++ b/docs/write-ups/Epilepsy.docx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b394045e585544fdb83e8ec71993c5f42c0f50bdc8b016f6712c1f2a86994c8f -size 2759724 +oid sha256:1f84018d4a66a782d95b057e25fee043458f907f5a9a973b6685f650c1e2be08 +size 2381944 diff --git 
a/docs/write-ups/Labour.docx b/docs/write-ups/Labour.docx deleted file mode 100644 index 551a9c4f25..0000000000 --- a/docs/write-ups/Labour.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0fdbc5a8a118384a63b3f622c909684af63e8011714f640015d4b63623f351b6 -size 931746 diff --git a/docs/write-ups/NewbornOutcomes.docx b/docs/write-ups/NewbornOutcomes.docx deleted file mode 100644 index b5b6ef0e29..0000000000 --- a/docs/write-ups/NewbornOutcomes.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b86e91a8bd336a09e9aa55e4f03ce6a41bc062e6b5d5ee51ef044ed3bec77264 -size 395887 diff --git a/docs/write-ups/PostnatalSupervisor.docx b/docs/write-ups/PostnatalSupervisor.docx deleted file mode 100644 index 85840e91b3..0000000000 --- a/docs/write-ups/PostnatalSupervisor.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3f1956bd6ae8a66ed8f7a9dce8a3a675166c906ec4f1959f3ef336d7d660f9ef -size 285655 diff --git a/docs/write-ups/PregnancySupervisor.docx b/docs/write-ups/PregnancySupervisor.docx deleted file mode 100644 index 9c08ece524..0000000000 --- a/docs/write-ups/PregnancySupervisor.docx +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82be768077e4933feac49ea9be676adc5df548639ddc3225929a9ffa650e76a2 -size 476845 diff --git a/pyproject.toml b/pyproject.toml index f18a736844..a8622e235d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,9 +14,6 @@ classifiers = [ 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', ] @@ -27,7 +24,7 @@ dependencies = [ "pyshp", "squarify", "numpy", - "pandas~=2.0.0", + "pandas~=2.0", "scipy", # Avoid https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1963 "openpyxl==3.1.0", @@ -36,12 +33,14 @@ dependencies = [ "azure-identity", "azure-keyvault", "azure-storage-file-share", + # For saving and loading simulation state + "dill", ] description = "Thanzi la Onse Epidemiology Model" dynamic = ["version"] license = {file = "LICENSE.txt"} readme = "README.md" -requires-python = ">=3.8" +requires-python = ">=3.11" [project.optional-dependencies] dev = [ @@ -58,7 +57,7 @@ dev = [ # Profiling "ansi2html", "psutil", - "pyinstrument>=4.3", + "pyinstrument>=4.7", # Building requirements files "pip-tools", ] @@ -120,7 +119,7 @@ addopts = "-ra --strict-markers --doctest-modules --doctest-glob=*.rst --tb=shor markers = ["group2", "slow"] [tool.ruff] -target-version = "py38" +target-version = "py311" line-length = 120 [tool.setuptools.packages.find] diff --git a/requirements/base.txt b/requirements/base.txt index dc44b868c6..458aa584ea 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -56,6 +56,8 @@ cryptography==41.0.3 # pyjwt cycler==0.11.0 # via matplotlib +dill==0.3.8 + # via tlo (pyproject.toml) et-xmlfile==1.1.0 # via openpyxl fonttools==4.42.1 @@ -112,6 +114,7 @@ pyjwt[crypto]==2.8.0 # via # adal # msal + # pyjwt pyparsing==3.1.1 # via matplotlib pyshp==2.3.1 diff --git a/requirements/dev.txt b/requirements/dev.txt index efd4f0e3e8..e8bbc2e0c9 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile 
with Python 3.11 # by the following command: # # pip-compile --extra=dev --output-file=requirements/dev.txt @@ -61,7 +61,9 @@ colorama==0.4.6 contourpy==1.1.1 # via matplotlib coverage[toml]==7.3.1 - # via pytest-cov + # via + # coverage + # pytest-cov cryptography==41.0.3 # via # adal @@ -72,14 +74,14 @@ cryptography==41.0.3 # pyjwt cycler==0.11.0 # via matplotlib -dill==0.3.7 - # via pylint +dill==0.3.8 + # via + # pylint + # tlo (pyproject.toml) distlib==0.3.7 # via virtualenv et-xmlfile==1.1.0 # via openpyxl -exceptiongroup==1.1.3 - # via pytest execnet==2.0.2 # via pytest-xdist filelock==3.12.4 @@ -94,10 +96,6 @@ gitpython==3.1.36 # via tlo (pyproject.toml) idna==3.4 # via requests -importlib-metadata==6.8.0 - # via build -importlib-resources==6.1.1 - # via matplotlib iniconfig==2.0.0 # via pytest isodate==0.6.1 @@ -166,12 +164,13 @@ psutil==5.9.5 # via tlo (pyproject.toml) pycparser==2.21 # via cffi -pyinstrument==4.5.3 +pyinstrument==4.7.3 # via tlo (pyproject.toml) pyjwt[crypto]==2.8.0 # via # adal # msal + # pyjwt pylint==3.0.1 # via tlo (pyproject.toml) pyparsing==3.1.1 @@ -221,29 +220,17 @@ smmap==5.0.1 # via gitdb squarify==0.4.3 # via tlo (pyproject.toml) -tomli==2.0.1 - # via - # build - # coverage - # pip-tools - # pylint - # pyproject-api - # pyproject-hooks - # pytest - # tox tomlkit==0.12.1 # via pylint tox==4.11.3 # via tlo (pyproject.toml) typing-extensions==4.8.0 # via - # astroid # azure-core # azure-keyvault-certificates # azure-keyvault-keys # azure-keyvault-secrets # azure-storage-file-share - # pylint tzdata==2023.3 # via pandas urllib3==2.0.4 @@ -254,10 +241,6 @@ virtualenv==20.24.5 # tox wheel==0.41.2 # via pip-tools -zipp==3.17.0 - # via - # importlib-metadata - # importlib-resources # The following packages are considered to be unsafe in a requirements file: # pip diff --git a/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx b/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx index 7ec045407a..1586c251f4 100644 --- a/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx +++ b/resources/ResourceFile_Improved_Healthsystem_And_Healthcare_Seeking.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:49282122ff1c60e3bf73765013b6770a2fbd3f0df9a6e3f71e1d4c40e9cdfa2a -size 48238 +oid sha256:e63c16cbd0a069d9d10cf3c7212c8804fb1a047397227485adf348728fa5403b +size 48334 diff --git a/resources/epilepsy/ResourceFile_Epilepsy.xlsx b/resources/epilepsy/ResourceFile_Epilepsy.xlsx index 4bdf5ee91c..8bfa24affb 100644 --- a/resources/epilepsy/ResourceFile_Epilepsy.xlsx +++ b/resources/epilepsy/ResourceFile_Epilepsy.xlsx @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e3c38418df28aabb98602e1b00e77d3840143a9fff8de495230817042d2ed45 -size 1250058 +oid sha256:94938f9187d5573f068f458263cb6d37ca3ce776eb8dfc9542e5cee0543c8804 +size 1250009 diff --git a/src/scripts/automation/mark_slow_tests.py b/src/scripts/automation/mark_slow_tests.py new file mode 100644 index 0000000000..daa507e2ab --- /dev/null +++ b/src/scripts/automation/mark_slow_tests.py @@ -0,0 +1,298 @@ +"""Script to automatically mark slow running tests with `pytest.mark.slow` decorator.""" + + +import argparse +import difflib +import json +import re +import warnings +from collections import defaultdict +from pathlib import Path +from typing import Dict, NamedTuple, Optional, Set, Tuple, Union + +import redbaron + +SLOW_MARK_DECORATOR = "pytest.mark.slow" + + +class TestFunction(NamedTuple): + module_path: Path + 
name: str + + +class TestMethod(NamedTuple): + module_path: Path + class_name: str + method_name: str + + +TestNode = Union[TestFunction, TestMethod] + + +def parse_nodeid_last_part(last_part: str) -> Tuple[str, Optional[str]]: + match = re.match(r"(.+)\[(.+)\]", last_part) + if match is not None: + return match[1], match[2] + else: + return last_part, None + + +def parse_nodeid(nodeid: str) -> TestNode: + parts = nodeid.split("::") + if len(parts) == 2: + module_path, last_part = parts + name, _ = parse_nodeid_last_part(last_part) + return TestFunction(Path(module_path), name) + elif len(parts) == 3: + module_path, class_name, last_part = parts + method_name, _ = parse_nodeid_last_part(last_part) + return TestMethod(Path(module_path), class_name, method_name) + else: + msg = f"Test nodeid has unexpected format: {nodeid}" + raise ValueError(msg) + + +def parse_test_report( + json_test_report_path: Path, + remove_slow_threshold: float, + add_slow_threshold: float, +) -> Dict[Path, Dict[str, Set[TestNode]]]: + with open(json_test_report_path, "r") as f: + test_report = json.load(f) + tests_to_change_slow_mark_by_module: defaultdict = defaultdict( + lambda: {"add": set(), "remove": set()} + ) + tests_to_keep_slow_mark_by_module: defaultdict = defaultdict(set) + for test in test_report["tests"]: + if test["outcome"] != "passed": + continue + test_node = parse_nodeid(test["nodeid"]) + marked_slow = "slow" in test["keywords"] + call_duration = test["call"]["duration"] + if marked_slow and call_duration < remove_slow_threshold: + tests_to_change_slow_mark_by_module[test_node.module_path]["remove"].add( + test_node + ) + elif not marked_slow and call_duration > add_slow_threshold: + tests_to_change_slow_mark_by_module[test_node.module_path]["add"].add( + test_node + ) + elif marked_slow: + tests_to_keep_slow_mark_by_module[test_node.module_path].add(test_node) + # Parameterized tests may have different call durations for different parameters + # however slow mark applies to all parameters, therefore if any tests appear in + # both set of tests to keep slow mark and test to remove slow mark (corresponding + # to runs of same test with different parameters) we remove them from the set of + # tests to remove slow mark + for ( + module_path, + test_nodes_to_change, + ) in tests_to_change_slow_mark_by_module.items(): + test_nodes_to_change["remove"].difference_update( + tests_to_keep_slow_mark_by_module[module_path] + ) + return dict(tests_to_change_slow_mark_by_module) + + +def find_function( + module_fst: redbaron.RedBaron, function_name: str +) -> redbaron.DefNode: + return module_fst.find("def", lambda node: node.name == function_name) + + +def find_class_method( + module_fst: redbaron.RedBaron, class_name: str, method_name: str +) -> redbaron.DefNode: + class_fst = module_fst.find("class", lambda node: node.name == class_name) + return class_fst.fund("def", lambda node: node.name == method_name) + + +def find_decorator( + function_fst: redbaron.DefNode, decorator_code: str +) -> redbaron.DecoratorNode: + return function_fst.find( + "decorator", lambda node: str(node.value) == decorator_code + ) + + +def add_decorator(function_fst: redbaron.DefNode, decorator_code: str): + if len(function_fst.decorators) == 0: + function_fst.decorators = f"@{decorator_code}" + else: + function_fst.decorators.append(f"@{decorator_code}") + + +def remove_decorator( + function_fst: redbaron.DefNode, decorator_fst: redbaron.DecoratorNode +): + # Need to remove both decorator and associated end line node so we find 
index of + # decorator and pop it and next node (which should be end line node) rather than + # use remove method of decorators proxy list directly + decorator_index = function_fst.decorators.node_list.index(decorator_fst) + popped_decorator_fst = function_fst.decorators.node_list.pop(decorator_index) + endline_fst = function_fst.decorators.node_list.pop(decorator_index) + if popped_decorator_fst is not decorator_fst or not isinstance( + endline_fst, redbaron.EndlNode + ): + msg = ( + f"Removed {popped_decorator_fst} and {endline_fst} when expecting " + f"{decorator_fst} and end line node." + ) + raise RuntimeError(msg) + + +def remove_mark_from_tests( + module_fst: redbaron.RedBaron, + tests_to_remove_mark: Set[TestNode], + mark_decorator: str, +): + for test_node in tests_to_remove_mark: + if isinstance(test_node, TestFunction): + function_fst = find_function(module_fst, test_node.name) + else: + function_fst = find_class_method( + module_fst, test_node.class_name, test_node.method_name + ) + decorator_fst = find_decorator(function_fst, mark_decorator) + if decorator_fst is None: + msg = ( + f"Test {test_node} unexpectedly does not have a decorator " + f"{mark_decorator} - this suggests you may be using a JSON test report " + "generated using a different version of tests code." + ) + warnings.warn(msg, stacklevel=2) + else: + remove_decorator(function_fst, decorator_fst) + + +def add_mark_to_tests( + module_fst: redbaron.RedBaron, tests_to_add_mark: Set[TestNode], mark_decorator: str +): + for test_node in tests_to_add_mark: + if isinstance(test_node, TestFunction): + function_fst = find_function(module_fst, test_node.name) + else: + function_fst = find_class_method( + module_fst, test_node.class_name, test_node.method_name + ) + if find_decorator(function_fst, mark_decorator) is not None: + msg = ( + f"Test {test_node} unexpectedly already has a decorator " + f"{mark_decorator} - this suggests you may be using a JSON test report " + "generated using a different version of tests code." 
+ ) + warnings.warn(msg, stacklevel=2) + else: + add_decorator(function_fst, mark_decorator) + + +def add_import(module_fst: redbaron.RedBaron, module_name: str): + last_top_level_import = module_fst.find_all( + "import", lambda node: node.parent is module_fst + )[-1] + import_statement = f"import {module_name}" + if last_top_level_import is not None: + last_top_level_import.insert_after(import_statement) + else: + if isinstance(module_fst[0], redbaron.Nodes.StringNode): + module_fst[0].insert_after(import_statement) + else: + module_fst[0].insert_before(import_statement) + + +def remove_import(module_fst: redbaron.RedBaron, module_name: str): + import_fst = module_fst.find("import", lambda node: module_name in node.modules()) + if len(import_fst.modules()) > 1: + import_fst.remove(module_name) + else: + module_fst.remove(import_fst) + + +def update_test_slow_marks( + tests_to_change_slow_mark_by_module: Dict[Path, Dict[str, Set[TestNode]]], + show_diff: bool, +): + for ( + module_path, + test_nodes_to_change, + ) in tests_to_change_slow_mark_by_module.items(): + with open(module_path, "r") as source_code: + module_fst = redbaron.RedBaron(source_code.read()) + original_module_fst = module_fst.copy() + remove_mark_from_tests( + module_fst, test_nodes_to_change["remove"], SLOW_MARK_DECORATOR + ) + add_mark_to_tests(module_fst, test_nodes_to_change["add"], SLOW_MARK_DECORATOR) + any_marked = ( + module_fst.find( + "decorator", lambda node: str(node.value) == SLOW_MARK_DECORATOR + ) + is not None + ) + pytest_imported = ( + module_fst.find("import", lambda node: "pytest" in node.modules()) + is not None + ) + if any_marked and not pytest_imported: + add_import(module_fst, "pytest") + elif not any_marked and pytest_imported: + pytest_references = module_fst.find_all("name", "pytest") + if ( + len(pytest_references) == 1 + and pytest_references[0].parent_find("import") is not None + ): + remove_import(module_fst, "pytest") + if show_diff: + diff_lines = difflib.unified_diff( + original_module_fst.dumps().split("\n"), + module_fst.dumps().split("\n"), + fromfile=str(module_path), + tofile=f"Updated {module_path}", + ) + print("\n".join(diff_lines), end="") + else: + with open(module_path, "w") as source_code: + source_code.write(module_fst.dumps()) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser("Mark slow running tests with pytest.mark.slow") + parser.add_argument( + "--json-test-report-path", + type=Path, + help="JSON report output from pytest-json-report plugin listing test durations", + ) + parser.add_argument( + "--remove-slow-threshold", + type=float, + default=9.0, + help="Threshold in seconds for test duration below which to remove slow marker", + ) + parser.add_argument( + "--add-slow-threshold", + type=float, + default=11.0, + help="Threshold in seconds for test duration above which to add slow marker", + ) + parser.add_argument( + "--show-diff", + action="store_true", + help="Print line-by-line diff of changes to stdout without changing files", + ) + args = parser.parse_args() + if not args.json_test_report_path.exists(): + msg = f"No file found at --json-test-report-path={args.json_test_report_path}" + raise FileNotFoundError(msg) + # We want a hysteresis effect by having remove_slow_threshold < add_slow_threshold + # so a test with duration close to the thresholds doesn't keep getting marks added + # and removed due to noise in durations + if args.remove_slow_threshold > args.add_slow_threshold: + msg = ( + "Argument --remove-slow-threshold should be less than or 
equal to " + "--add-slow-threshold" + ) + raise ValueError(msg) + tests_to_change_slow_mark_by_module = parse_test_report( + args.json_test_report_path, args.remove_slow_threshold, args.add_slow_threshold + ) + update_test_slow_marks(tests_to_change_slow_mark_by_module, args.show_diff) diff --git a/src/scripts/calibration_analyses/analysis_scripts/analysis_demography_calibrations.py b/src/scripts/calibration_analyses/analysis_scripts/analysis_demography_calibrations.py index 974e50c567..64ac672158 100644 --- a/src/scripts/calibration_analyses/analysis_scripts/analysis_demography_calibrations.py +++ b/src/scripts/calibration_analyses/analysis_scripts/analysis_demography_calibrations.py @@ -260,6 +260,7 @@ def get_mean_pop_by_age_for_sex_and_year(sex, year): collapse_columns=True, only_mean=True ) + num_by_age = num_by_age.reindex(make_age_grp_types().categories) return num_by_age for year in [2010, 2015, 2018, 2029, 2049]: @@ -718,6 +719,7 @@ def get_counts_of_death_by_period_sex_agegrp(df): deaths_by_agesexperiod.loc[ (deaths_by_agesexperiod['Period'] == period) & (deaths_by_agesexperiod['Sex'] == sex)].groupby( by=['Variant', 'Age_Grp'])['Count'].sum()).unstack() + tot_deaths_byage.columns = pd.Index([label[1] for label in tot_deaths_byage.columns.tolist()]) tot_deaths_byage = tot_deaths_byage.transpose() diff --git a/src/scripts/dependencies/tlo_module_graph.py b/src/scripts/dependencies/tlo_module_graph.py new file mode 100644 index 0000000000..278539db31 --- /dev/null +++ b/src/scripts/dependencies/tlo_module_graph.py @@ -0,0 +1,82 @@ +"""Construct a graph showing dependencies between modules.""" + +import argparse +from pathlib import Path +from typing import Dict, Set + +from tlo.dependencies import DependencyGetter, get_all_dependencies, get_module_class_map +from tlo.methods import Metadata + +try: + import pydot +except ImportError: + pydot = None + + +def construct_module_dependency_graph( + excluded_modules: Set[str], + disease_module_node_defaults: Dict, + other_module_node_defaults: Dict, + get_dependencies: DependencyGetter = get_all_dependencies, +): + """Construct a pydot object representing module dependency graph. + + :param excluded_modules: Set of ``Module`` subclass names to not included in graph. + :param disease_module_node_defaults: Any dot node attributes to apply to by default + to disease module nodes. + :param other_module_node_defaults: Any dot node attributes to apply to by default + to non-disease module nodes. + :param get_dependencies: Function which given a module gets the set of module + dependencies. Defaults to extracting all dependencies. + :return: Pydot directed graph representing module dependencies. 
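+
+    Example (illustrative sketch; assumes ``pydot`` and Graphviz are installed,
+    reuses the node defaults from the ``__main__`` block below, and writes to an
+    arbitrary output path)::
+
+        graph = construct_module_dependency_graph(
+            excluded_modules={"Mockitis", "Skeleton"},
+            disease_module_node_defaults={"fontname": "Arial", "shape": "box"},
+            other_module_node_defaults={"fontname": "Arial", "shape": "ellipse"},
+        )
+        graph.write("module_dependencies.svg", format="svg")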
+ """ + if pydot is None: + raise RuntimeError("pydot package must be installed") + module_class_map = get_module_class_map(excluded_modules) + module_graph = pydot.Dot("modules", graph_type="digraph") + disease_module_subgraph = pydot.Subgraph("disease_modules") + module_graph.add_subgraph(disease_module_subgraph) + other_module_subgraph = pydot.Subgraph("other_modules") + module_graph.add_subgraph(other_module_subgraph) + disease_module_subgraph.set_node_defaults(**disease_module_node_defaults) + other_module_subgraph.set_node_defaults(**other_module_node_defaults) + for name, module_class in module_class_map.items(): + node = pydot.Node(name) + if Metadata.DISEASE_MODULE in module_class.METADATA: + disease_module_subgraph.add_node(node) + else: + other_module_subgraph.add_node(node) + for key, module in module_class_map.items(): + for dependency in get_dependencies(module, module_class_map.keys()): + if dependency not in excluded_modules: + module_graph.add_edge(pydot.Edge(key, dependency)) + return module_graph + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "output_file", type=Path, help=( + "Path to output graph to. File extension will determine output format - for example: dot, dia, png, svg" + ) + ) + args = parser.parse_args() + excluded_modules = { + "Mockitis", + "ChronicSyndrome", + "Skeleton", + "AlriPropertiesOfOtherModules", + "DiarrhoeaPropertiesOfOtherModules", + "DummyHivModule", + "SimplifiedBirths", + "Tb", + } + module_graph = construct_module_dependency_graph( + excluded_modules, + disease_module_node_defaults={"fontname": "Arial", "shape": "box"}, + other_module_node_defaults={"fontname": "Arial", "shape": "ellipse"}, + ) + format = ( + args.output_file.suffix[1:] if args.output_file.suffix else "raw" + ) + module_graph.write(args.output_file, format=format) diff --git a/src/scripts/epilepsy_analyses/analysis_epilepsy.py b/src/scripts/epilepsy_analyses/analysis_epilepsy.py index 735cbc6ce7..bba4d3c479 100644 --- a/src/scripts/epilepsy_analyses/analysis_epilepsy.py +++ b/src/scripts/epilepsy_analyses/analysis_epilepsy.py @@ -28,7 +28,7 @@ start_date = Date(2010, 1, 1) end_date = Date(2020, 1, 1) -popsize = 200000 +popsize = 100_000 # Establish the simulation object log_config = { @@ -40,10 +40,11 @@ 'tlo.methods.demography': logging.INFO, 'tlo.methods.healthsystem': logging.WARNING, 'tlo.methods.healthburden': logging.WARNING, + 'tlo.methods.population': logging.INFO, } } -sim = Simulation(start_date=start_date, seed=0, log_config=log_config) +sim = Simulation(start_date=start_date, seed=0, log_config=log_config, show_progress_bar=True) # make a dataframe that contains the switches for which interventions are allowed or not allowed # during this run. NB. 
These must use the exact 'registered strings' that the disease modules allow @@ -125,7 +126,8 @@ ) n_seiz_stat_1_3.plot() plt.title('Number with epilepsy (past or current)') -plt.ylim(0, 800000) +plt.gca().set_ylim(bottom=0) +plt.ylabel("Number (not scaled)") plt.tight_layout() plt.show() @@ -135,11 +137,25 @@ ) n_seiz_stat_2_3.plot() plt.title('Number with epilepsy (infrequent or frequent seizures)') -plt.ylim(0, 300000) +plt.gca().set_ylim(bottom=0) +plt.ylabel("Number (not scaled)") plt.tight_layout() plt.show() plt.clf() + +prop_antiepilep_seiz_infreq_or_freq = pd.Series( + output['tlo.methods.epilepsy']['epilepsy_logging']['prop_freq_or_infreq_seiz_on_antiep'].values, + index=output['tlo.methods.epilepsy']['epilepsy_logging']['date'] +) +prop_antiepilep_seiz_infreq_or_freq.plot(color='r') +plt.title('Proportion on antiepileptics\namongst people that have infrequent or frequent epileptic seizures') +plt.ylim(0, 1) +plt.tight_layout() +plt.show() +plt.clf() + + prop_antiepilep_seiz_stat_1 = pd.Series( output['tlo.methods.epilepsy']['epilepsy_logging']['prop_antiepilep_seiz_stat_1'].values, index=output['tlo.methods.epilepsy']['epilepsy_logging']['date'] @@ -179,7 +195,8 @@ ) n_epi_death.plot() plt.title('Number of deaths from epilepsy') -plt.ylim(0, 50) +plt.gca().set_ylim(bottom=0) +plt.ylabel("Number (not scaled)") plt.tight_layout() plt.show() plt.clf() @@ -190,11 +207,21 @@ ) n_antiep.plot() plt.title('Number of people on antiepileptics') -plt.ylim(0, 50000) +plt.gca().set_ylim(bottom=0) +plt.ylabel("Number (not scaled)") plt.tight_layout() plt.show() plt.clf() +(n_antiep / popsize).plot() +plt.title('Proportion of of people (whole population) on antiepileptics') +plt.gca().set_ylim(bottom=0) +plt.ylabel("Number (not scaled)") +plt.tight_layout() +plt.show() +plt.clf() + + epi_death_rate = pd.Series( output['tlo.methods.epilepsy']['epilepsy_logging']['epi_death_rate'].values, index=output['tlo.methods.epilepsy']['epilepsy_logging']['date'] @@ -233,8 +260,7 @@ for _row, period in enumerate(('2010-2014', '2015-2019')): ax = axs[_row] comparison.loc[(period, slice(None), slice(None), CAUSE_NAME)]\ - .droplevel([0, 1, 3])\ - .groupby(axis=0, level=0)\ + .groupby(axis=0, level=1)\ .sum()\ .plot(use_index=True, ax=ax) ax.set_ylabel('Deaths per year') diff --git a/src/scripts/get_properties/module_dependencies_graph.py b/src/scripts/get_properties/module_dependencies_graph.py new file mode 100644 index 0000000000..53812ec268 --- /dev/null +++ b/src/scripts/get_properties/module_dependencies_graph.py @@ -0,0 +1,111 @@ +"""Construct a graph showing dependencies between modules.""" + +import argparse +from pathlib import Path +from typing import Dict, Set + +from tlo.dependencies import DependencyGetter, get_all_dependencies, get_module_class_map +from tlo.methods import Metadata + +try: + import pydot +except ImportError: + pydot = None + + +def construct_module_dependency_graph( + excluded_modules: Set[str], + disease_module_node_defaults: Dict, + other_module_node_defaults: Dict, + get_dependencies: DependencyGetter = get_all_dependencies, +): + """Construct a pydot object representing module dependency graph. + :param excluded_modules: Set of ``Module`` subclass names to not included in graph. + :param disease_module_node_defaults: Any dot node attributes to apply to by default + to disease module nodes. + :param other_module_node_defaults: Any dot node attributes to apply to by default + to non-disease module nodes. 
+ :param get_dependencies: Function which given a module gets the set of module + dependencies. Defaults to extracting all dependencies. + :return: Pydot directed graph representing module dependencies. + """ + if pydot is None: + raise RuntimeError("pydot package must be installed") + + module_class_map = get_module_class_map(excluded_modules) + module_graph = pydot.Dot("modules", graph_type="digraph") + disease_module_subgraph = pydot.Subgraph("disease_modules") + module_graph.add_subgraph(disease_module_subgraph) + other_module_subgraph = pydot.Subgraph("other_modules") + module_graph.add_subgraph(other_module_subgraph) + + # Set default styles for nodes + disease_module_node_defaults["style"] = "filled" + other_module_node_defaults["style"] = "filled" + + for name, module_class in module_class_map.items(): + # Determine attributes based on module type + node_attributes = {} + + if Metadata.DISEASE_MODULE in module_class.METADATA and name.endswith("Cancer"): + node_attributes.update(disease_module_node_defaults) + node_attributes["color"] = "lightblue" # Color for disease modules and Cancer + else: + node_attributes.update(other_module_node_defaults) + node_attributes["color"] = "lightgreen" # Default color for other modules + + # Create the node with determined attributes + node = pydot.Node(name, **node_attributes) + + # Add the node to the appropriate subgraph + if Metadata.DISEASE_MODULE in module_class.METADATA or name.endswith("Cancer"): + disease_module_subgraph.add_node(node) + else: + other_module_subgraph.add_node(node) + + for key, module in module_class_map.items(): + for dependency in get_dependencies(module, module_class_map.keys()): + if dependency not in excluded_modules: + module_graph.add_edge(pydot.Edge(key, dependency)) + + return module_graph + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "output_file", type=Path, help=( + "Path to output graph to. 
File extension will determine output format - for example: dot, dia, png, svg" + ) + ) + args = parser.parse_args() + + excluded_modules = { + "Mockitis", + "ChronicSyndrome", + "Skeleton", + "AlriPropertiesOfOtherModules", + "DiarrhoeaPropertiesOfOtherModules", + "DummyHivModule", + "SimplifiedBirths", + "Demography", + "HealthBurden", + "SymptomManager", + "DummyTbModule", + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher", + "HealthSeekingBehaviour", + "HealthSystem", + "Deviance", + "SimplifiedPregnancyAndLabour" + } + + module_graph = construct_module_dependency_graph( + excluded_modules, + disease_module_node_defaults={"fontname": "Arial", "shape": "box"}, + other_module_node_defaults={"fontname": "Arial", "shape": "ellipse"}, + ) + + format = ( + args.output_file.suffix[1:] if args.output_file.suffix else "raw" + ) + module_graph.write(args.output_file, format=format) diff --git a/src/scripts/get_properties/properties_graph.py b/src/scripts/get_properties/properties_graph.py new file mode 100644 index 0000000000..8c79787cc6 --- /dev/null +++ b/src/scripts/get_properties/properties_graph.py @@ -0,0 +1,325 @@ +"""Construct a graph showing the property dependency between modules.""" + +import argparse +import importlib +import inspect +import os +import pkgutil +from pathlib import Path +from types import MappingProxyType +from typing import Any, Mapping, Set, Type, Union + +import numpy as np + +import tlo.methods +from tlo import Module +from tlo.analysis.utils import _standardize_short_treatment_id +from tlo.dependencies import DependencyGetter, is_valid_tlo_module_subclass +from tlo.methods import Metadata + +try: + import pydot +except ImportError: + pydot = None + +SHORT_TREATMENT_ID_TO_COLOR_MAP_EXTRA = MappingProxyType({ + '*': 'black', + 'FirstAttendance*': 'darkgrey', + 'Inpatient*': 'silver', + 'Contraception*': 'darkseagreen', + 'AntenatalCare*': 'green', + 'DeliveryCare*': 'limegreen', + 'PostnatalCare*': 'springgreen', + 'CareOfWomenDuringPregnancy*': '#4D804D', + 'Labour*': '#19A719', + 'NewbornOutcomes*': '#19E659', + 'PostnatalSupervisor*': '#5D8C5D', + 'PregnancySupervisor*': '#27C066', + 'Alri*': 'darkorange', + 'Diarrhoea*': 'tan', + 'Undernutrition*': 'gold', + 'Epi*': 'darkgoldenrod', + 'Stunting*': '#D58936', + 'StuntingPropertiesOfOtherModules*': "#EAC143", + 'Wasting*': '#DE9F0E', + 'Hiv*': 'deepskyblue', + 'Malaria*': 'lightsteelblue', + 'Measles*': 'cornflowerblue', + 'Tb*': 'mediumslateblue', + 'Schisto*': 'skyblue', + 'CardioMetabolicDisorders*': 'brown', + 'BladderCancer*': 'orchid', + 'BreastCancer*': 'mediumvioletred', + 'OesophagealCancer*': 'deeppink', + 'ProstateCancer*': 'hotpink', + 'OtherAdultCancer*': 'palevioletred', + 'Depression*': 'indianred', + 'Epilepsy*': 'red', + 'Copd*': 'lightcoral', + 'RTI*': 'lightsalmon', + 'Lifestyle*': 'silver', +}) + + +def get_color_short_treatment_id_extra_modules(short_treatment_id: str) -> str: + """Return the colour (as matplotlib string) assigned to this shorted TREATMENT_ID. + + Returns `np.nan` if treatment_id is not recognised. + """ + return SHORT_TREATMENT_ID_TO_COLOR_MAP_EXTRA.get( + _standardize_short_treatment_id(short_treatment_id), np.nan + ) + + +def get_properties( + module: Union[Module, Type[Module]], +) -> Set[str]: + """Get the properties for a ``Module`` subclass. + + :param module: ``Module`` subclass to get properties for. + :return: Set of ``Module`` subclass names corresponding to properties of ``module``. 
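
A small usage sketch for the colour lookup above. It assumes _standardize_short_treatment_id('Epilepsy') yields the starred key 'Epilepsy*' used in the map; that helper lives in tlo.analysis.utils and its behaviour is not shown in this diff.

# Sketch: expected behaviour of the colour lookup (normalisation assumed).
colour = get_color_short_treatment_id_extra_modules("Epilepsy")
print(colour)  # expected: 'red', per SHORT_TREATMENT_ID_TO_COLOR_MAP_EXTRA

# Unrecognised IDs return np.nan, so callers may want a string fallback:
maybe_colour = get_color_short_treatment_id_extra_modules("NotAModule")
node_colour = maybe_colour if isinstance(maybe_colour, str) else "white"
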
+ """ + if hasattr(module, 'PROPERTIES'): + return module.PROPERTIES + return None + + +def check_properties_in_module(module: Any, properties: Set[str]) -> Set[str]: + """Check if any of the properties are used in the given module's script.""" + used_properties = set() + + # Get the source code of the module + source_code = inspect.getsource(module) + + # Check each property for usage in the source code + for prop in properties: + if prop in source_code: + used_properties.add(prop) + + return used_properties + + +def get_module_property_map(excluded_modules: Set[str]) -> Mapping[str, Set[Type[Module]]]: + """Constructs a map from property names to sets of Module subclass objects. + + :param excluded_modules: Set of Module subclass names to exclude from the map. + + :return: A mapping from property names to sets of corresponding Module subclass objects. + This adds an implicit requirement that the names of all the Module subclasses are unique. + + :raises RuntimeError: Raised if multiple Module subclasses with the same name are defined + (and not included in the excluded_modules set). + """ + properties_dictionary = {} + methods_package_path = os.path.dirname(inspect.getfile(tlo.methods)) + + for _, main_module_name, _ in pkgutil.iter_modules([methods_package_path]): + methods_module = importlib.import_module(f'tlo.methods.{main_module_name}') + for _, obj in inspect.getmembers(methods_module): + if is_valid_tlo_module_subclass(obj, excluded_modules): + properties_dictionary[obj.__name__] = obj + return properties_dictionary + + +def construct_property_dependency_graph( + excluded_modules: Set[str], + disease_module_node_defaults: dict, + other_module_node_defaults: dict, + pregnancy_related_module_node_defaults: dict, + cancer_related_module_node_defaults: dict, + properies_node_defaults: dict, + get_dependencies: DependencyGetter = get_properties, +): + """Construct a pydot object representing module dependency graph. + :param excluded_modules: Set of ``Module`` subclass names to not included in graph. + :param get_dependencies: Function which given a module gets the set of property + dependencies. Defaults to extracting all dependencies. + :param X_node_defaults: Defaults for specified subgraphs. + :return: Pydot directed graph representing module dependencies. 
+ """ + if pydot is None: + raise RuntimeError("pydot package must be installed") + + property_class_map = get_module_property_map(excluded_modules) + property_graph = pydot.Dot("properties", graph_type="digraph", rankdir='LR') + + cancer_module_names = [ + 'BladderCancer', 'BreastCancer', 'OtherAdultCancer', + 'OesophagealCancer', 'ProstateCancer' + ] + + pregnancy_module_names = [ + 'Contraception', 'Labour', 'PregnancySupervisor', + 'PostnatalSupervisor', 'NewbornOutcomes', 'CareOfWomenDuringPregnancy' + ] + + # Subgraphs for different groups of modules - attempt at aesthetics + disease_module_subgraph = pydot.Subgraph("disease_modules") + property_graph.add_subgraph(disease_module_subgraph) + + pregnancy_modules_subgraph = pydot.Subgraph("pregnancy_modules") + property_graph.add_subgraph(pregnancy_modules_subgraph) + + other_module_subgraph = pydot.Subgraph("other_modules") + property_graph.add_subgraph(other_module_subgraph) + + cancer_modules_subgraph = pydot.Subgraph("cancer_modules") + cancer_modules_subgraph.set_rank('same') + property_graph.add_subgraph(cancer_modules_subgraph) + + infectious_diseases_subgraph = pydot.Subgraph("infectious_diseases") + property_graph.add_subgraph(infectious_diseases_subgraph) + + properties_diseases_subgraph = pydot.Subgraph("properties") + property_graph.add_subgraph(properties_diseases_subgraph) + + # Set default styles for nodes + disease_module_node_defaults["style"] = "filled" + other_module_node_defaults["style"] = "filled" + pregnancy_related_module_node_defaults["style"] = "filled" + cancer_related_module_node_defaults["style"] = "filled" + properies_node_defaults["style"] = "filled" + + for name, module_class in property_class_map.items(): # only works for disease modules, not properties + colour = get_color_short_treatment_id_extra_modules(name) + node_attributes = { + "fillcolor": colour, + "color": "black", # Outline color + "fontname": "Arial", + } + + if name in pregnancy_module_names: + node_attributes.update(pregnancy_related_module_node_defaults) + node_attributes["shape"] = "diamond" # Pregnancy modules + pregnancy_modules_subgraph.add_node(pydot.Node(name, **node_attributes)) + + elif name in cancer_module_names: + node_attributes.update(cancer_related_module_node_defaults) + node_attributes["shape"] = "invtrapezium" # Cancer modules + cancer_modules_subgraph.add_node(pydot.Node(name, **node_attributes)) + + elif Metadata.DISEASE_MODULE not in module_class.METADATA: + node_attributes.update(other_module_node_defaults) + node_attributes["shape"] = "ellipse" # Other modules + other_module_subgraph.add_node(pydot.Node(name, **node_attributes)) + else: + node_attributes.update(disease_module_node_defaults) + node_attributes["shape"] = "box" # Disease modules + disease_module_subgraph.add_node(pydot.Node(name, **node_attributes)) + + for key, property_module in property_class_map.items(): + if property_module not in excluded_modules: + properties_of_module = get_dependencies(property_module) + for main_module, dependent_module in property_class_map.items(): + if property_module != dependent_module: + used_properties = check_properties_in_module(dependent_module, properties_of_module) + for property in used_properties: + if property.startswith("ri"): + node_attributes = { + "fillcolor": "darkorange", + "color": "black", # Outline color + "fontname": "Arial", + } + else: + node_attributes = { + "fillcolor": "white", + "color": "black", # Outline color + "fontname": "Arial", + } + node_attributes.update(properies_node_defaults) + 
node_attributes["shape"] = "square" + properties_diseases_subgraph.add_node(pydot.Node(property, **node_attributes)) + properties_diseases_subgraph.set_rank('same') + property_graph.add_edge(pydot.Edge(property, main_module)) + + return property_graph + + +def property_dependency_map_by_module( + excluded_modules: Set[str], + properies_node_defaults: dict, + output_path: Path, + get_dependencies: DependencyGetter = get_properties, +): + """ + param excluded_modules: modules for which dependencies should not be checked + param properies_node_defaults: default properies of a node + param output_path: where files write to + param get_dependencies: Function which given a module gets the set of property + dependencies. Defaults to extracting all dependencies. + """ + property_node_attributes = { + "fillcolor": "white", + "color": "black", + "fontname": "Arial", + "shape": "square", + "style": "filled", + } + property_class_map = get_module_property_map(excluded_modules) + for key, dependent_module in property_class_map.items(): + if dependent_module not in excluded_modules: + colour = get_color_short_treatment_id_extra_modules(key) + node_attributes = { + "fillcolor": colour, + "color": "black", + "fontname": "Arial", + "shape": "square", + "style": "filled", + } + property_graph = pydot.Dot("properties", graph_type="digraph", rankdir='LR') + property_graph.add_node(pydot.Node(key, **node_attributes)) + for property_key, property_module in property_class_map.items(): + if key != property_key and property_module not in excluded_modules: + properties_of_module = get_dependencies(property_module) + used_properties = check_properties_in_module(dependent_module, properties_of_module) + for property in used_properties: + property_graph.add_node(pydot.Node(property, **property_node_attributes)) + property_graph.add_edge(pydot.Edge(property, key)) + + graph_name = output_path/f"{key}.png" + #print(property_graph) + property_graph.write(graph_name, format="png") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "output_file", type=Path, help=( + "Path to output graph to. 
File extension will determine output format - for example: dot, dia, png, svg" + ) + ) + args = parser.parse_args() + + excluded_modules = { + "Mockitis", + "ChronicSyndrome", + "Skeleton", + "AlriPropertiesOfOtherModules", + "DiarrhoeaPropertiesOfOtherModules", + "DummyHivModule", + "SimplifiedBirths", + "Demography", + "HealthBurden", + "SymptomManager", + "DummyTbModule", + "ImprovedHealthSystemAndCareSeekingScenarioSwitcher", + "HealthSeekingBehaviour", + "HealthSystem", + "Deviance", + "SimplifiedPregnancyAndLabour", + "DummyDisease", + "Module" + } + property_dependency_map_by_module(excluded_modules, properies_node_defaults={"shape": "square"}, + output_path=args.output_file) + + module_graph = construct_property_dependency_graph( + excluded_modules, + disease_module_node_defaults={"shape": "box"}, + other_module_node_defaults={"shape": "ellipse"}, + pregnancy_related_module_node_defaults={"shape": "diamond"}, + cancer_related_module_node_defaults={"shape": "invtrapezium"}, + properies_node_defaults={"shape": "square"} + ) + + module_graph.write(args.output_file/"property_graph_full.png", format="png") diff --git a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py index 7a2af7fbed..3902e5d49b 100644 --- a/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py +++ b/src/scripts/hiv/projections_jan2023/analysis_logged_deviance.py @@ -34,8 +34,8 @@ # %% Run the simulation start_date = Date(2010, 1, 1) -end_date = Date(2022, 1, 1) -popsize = 5000 +end_date = Date(2014, 1, 1) +popsize = 25000 # scenario = 1 @@ -51,7 +51,7 @@ "tlo.methods.tb": logging.INFO, "tlo.methods.demography": logging.INFO, # "tlo.methods.demography.detail": logging.WARNING, - # "tlo.methods.healthsystem.summary": logging.INFO, + "tlo.methods.healthsystem.summary": logging.INFO, # "tlo.methods.healthsystem": logging.INFO, # "tlo.methods.healthburden": logging.INFO, }, @@ -70,7 +70,7 @@ resourcefilepath=resourcefilepath, service_availability=["*"], # all treatment allowed mode_appt_constraints=1, # mode of constraints to do with officer numbers and time - cons_availability="default", # mode for consumable constraints (if ignored, all consumables available) + cons_availability="all", # mode for consumable constraints (if ignored, all consumables available) ignore_priority=False, # do not use the priority information in HSI event to schedule capabilities_coefficient=1.0, # multiplier for the capabilities of health officers use_funded_or_actual_staffing="actual", # actual: use numbers/distribution of staff available currently @@ -89,7 +89,7 @@ # set the scenario sim.modules["Hiv"].parameters["do_scaleup"] = True sim.modules["Hiv"].parameters["scaleup_start_year"] = 2019 -# sim.modules["Tb"].parameters["scenario"] = scenario +sim.modules["Tb"].parameters["first_line_test"] = 'xpert' # sim.modules["Tb"].parameters["scenario_start_date"] = Date(2010, 1, 1) # sim.modules["Tb"].parameters["scenario_SI"] = "z" diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index 882894d6af..180a7571ab 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -12,6 +12,7 @@ from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer from pyinstrument.session import Session from scale_run import save_arguments_to_json, scale_run +from shared import memory_statistics try: from ansi2html import Ansi2HTMLConverter @@ -168,6 +169,8 @@ def record_run_statistics( 
**profiling_session_statistics(profiling_session), # Disk input/output statistics **disk_statistics(disk_usage), + # Process memory statistics + **memory_statistics(), # Statistics from end end-state of the simulation **simulation_statistics(completed_sim), # User-defined additional stats (if any) @@ -222,7 +225,7 @@ def run_profiling( "initial_population": initial_population, "log_filename": "scale_run_profiling", "log_level": "WARNING", - "parse_log_file": False, + "parse_log_file": True, "show_progress_bar": show_progress_bar, "seed": 0, "disable_health_system": False, @@ -245,7 +248,7 @@ def run_profiling( # Profile scale_run disk_at_start = disk_io_counters() - completed_simulation = scale_run( + completed_simulation, logs_dict = scale_run( **scale_run_args, output_dir=output_dir, profiler=profiler ) disk_at_end = disk_io_counters() @@ -303,7 +306,8 @@ def run_profiling( timeline=False, color=True, flat=True, - processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"} + flat_time="total", + processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*", "filter_threshold": 1e-3} ) converter = Ansi2HTMLConverter(title=output_name) print(f"Writing {output_html_file}", end="...", flush=True) @@ -323,6 +327,13 @@ def run_profiling( additional_stats=additional_stats, ) print("done") + + # Write out logged profiling statistics + logged_statistics_file = output_dir / f"{output_name}.logged-stats.csv" + print(f"Writing {logged_statistics_file}", end="...", flush=True) + logs_dict["tlo.profiling"]["stats"].to_csv(logged_statistics_file, index=False) + print("done") + if __name__ == "__main__": diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py index 735d1e7ba3..1e5d8042b3 100644 --- a/src/scripts/profiling/scale_run.py +++ b/src/scripts/profiling/scale_run.py @@ -13,6 +13,7 @@ from shared import print_checksum, schedule_profile_log from tlo import Date, Simulation, logging +from tlo.analysis.utils import LogsDict from tlo.analysis.utils import parse_log_file as parse_log_file_fn from tlo.methods.fullmodel import fullmodel @@ -55,14 +56,10 @@ def scale_run( ignore_warnings: bool = False, log_final_population_checksum: bool = True, profiler: Optional["Profiler"] = None, -) -> Simulation: +) -> Simulation | tuple[Simulation, LogsDict]: if ignore_warnings: warnings.filterwarnings("ignore") - # Start profiler if one has been passed - if profiler is not None: - profiler.start() - # Simulation period start_date = Date(2010, 1, 1) end_date = start_date + pd.DateOffset(years=years, months=months) @@ -70,9 +67,14 @@ def scale_run( log_config = { "filename": log_filename, "directory": output_dir, - "custom_levels": {"*": getattr(logging, log_level)}, + # Ensure tlo.profiling log records always recorded + "custom_levels": {"*": getattr(logging, log_level), "tlo.profiling": logging.INFO}, "suppress_stdout": disable_log_output_to_stdout, } + + # Start profiler if one has been passed + if profiler is not None: + profiler.start() sim = Simulation( start_date=start_date, @@ -102,17 +104,19 @@ def scale_run( # Run the simulation sim.make_initial_population(n=initial_population) - schedule_profile_log(sim) + schedule_profile_log(sim, frequency_months=1) sim.simulate(end_date=end_date) + + # Stop profiling session + if profiler is not None: + profiler.stop() + if log_final_population_checksum: print_checksum(sim) if save_final_population: sim.population.props.to_pickle(output_dir / "final_population.pkl") - if parse_log_file: - 
parse_log_file_fn(sim.log_filepath) - if record_hsi_event_details: with open(output_dir / "hsi_event_details.json", "w") as json_file: json.dump( @@ -124,10 +128,11 @@ def scale_run( ], json_file, ) + + if parse_log_file: + logs_dict = parse_log_file_fn(sim.log_filepath) + return sim, logs_dict - # Stop profiling session - if profiler is not None: - profiler.stop() return sim diff --git a/src/scripts/profiling/shared.py b/src/scripts/profiling/shared.py index cc972cfa66..caa06cf468 100644 --- a/src/scripts/profiling/shared.py +++ b/src/scripts/profiling/shared.py @@ -4,6 +4,11 @@ import pandas as pd +try: + import psutil +except ImportError: + psutil = None + from tlo import DateOffset, Simulation, logging from tlo.events import PopulationScopeEventMixin, RegularEvent from tlo.util import hash_dataframe @@ -12,9 +17,34 @@ logger.setLevel(logging.INFO) +def memory_statistics() -> dict[str, float]: + """ + Extract memory usage statistics in current process using `psutil` if available. + Statistics are returned as a dictionary. If `psutil` not installed an empty dict is returned. + + Key / value pairs are: + memory_rss_MiB: float + Resident set size in mebibytes. The non-swapped physical memory the process has used. + memory_vms_MiB: float + Virtual memory size in mebibytes. The total amount of virtual memory used by the process. + memory_uss_MiB: float + Unique set size in mebibytes. The memory which is unique to a process and which would be freed if the process + was terminated right now + """ + if psutil is None: + return {} + process = psutil.Process() + memory_info = process.memory_full_info() + return { + "memory_rss_MiB": memory_info.rss / 2**20, + "memory_vms_MiB": memory_info.vms / 2**20, + "memory_uss_MiB": memory_info.uss / 2**20, + } + + class LogProgress(RegularEvent, PopulationScopeEventMixin): - def __init__(self, module): - super().__init__(module, frequency=DateOffset(months=3)) + def __init__(self, module, frequency_months=3): + super().__init__(module, frequency=DateOffset(months=frequency_months)) self.time = time.time() def apply(self, population): @@ -26,16 +56,18 @@ def apply(self, population): key="stats", data={ "time": datetime.datetime.now().isoformat(), - "duration": duration, - "alive": df.is_alive.sum(), - "total": len(df), + "duration_minutes": duration, + "pop_df_number_alive": df.is_alive.sum(), + "pop_df_rows": len(df), + "pop_df_mem_MiB": df.memory_usage(index=True, deep=True).sum() / 2**20, + **memory_statistics(), }, ) -def schedule_profile_log(sim: Simulation) -> None: +def schedule_profile_log(sim: Simulation, frequency_months: int = 3) -> None: """Schedules the log progress event, used only for profiling""" - sim.schedule_event(LogProgress(sim.modules["Demography"]), sim.start_date) + sim.schedule_event(LogProgress(sim.modules["Demography"], frequency_months), sim.start_date) def print_checksum(sim: Simulation) -> None: diff --git a/src/tlo/analysis/life_expectancy.py b/src/tlo/analysis/life_expectancy.py index 6e3e9b4e83..ebde940f66 100644 --- a/src/tlo/analysis/life_expectancy.py +++ b/src/tlo/analysis/life_expectancy.py @@ -99,6 +99,36 @@ def _aggregate_person_years_by_age(results_folder, target_period) -> pd.DataFram return py_by_sex_and_agegroup +def calculate_probability_of_dying(interval_width, fraction_of_last_age_survived, sex, _person_years_at_risk, + _number_of_deaths_in_interval) -> pd.DataFrame: + """Returns the probability of dying in each interval""" + + person_years_by_sex = _person_years_at_risk.xs(key=sex, level='sex') + + 
    number_of_deaths_by_sex = _number_of_deaths_in_interval.xs(key=sex, level='sex')
+
+    death_rate_in_interval = number_of_deaths_by_sex / person_years_by_sex
+
+    death_rate_in_interval = death_rate_in_interval.fillna(0)
+
+    if death_rate_in_interval.loc['90'] == 0:
+        death_rate_in_interval.loc['90'] = death_rate_in_interval.loc['85-89']
+
+    condition = number_of_deaths_by_sex > (
+
+        person_years_by_sex / interval_width / fraction_of_last_age_survived)
+
+    probability_of_dying_in_interval = pd.Series(index=number_of_deaths_by_sex.index, dtype=float)
+
+    probability_of_dying_in_interval[condition] = 1
+
+    probability_of_dying_in_interval[~condition] = interval_width * death_rate_in_interval / (
+
+        1 + interval_width * (1 - fraction_of_last_age_survived) * death_rate_in_interval)
+
+    probability_of_dying_in_interval.at['90'] = 1
+    return probability_of_dying_in_interval, death_rate_in_interval
+
+
 def _estimate_life_expectancy(
     _person_years_at_risk: pd.Series,
@@ -124,29 +154,11 @@
     # separate male and female data
     for sex in ['M', 'F']:
-        person_years_by_sex = _person_years_at_risk.xs(key=sex, level='sex')
-        number_of_deaths_by_sex = _number_of_deaths_in_interval.xs(key=sex, level='sex')
-
-        death_rate_in_interval = number_of_deaths_by_sex / person_years_by_sex
-        # if no deaths or person-years, produces nan
-        death_rate_in_interval = death_rate_in_interval.fillna(0)
-        # if no deaths in age 90+, set death rate equal to value in age 85-89
-        if death_rate_in_interval.loc['90'] == 0:
-            death_rate_in_interval.loc['90'] = death_rate_in_interval.loc['85-89']
-
-        # Calculate the probability of dying in the interval
-        # condition checks whether the observed number deaths is significantly higher than would be expected
-        # based on population years at risk and survival fraction
-        # if true, suggests very high mortality rates and returns value 1
-        condition = number_of_deaths_by_sex > (
-            person_years_by_sex / interval_width / fraction_of_last_age_survived)
-        probability_of_dying_in_interval = pd.Series(index=number_of_deaths_by_sex.index, dtype=float)
-        probability_of_dying_in_interval[condition] = 1
-        probability_of_dying_in_interval[~condition] = interval_width * death_rate_in_interval / (
-            1 + interval_width * (1 - fraction_of_last_age_survived) * death_rate_in_interval)
-        # all those surviving to final interval die during this interval
-        probability_of_dying_in_interval.at['90'] = 1
-
+        probability_of_dying_in_interval, death_rate_in_interval = calculate_probability_of_dying(interval_width,
+            fraction_of_last_age_survived,
+            sex,
+            _person_years_at_risk,
+            _number_of_deaths_in_interval)
         # number_alive_at_start_of_interval
         # keep dtype as float in case using aggregated outputs
         # note range stops BEFORE the specified number
@@ -248,3 +260,90 @@
     else:
         return summarize(results=output, only_mean=False, collapse_columns=False)
+
+
+def _calculate_probability_of_premature_death_for_single_run(
+    age_before_which_death_is_defined_as_premature: int,
+    person_years_at_risk: pd.Series,
+    number_of_deaths_in_interval: pd.Series
+) -> Dict[str, float]:
+    """
+    For a single run, estimate the probability of dying before the defined premature age for males and females.
+    Returns: Dict keyed by "M" and "F" for sex, with values the estimated probability of dying before the defined
+    premature age).
+ """ + probability_of_premature_death = dict() + + age_group_labels = person_years_at_risk.index.get_level_values('age_group').unique() + interval_width = [ + 5 if '90' in interval else int(interval.split('-')[1]) - int(interval.split('-')[0]) + 1 + if '-' in interval else 1 for interval in age_group_labels.categories + ] + number_age_groups = len(interval_width) + fraction_of_last_age_survived = pd.Series([0.5] * number_age_groups, index=age_group_labels) + + for sex in ['M', 'F']: + probability_of_dying_in_interval, death_rate_in_interval = calculate_probability_of_dying(interval_width, + fraction_of_last_age_survived, + sex, + person_years_at_risk, + number_of_deaths_in_interval) + + # Calculate cumulative probability of dying before the defined premature age + cumulative_probability_of_dying = 0 + proportion_alive_at_start_of_interval = 1.0 + + for age_group, prob in probability_of_dying_in_interval.items(): + if int(age_group.split('-')[0]) >= age_before_which_death_is_defined_as_premature: + break + cumulative_probability_of_dying += proportion_alive_at_start_of_interval * prob + proportion_alive_at_start_of_interval *= (1 - prob) + + probability_of_premature_death[sex] = cumulative_probability_of_dying + + return probability_of_premature_death + + +def get_probability_of_premature_death( + results_folder: Path, + target_period: Tuple[datetime.date, datetime.date], + summary: bool = True, + age_before_which_death_is_defined_as_premature: int = 70 +) -> pd.DataFrame: + """ + Produces sets of probability of premature death for each draw/run. + + Args: + - results_folder (PosixPath): The path to the results folder containing log, `tlo.methods.demography` + - target period (tuple of dates): Declare the date range (inclusively) in which the probability is to be estimated. + - summary (bool): Declare whether to return a summarized value (mean with 95% uncertainty intervals) + or return the estimate for each draw/run. + - age_before_which_death_is_defined_as_premature (int): proposed in defined in Norheim et al.(2015) to be 70 years + + Returns: + - pd.DataFrame: The DataFrame with the probability estimates for every draw/run in the results folder; + or, with option `summary=True`, summarized (central, lower, upper estimates) for each draw. 
+ """ + info = get_scenario_info(results_folder) + deaths = _num_deaths_by_age_group(results_folder, target_period) + person_years = _aggregate_person_years_by_age(results_folder, target_period) + + prob_for_each_draw_and_run = dict() + + for draw in range(info['number_of_draws']): + for run in range(info['runs_per_draw']): + prob_for_each_draw_and_run[(draw, run)] = _calculate_probability_of_premature_death_for_single_run( + age_before_which_death_is_defined_as_premature=age_before_which_death_is_defined_as_premature, + number_of_deaths_in_interval=deaths[(draw, run)], + person_years_at_risk=person_years[(draw, run)] + ) + + output = pd.DataFrame.from_dict(prob_for_each_draw_and_run) + output.index.name = "sex" + output.columns = output.columns.set_names(level=[0, 1], names=['draw', 'run']) + + if not summary: + return output + + else: + return summarize(results=output, only_mean=False, collapse_columns=False) diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py index 88fd065cfc..cbd98fe640 100644 --- a/src/tlo/analysis/utils.py +++ b/src/tlo/analysis/utils.py @@ -1,6 +1,7 @@ """ General utility functions for TLO analysis """ +import fileinput import gzip import json import os @@ -86,6 +87,40 @@ def parse_log_file(log_filepath, level: int = logging.INFO): return LogsDict({name: handle.name for name, handle in module_name_to_filehandle.items()}, level) +def merge_log_files(log_path_1: Path, log_path_2: Path, output_path: Path) -> None: + """Merge two log files, skipping any repeated header lines. + + :param log_path_1: Path to first log file to merge. Records from this log file will + appear first in merged log file. + :param log_path_2: Path to second log file to merge. Records from this log file will + appear after those in log file at `log_path_1` and any header lines in this file + which are also present in log file at `log_path_1` will be skipped. + :param output_path: Path to write merged log file to. Must not be one of `log_path_1` + or `log_path_2` as data is read from files while writing to this path. 
+ """ + if output_path == log_path_1 or output_path == log_path_2: + msg = "output_path must not be equal to log_path_1 or log_path_2" + raise ValueError(msg) + with fileinput.input(files=(log_path_1, log_path_2), mode="r") as log_lines: + with output_path.open("w") as output_file: + written_header_lines = {} + for log_line in log_lines: + log_data = json.loads(log_line) + if "type" in log_data and log_data["type"] == "header": + if log_data["uuid"] in written_header_lines: + previous_header_line = written_header_lines[log_data["uuid"]] + if previous_header_line == log_line: + continue + else: + msg = ( + "Inconsistent header lines with matching UUIDs found when merging logs:\n" + f"{previous_header_line}\n{log_line}\n" + ) + raise RuntimeError(msg) + written_header_lines[log_data["uuid"]] = log_line + output_file.write(log_line) + + def write_log_to_excel(filename, log_dataframes): """Takes the output of parse_log_file() and creates an Excel file from dataframes""" metadata = list() @@ -290,7 +325,9 @@ def generate_series(dataframe: pd.DataFrame) -> pd.Series: try: df: pd.DataFrame = load_pickled_dataframes(results_folder, draw, run, module)[module][key] output_from_eval: pd.Series = generate_series(df) - assert pd.Series == type(output_from_eval), 'Custom command does not generate a pd.Series' + assert isinstance(output_from_eval, pd.Series), ( + 'Custom command does not generate a pd.Series' + ) if do_scaling: res[draw_run] = output_from_eval * get_multiplier(draw, run) else: diff --git a/src/tlo/bitset_handler/bitset_extension.py b/src/tlo/bitset_handler/bitset_extension.py new file mode 100644 index 0000000000..92d7af734f --- /dev/null +++ b/src/tlo/bitset_handler/bitset_extension.py @@ -0,0 +1,706 @@ +from __future__ import annotations + +import operator +import re +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Sequence, + Set, + Tuple, + Type, + TypeAlias, +) + +import numpy as np +import pandas as pd +from numpy.typing import NDArray +from pandas._typing import TakeIndexer, type_t +from pandas.core.arrays.base import ExtensionArray +from pandas.core.dtypes.base import ExtensionDtype + +if TYPE_CHECKING: + from pandas._typing import type_t + +BYTE_WIDTH = 8 +BooleanArray: TypeAlias = np.ndarray[bool] +CastableForPandasOps: TypeAlias = ( + "ElementType" + | Iterable["ElementType"] + | NDArray[np.uint8] + | NDArray[np.bytes_] + | "BitsetArray" +) +SingletonForPandasOps: TypeAlias = "ElementType" | Iterable["ElementType"] +# Assume nodes are strings, else we can't construct from string when passed the name! +# We can likely get around this with some careful planning, but we'd have to figure out how +# to pass type-metadata for the elements from inside the output of self.name, so that casting +# was successful. +ElementType: TypeAlias = str + + +class BitsetDtype(ExtensionDtype): + """ + A Bitset is represented by a fixed-width string, whose characters are each a uint8. + Elements of the set map 1:1 to these characters. + + If the elements set is indexed starting from 0, then: + - The quotient of these indices (modulo 8) is the character within the string that contains the bit representing the element, + - The remainder (modulo 8) is the index within said character that represents the element itself. + + The element map takes an element of the bitset as a key, and returns a tuple whose first element is the + corresponding string-character index, and the latter the uint8 representation of the element within that + string character. 
+ """ + _element_map: Dict[ElementType, Tuple[int, np.uint8]] + _elements: Tuple[ElementType] + _index_map: Dict[Tuple[int, np.uint8], ElementType] + _metadata = ("_elements",) + + @classmethod + def construct_array_type(cls) -> type_t[BitsetArray]: + return BitsetArray + + @classmethod + def construct_from_string(cls, string: str) -> BitsetDtype: + """ + Construct an instance of this class by passing in a string of the form + that str() produces. + + That is, given a string of the form + bitset(#elements): e1, e2, e3, ... + + this method will return a BitsetDtype with elements e1, e2, e3, ... etc. + + The bitset(#elements): prefix is not required, simply passing a comma-separated + string of values will suffice to construct a bitset with those elements. + The prefix is typically supplied when constructing an implicit instance as part of + a call to `pd.Series` with the `dtype` parameter set to a string, + """ + if not isinstance(string, str): + raise TypeError(f"'construct_from_string' expects a string, got {type(string)}") + + string_has_bitset_prefix = re.match(r"bitset\((\d+)\):", string) + n_elements = None + if string_has_bitset_prefix: + prefix = string_has_bitset_prefix.group(0) + # Remove prefix + string = string.removeprefix(prefix) + # Extract number of elements if provided though + n_elements = int(re.search(r"(\d+)", prefix).group(0)) + if "," not in string: + raise TypeError( + "Need at least 2 (comma-separated) elements in string to construct bitset." + ) + else: + iterable_values = tuple(s.strip() for s in string.split(",")) + if n_elements is not None and len(iterable_values) != n_elements: + raise ValueError( + f"Requested bitset with {n_elements} elements, but provided {len(iterable_values)} elements: {iterable_values}" + ) + return BitsetDtype(s.strip() for s in string.split(",")) + + @property + def elements(self) -> Tuple[ElementType]: + return self._elements + + @property + def fixed_width(self) -> int: + """ + Fixed-length of the character string that represents this bitset. + """ + return (self.n_elements - 1) // BYTE_WIDTH + 1 + + @property + def n_elements(self) -> int: + return len(self._elements) + + @property + def na_value(self) -> np.bytes_: + return self.type(self.fixed_width) + + @property + def name(self) -> str: + return self.__str__() + + @property + def np_array_dtype(self) -> np.dtype: + return np.dtype((bytes, self.fixed_width)) + + @property + def type(self) -> Type[np.bytes_]: + return self.np_array_dtype.type + + def __init__(self, elements: Iterable[ElementType]) -> None: + # Take only unique elements. + # Sort elements alphabetically for consistency when constructing Bitsets that + # represent the same items. + # Cast all element types to strings so that construct_from_string does not need + # metadata about the type of each element. + provided_elements = sorted([e for e in elements]) + if not all( + isinstance(e, ElementType) for e in provided_elements + ): + raise TypeError(f"BitSet elements must type {ElementType}") + self._elements = tuple( + sorted(set(provided_elements), key=lambda x: provided_elements.index(x)) + ) + + if len(self._elements) <= 1: + raise ValueError("Bitsets must have at least 2 possible elements (use bool for 1-element sets).") + + # Setup the element map and its inverse, one-time initialisation cost. 
+ self._element_map = { + e: (index // BYTE_WIDTH, np.uint8(2 ** (index % BYTE_WIDTH))) + for index, e in enumerate(self._elements) + } + self._index_map = {loc: element for element, loc in self._element_map.items()} + + def __repr__(self) -> str: + return f"bitset({self.n_elements}): {', '.join(str(e) for e in self._elements)}" + + def __str__(self) -> str: + return self.__repr__() + + def as_bytes(self, collection: Iterable[ElementType] | ElementType) -> np.bytes_: + """ + Return the bytes representation of this set or single element. + """ + return np.bytes_(self.as_uint8_array(collection)) + + def as_set(self, binary_repr: np.bytes_) -> Set[ElementType]: + """ + Return the set corresponding to the binary representation provided. + """ + elements_in_set = set() + for char_index, byte_value in enumerate(binary_repr): + bin_rep = format(byte_value, "b") + elements_in_set |= { + self._index_map[(char_index, np.uint8(2**i))] + for i, bit in enumerate(reversed(bin_rep)) + if bit == "1" + } + return elements_in_set + + def as_uint8_array(self, collection: Iterable[ElementType] | ElementType) -> NDArray[np.uint8]: + """ + Return the collection of elements as a 1D array of ``self.fixed_width`` uint8s. + Each uint8 corresponds to the bitwise representation of a single character + in a character string. + + A single element will be broadcast to a (1,) numpy array. + """ + if isinstance(collection, ElementType): + collection = set(collection) + + output = np.zeros((self.fixed_width, 1), dtype=np.uint8) + for element in collection: + char, bin_repr = self._element_map[element] + output[char] |= bin_repr + return output.squeeze(axis=1) + + def element_loc(self, element: ElementType) -> Tuple[int, np.uint8]: + """ + Location in of the bit corresponding to the element in this bitset. + + Each element in the bitset is mapped to a single bit via the _element_map, and + can be located by specifying both: + - The index of the character in the fixed-width string that represents the bitset. + - The power of 2 within the uint8 representation of the the single character that corresponds to the element. + + For example, a bitset of 18 elements is stored as a fixed-width string of 3 characters, + giving 24 bits to utilise. These are further subdivided into groups of 8, the first 8 + corresponding to the uint8 representation of the 0-indexed character, and so on. Each element within + this bitset is assigned a power of two within one of the character representations. + + :param element: Element value to locate. + :returns: The character index, and ``np.uint8`` representation of the element, unpacked in that order. + """ + return self._element_map[element] + + +class BitsetArray(ExtensionArray): + """ + Represents a series of Bitsets; each element in the series is a fixed-width bytestring, + which represents some possible combination of elements of a bitset as defined by + ``self.dtype``. + + When extracting a single entry via ``.loc`` or ``.at``, the value returned is a ``set``. + This means that operations such as ``self.loc[0] |= {"1"}`` will behave as set operations + from base Python. This is achieved by setting the behaviour of the ``__setitem__`` method + to interpret ``set`` values as representations of the underlying bitset, thus causing them + to be cast to their bytestring representation being being assigned. 
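
Two small usage sketches for the dtype machinery above, with illustrative element names: constructing the dtype from its string form, and round-tripping a set through its fixed-width byte representation.

from tlo.bitset_handler.bitset_extension import BitsetDtype

# String construction: the prefixed and bare forms should agree.
dtype_a = BitsetDtype.construct_from_string("bitset(3): fever, cough, rash")
dtype_b = BitsetDtype.construct_from_string("fever, cough, rash")
assert dtype_a.elements == dtype_b.elements  # elements are sorted alphabetically
print(dtype_a.fixed_width)  # 1: three elements fit in a single byte

# Round trip: set -> fixed-width bytes -> set.
packed = dtype_a.as_bytes({"fever", "rash"})
assert dtype_a.as_set(packed) == {"fever", "rash"}
print(dtype_a.element_loc("rash"))  # (character index, uint8 bit) for "rash"
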
+ + Supported Operations (slices) + ----------------------------- + When operating on slices or masks of the series, we have to re-implement the desired operators + so that users can continue to pass ``set``s as scalar arguments on the left. As a general rule + of thumb, if a binary operator can be performed on ``set``s, it will also work identically, + but entry-wise, on a bitset series. + + ``NodeType`` instances will be cast to ``set``s if provided as singletons. Comparisons will be + performed entry-wise if a suitable vector of values is provided as the comparison target. + + Currently implemented methods are: + + = : + Directly assign the value on the right to the entry/entries on the left. + +, | : + Perform union of the values on the left with those on the right. + +=, |= : + In-place union; add values on the right to the sets on the left. + & : + Perform intersection of the values on the left with those on the right. + &= : + In-place intersection; retain only elements on the left that appear on the right. + -, -= : + Remove the values on the right from the sets on the left. + <, <= : + Entry-wise subset (strict subset) with the values on the right. + >, >= : + Entry-wise superset (strict superset) with the values on the right. + Note that the >= operation is the equivalent of entry-wise "if the values on the right + are contained in the bitsets on the left". + """ + + _data: NDArray[np.bytes_] + _dtype: BitsetDtype + + @staticmethod + def uint8s_to_byte_string(arr: np.ndarray[np.uint8]) -> NDArray[np.bytes_]: + """ + Returns a view of an array of ``np.uint8``s of shape ``(M, N)`` + as an array of ``M`` fixed-width byte strings of size ``N``. + """ + fixed_width = arr.shape[1] + return arr.view(f"{fixed_width}S").squeeze() + + @classmethod + def _concat_same_type(cls, to_concat: Sequence[BitsetArray]) -> BitsetArray: + concat_data = np.concatenate(bsa._data for bsa in to_concat) + return cls(concat_data, to_concat[0].dtype) + + @classmethod + def _from_sequence( + cls, scalars: Iterable[Set[ElementType] | ElementType], *, dtype: BitsetDtype | None = None, copy: bool = False + ) -> BitsetArray: + """ + Construct a new BitSetArray from a sequence of scalars. + + :param scalars: Sequence of sets of elements (or single-values to be interpreted as single-element sets). + :param dtype: Cast to this datatype, only BitsetDtype is supported if not None. + If None, an attempt will be made to construct an appropriate BitsetDtype using the scalar values provided. + :param copy: If True, copy the underlying data. Default False. + """ + # Check that we have only been passed sets as scalars. Implicitly convert single-items to sets. + for i, s in enumerate(scalars): + if not isinstance(s, set): + if isinstance(s, ElementType): + scalars[i] = set(s) + else: + raise ValueError(f"{s} cannot be cast to an element of a bitset.") + + # If no dtype has been provided, attempt to construct an appropriate BitsetDtype. + if dtype is None: + # Determine the elements in the bitset by looking through the scalars + all_elements = set().union(scalars) + dtype = BitsetDtype(all_elements) + elif not isinstance(dtype, BitsetDtype): + raise TypeError(f"BitsetArray cannot be constructed with dtype {dtype}") + + # With an appropriate dtype, we can construct the data array to pass to the constructor. + # We will need to convert each of our scalars to their binary representations before passing though. 
+ data = np.zeros((len(scalars),), dtype=dtype.np_array_dtype) + view_format = f"{dtype.fixed_width}B" if dtype.fixed_width != 1 else "(1,1)B" + data_view = data.view(view_format) + for series_index, s in enumerate(scalars): + for element in s: + char, u8_repr = dtype.element_loc(element=element) + data_view[series_index, char] |= u8_repr + return cls(data, dtype, copy=copy) + + @classmethod + def _from_factorized(cls, uniques: np.ndarray, original: BitsetArray) -> BitsetArray: + return cls(uniques, original.dtype) + + @property + def _uint8_view_format(self) -> str: + """ + Format string to be applied to self._data, so that the output of + + self._data.view() + + returns a numpy array of shape (len(self), self.dtype.fixed_width) + and dtype uint8. + """ + return f"({self.dtype.fixed_width},)B" + + @property + def _uint8_view(self) -> NDArray[np.bytes_]: + """ + Returns a view of the fixed-width byte strings stored in ``self._data`` + as an array of ``numpy.uint8``s, with shape + + ``(len(self._data), self.dtype.fixed_width)``. + + Each row ``i`` of this view corresponds to a bitset stored in this array. + The value at index ``i, j`` in this view is the ``uint8`` that represents + character ``j`` in ``self._data[i]``, which can have bitwise operations + performed on it. + """ + return self._data.view(self._uint8_view_format) + + @property + def as_sets(self) -> List[Set[ElementType]]: + """ + Return a list whose entry i is the set representation of the + bitset in entry i of this array. + """ + return [self.dtype.as_set(x) for x in self._data] + + @property + def dtype(self) -> BitsetDtype: + return self._dtype + + @property + def nbytes(self) -> int: + return self._data.nbytes + + def __init__( + self, + data: Iterable | np.ndarray, + dtype: BitsetDtype, + copy: bool = False, + ) -> None: + """ """ + if not isinstance(dtype, BitsetDtype): + raise TypeError("BitsetArray must have BitsetDtype data.") + + self._data = np.array(data, copy=copy, dtype=dtype.type) + self._dtype = dtype + + def __add__( + self, other: CastableForPandasOps + ) -> BitsetArray: + """ + Entry-wise union with other. + + - If other is ``NodeType`` or ``Iterable[NodeType]``, perform entry-wise OR with the set + representing the passed element values. + - If other is ``BitsetArray`` of compatible shape, take entry-wise union. + - If other is compatible ``np.ndarray``, take entry-wise union. + + Under the hood this is bitwise OR with other; self OR other. + """ + return BitsetArray( + self.__operate_bitwise( + lambda A, B: A | B, other, return_as_bytestring=True + ), + dtype=self.dtype, + ) + + def __and__(self, other: CastableForPandasOps + ) -> BitsetArray: + """ + Entry-wise intersection with other. + + - If other is ``NodeType`` or ``Iterable[NodeType]``, perform entry-wise AND with the set + representing the passed element values. + - If other is ``BitsetArray`` of compatible shape, take entry-wise intersection. + - If other is compatible ``np.ndarray``, take entry-wise intersection. + + Under the hood this is bitwise AND with other; self AND other. + """ + return BitsetArray( + self.__operate_bitwise( + lambda A, B: A & B, other, return_as_bytestring=True + ), + dtype=self.dtype, + ) + + def __cast_before_comparison_op( + self, value: CastableForPandasOps + ) -> Set[ElementType] | bool: + """ + Common steps taken before employing comparison operations on this class. + + Converts the value passed (as safely as possible) to a set, which can then + be compared with the bitsets stored in the instance. 
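
A sketch of the entry-wise set algebra documented above, working directly on a BitsetArray built via _from_sequence; a set on the right-hand side is broadcast across all entries. This assumes the behaviour described in the surrounding docstrings and is illustrative rather than exhaustive.

from tlo.bitset_handler.bitset_extension import BitsetArray, BitsetDtype

dtype = BitsetDtype(["fever", "cough", "rash"])
arr = BitsetArray._from_sequence([{"fever"}, {"cough", "rash"}, set()], dtype=dtype)

union = arr | {"rash"}          # "rash" added to every entry
intersection = arr & {"cough"}  # only entries containing "cough" keep it
print(union.as_sets)
print(intersection.as_sets)
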
+ + Return values are the converted value, and whether this value should be considered + a scalar-set (False) or a collection of sets (True). + """ + if isinstance(value, ElementType): + return set(value), False + elif isinstance(value, set): + return value, False + elif isinstance(value, BitsetArray): + return value.as_sets, True + elif isinstance(value, np.ndarray): + return [ + self.dtype.as_set(bytestr) + for bytestr in self.uint8s_to_byte_string(self.__cast_to_uint8(value)) + ] + # Last ditch attempt - we might have been given a list of sets, for example... + try: + value = set(value) + if all([isinstance(item, ElementType) for item in value]): + return value, False + elif all([isinstance(item, set) for item in value]): + return value, True + except Exception as e: + raise ValueError(f"Cannot compare bitsets with: {value}") from e + + def __cast_to_uint8(self, other: CastableForPandasOps) -> NDArray[np.uint8]: + """ + Casts the passed object to a ``np.uint8`` array that is compatible with bitwise operations + on ``self._uint8_view``. See the docstring for behaviour in the various usage cases. + + Scalar elements: + Cast to single-element sets, then treated as set. + + Sets: + Are converted to the (array of) uint8s that represents the set. + + ``np.ndarray``s of ``np.uint8`` + Are returned if they have the same number of columns as ``self._uint8_view``. + + ``np.ndarray``s of ``np.dtype("Sx")`` + If ``x`` corresponds to the same fixed-width as ``self.dtype.np_array_dtype``, are cast + to the corresponding ``np.uint8`` view, like ``self._uint8_view`` is from ``self._data``. + + BitsetArrays + Return their ``_uint8_view`` attribute. + """ + if isinstance(other, ElementType): + # Treat single-elements as single-element sets + other = set(other) + if isinstance(other, BitsetArray): + if self.dtype != other.dtype: + raise TypeError("Cannot cast a different Bitset to this one!") + else: + cast = other._uint8_view + elif isinstance(other, np.ndarray): + if other.size == 0: + cast = self.dtype.as_uint8_array({}) + elif (other == other[0]).all(): + cast = self.dtype.as_uint8_array(other[0]) + elif other.dtype == np.uint8 and other.shape[0] == self._uint8_view.shape[0]: + # Compatible uint8s, possibly a view of another fixed-width bytestring array + cast = other + elif other.dtype == self.dtype.np_array_dtype: + # An array of compatible fixed-width bytestrings + cast = other.view(self._uint8_view_format) + elif other.dtype == object and all(isinstance(s, (ElementType, set)) for s in other): + # We might have been passed an object array, where each object is a set or singleton that + # we need to convert. + as_bytes = np.array([self.dtype.as_bytes(s) for s in other], dtype=self.dtype.np_array_dtype) + cast = as_bytes.view(self._uint8_view_format) + else: + raise ValueError(f"Cannot convert {other} to an array of uint8s representing a bitset") + else: + # Must be a collection of elements (or will error), so cast. + cast = self.dtype.as_uint8_array(other) + return cast + + def __comparison_op(self, other: CastableForPandasOps, op: Callable[[Set[ElementType], Set[ElementType]], bool]) -> BooleanArray: + """ + Abstract method for strict and non-strict comparison operations. + + Notably, __eq__ does not redirect here since it is more efficient for us to convert + the single value to a bytestring and use numpy array comparison. + + For the other set comparison methods however, it's easier as a first implementation + for us to convert to sets and run the set operations. 
If there was a Pythonic way + of doing "bitwise less than" and "bitwise greater than", we could instead take the + same approach as in __operate_bitwise: + - Convert the inputs to ``NDArray[np.bytes_]``. + - Compare using __operate_bitwise with self._data. + + which would avoid us having to cast everything to a list and then do a list + comprehension (the numpy direct array comparison should be faster). + """ + if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)): + return NotImplemented + other, is_vector = self.__cast_before_comparison_op(other) + + if is_vector: + return np.array([op(s, other[i]) for i, s in enumerate(self.as_sets)]) + else: + return np.array([op(s, other) for s in self.as_sets], dtype=bool) + + def __contains__(self, item: SingletonForPandasOps | Any) -> BooleanArray | bool: + if isinstance(item, ElementType): + item = set(item) + if isinstance(item, set): + return item in self.as_sets + else: + return super().__contains__(item) + + def __eq__(self, other) -> bool: + if isinstance(other, (pd.Series, pd.DataFrame, pd.Index)): + return NotImplemented + elif isinstance(other, ElementType): + other = set(other) + + if isinstance(other, set): + ans = self._data == self.dtype.as_bytes(other) + else: + ans = self._data == other + return np.squeeze(ans) + + def __getitem__(self, item: int | slice | NDArray) -> BitsetArray: + return ( + self.dtype.as_set(self._data[item]) + if isinstance(item, int) + else BitsetArray(self._data[item], dtype=self.dtype) + ) + + def __ge__(self, other: SingletonForPandasOps) -> BooleanArray: + """ + Entry-wise non-strict superset: self >= other_set. + """ + return self.__comparison_op(other, operator.ge) + + def __gt__(self, other: SingletonForPandasOps) -> BooleanArray: + """ + Entry-wise strict superset: self > other_set. + """ + return self.__comparison_op(other, operator.gt) + + def __len__(self) -> int: + return self._data.shape[0] + + def __le__(self, other: SingletonForPandasOps) -> BooleanArray: + """ + Entry-wise non-strict subset: self <= other_set. + """ + return self.__comparison_op(other, operator.le) + + def __lt__(self, other: SingletonForPandasOps) -> BooleanArray: + """ + Entry-wise strict subset: self < other_set. + """ + return self.__comparison_op(other, operator.lt) + + def __operate_bitwise( + self, + op: Callable[[NDArray[np.uint8], NDArray[np.uint8]], NDArray[np.uint8]], + r_value: CastableForPandasOps, + l_value: Optional[CastableForPandasOps] = None, + return_as_bytestring: bool = False, + ) -> NDArray[np.bytes_] | NDArray[np.uint8]: + """ + Perform a bitwise operation on two compatible ``np.ndarray``s of ``np.uint8``s. + + By default, the left value passed to the operator is assumed to be ``self._uint8_data``. + + Return value is the result of the bitwise operation, as an array of uint8s. If you wish + to have this converted to the corresponding bytestring(s) before returning, use the + return_as_bytestring argument. + + :param op: Bitwise operation to perform on input values. + :param r_value: Right-value to pass to the operator. + :param l_value: Left-value to pass to the operator. + :param return_as_bytestring: Result will be returned as a fixed-width bytestring. 
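
The set-difference operator further below (__sub__) relies on the bit-level identity "A minus B == A AND (NOT B)", routed through __operate_bitwise; a one-line numpy check of that identity on uint8 masks (plain Python ints would not behave the same way because ~ is two's-complement on them).

import numpy as np

a = np.uint8(0b1011)  # bits for elements 0, 1 and 3
b = np.uint8(0b0010)  # bit for element 1
assert a & ~b == np.uint8(0b1001)  # elements 0 and 3 remain after removal
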
+ """ + l_value = self._uint8_view if l_value is None else self.__cast_to_uint8(l_value) + op_result = op(l_value, self.__cast_to_uint8(r_value)) + if return_as_bytestring: + op_result = self.uint8s_to_byte_string(op_result) + return op_result + + def __or__( + self, other: CastableForPandasOps + ) -> BitsetArray: + """ + Entry-wise union with other, delegating to ``self.__add__``. + + np.ndarrays of objects will attempt to interpret their elements as bitsets. + """ + return self.__add__(other) + + def __setitem__( + self, + key: int | slice | NDArray, + value: ( + np.bytes_ + | ElementType + | Set[ElementType] + | Sequence[np.bytes_ | ElementType| Set[ElementType]] + ), + ) -> None: + if isinstance(value, ElementType) or isinstance(value, set): + # Interpret this as a "scalar" set that we want to set all values to + value = self.dtype.as_bytes(value) + elif isinstance(value, np.bytes_): + # Value is a scalar that we don't need to convert + pass + else: + # Assume value is a sequence, and we will have to convert each value in turn + value = [ + v if isinstance(v, np.bytes_) else self.dtype.as_bytes(v) for v in value + ] + self._data[key] = value + + def __sub__( + self, other: CastableForPandasOps + ) -> BitsetArray: + """ + Remove elements from the Bitsets represented here. + + - If other is ``NodeType``, remove the single element from all series entries. + - If other is ``Iterable[NodeType]``, remove all elements from all series entries. + - If other is ``BitsetArray`` of compatible shape, take element-wise complements of series entries. + - If other is compatible ``np.ndarray``, take element-wise complements of series entries. + + Under the hood this the bitwise operation self AND (NOT other). + """ + return BitsetArray( + self.__operate_bitwise( + lambda A, B: A & (~B), other, return_as_bytestring=True + ), + dtype=self.dtype, + ) + + def _formatter(self, boxed: bool = False) -> Callable[[np.bytes_], str | None]: + if boxed: # If rendering an individual data value + return lambda x: ",".join(x) if x else "{}" + return repr # Render the table itself + + def copy(self) -> BitsetArray: + return BitsetArray(self._data, self.dtype, copy=True) + + def isna(self) -> NDArray: + """ + TODO: This isn't a great way to express missing data, but equally a bitset doesn't really ever contain missing data... 
+ """ + return np.isnan(self._data) + + def take( + self, + indices: TakeIndexer, + *, + allow_fill: bool = False, + fill_value: Optional[np.bytes_ | Set[ElementType]] = None, + ) -> BitsetArray: + if allow_fill: + if isinstance(fill_value, set): + fill_value = self.dtype.as_bytes(fill_value) + elif fill_value is None: + fill_value = self.dtype.na_value + elif not isinstance(fill_value, self.dtype.type): + raise TypeError( + f"Fill value must be of type {self.dtype.type} (got {type(fill_value).__name__})" + ) + scalars = np.empty((len(indices), ), dtype=self.dtype.type) + scalars[indices[indices >= 0]] = self._data[indices[indices >= 0]] + scalars[indices[indices < 0]] = fill_value + else: + scalars = np.take(self._data, indices) + return self._from_sequence(scalars) diff --git a/src/tlo/cli.py b/src/tlo/cli.py index c5b0c3f86d..578c3a6619 100644 --- a/src/tlo/cli.py +++ b/src/tlo/cli.py @@ -13,9 +13,9 @@ import dateutil.parser import pandas as pd from azure import batch -from azure.batch import batch_auth from azure.batch import models as batch_models from azure.batch.models import BatchErrorException +from azure.common.credentials import ServicePrincipalCredentials from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError from azure.identity import DefaultAzureCredential from azure.keyvault.secrets import SecretClient @@ -132,9 +132,7 @@ def batch_submit(ctx, scenario_file, asserts_on, more_memory, keep_pool_alive, i azure_directory = f"{config['DEFAULT']['USERNAME']}/{job_id}" batch_client = get_batch_client( - config["BATCH"]["NAME"], - config["BATCH"]["KEY"], - config["BATCH"]["URL"] + config["BATCH"]["CLIENT_ID"], config["BATCH"]["SECRET"], config["AZURE"]["TENANT_ID"], config["BATCH"]["URL"] ) create_file_share( @@ -247,8 +245,16 @@ def batch_submit(ctx, scenario_file, asserts_on, more_memory, keep_pool_alive, i try: # Create the job that will run the tasks. - create_job(batch_client, vm_size, pool_node_count, job_id, - container_conf, [mount_configuration], keep_pool_alive) + create_job( + batch_client, + vm_size, + pool_node_count, + job_id, + container_conf, + [mount_configuration], + keep_pool_alive, + config["BATCH"]["SUBNET_ID"], + ) # Add the tasks to the job. 
add_tasks(batch_client, user_identity, job_id, image_name, @@ -297,9 +303,7 @@ def batch_terminate(ctx, job_id): return batch_client = get_batch_client( - config["BATCH"]["NAME"], - config["BATCH"]["KEY"], - config["BATCH"]["URL"] + config["BATCH"]["CLIENT_ID"], config["BATCH"]["SECRET"], config["AZURE"]["TENANT_ID"], config["BATCH"]["URL"] ) # check the job is running @@ -331,10 +335,9 @@ def batch_job(ctx, job_id, raw, show_tasks): print(">Querying batch system\r", end="") config = load_config(ctx.obj['config_file']) batch_client = get_batch_client( - config["BATCH"]["NAME"], - config["BATCH"]["KEY"], - config["BATCH"]["URL"] + config["BATCH"]["CLIENT_ID"], config["BATCH"]["SECRET"], config["AZURE"]["TENANT_ID"], config["BATCH"]["URL"] ) + tasks = None try: @@ -402,9 +405,7 @@ def batch_list(ctx, status, n, find, username): username = config["DEFAULT"]["USERNAME"] batch_client = get_batch_client( - config["BATCH"]["NAME"], - config["BATCH"]["KEY"], - config["BATCH"]["URL"] + config["BATCH"]["CLIENT_ID"], config["BATCH"]["SECRET"], config["AZURE"]["TENANT_ID"], config["BATCH"]["URL"] ) # create client to connect to file share @@ -581,9 +582,12 @@ def load_server_config(kv_uri, tenant_id) -> Dict[str, Dict]: return {"STORAGE": storage_config, "BATCH": batch_config, "REGISTRY": registry_config} -def get_batch_client(name, key, url): +def get_batch_client(client_id, secret, tenant_id, url): """Create a Batch service client""" - credentials = batch_auth.SharedKeyCredentials(name, key) + resource = "https://batch.core.windows.net/" + + credentials = ServicePrincipalCredentials(client_id=client_id, secret=secret, tenant=tenant_id, resource=resource) + batch_client = batch.BatchServiceClient(credentials, batch_url=url) return batch_client @@ -697,10 +701,19 @@ def upload_local_file(connection_string, local_file_path, share_name, dest_file_ print("ResourceNotFoundError:", ex.message) -def create_job(batch_service_client, vm_size, pool_node_count, job_id, - container_conf, mount_configuration, keep_pool_alive): +def create_job( + batch_service_client, + vm_size, + pool_node_count, + job_id, + container_conf, + mount_configuration, + keep_pool_alive, + subnet_id, +): """Creates a job with the specified ID, associated with the specified pool. + :param subnet_id: :param batch_service_client: A Batch service client. :type batch_service_client: `azure.batch.BatchServiceClient` :param str vm_size: Type of virtual machine to use as pool. 
@@ -740,6 +753,11 @@ def create_job(batch_service_client, vm_size, pool_node_count, job_id, $NodeDeallocationOption = taskcompletion; """ + network_configuration = batch_models.NetworkConfiguration( + subnet_id=subnet_id, + public_ip_address_configuration=batch_models.PublicIPAddressConfiguration(provision="noPublicIPAddresses"), + ) + pool = batch_models.PoolSpecification( virtual_machine_configuration=virtual_machine_configuration, vm_size=vm_size, @@ -747,6 +765,8 @@ def create_job(batch_service_client, vm_size, pool_node_count, job_id, task_slots_per_node=1, enable_auto_scale=True, auto_scale_formula=auto_scale_formula, + network_configuration=network_configuration, + target_node_communication_mode="simplified", ) auto_pool_specification = batch_models.AutoPoolSpecification( diff --git a/src/tlo/core.py b/src/tlo/core.py index 3d3fd3c171..9fbbf08893 100644 --- a/src/tlo/core.py +++ b/src/tlo/core.py @@ -8,14 +8,18 @@ import json from enum import Enum, auto -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional import numpy as np import pandas as pd if TYPE_CHECKING: + from pathlib import Path from typing import Optional + from tlo.methods import Metadata + from tlo.methods.causes import Cause + from tlo.population import Population from tlo.simulation import Simulation class Types(Enum): @@ -76,7 +80,7 @@ class Specifiable: Types.BITSET: int, } - def __init__(self, type_, description, categories=None): + def __init__(self, type_: Types, description: str, categories: List[str] = None): """Create a new Specifiable. :param type_: an instance of Types giving the type of allowed values @@ -94,16 +98,16 @@ def __init__(self, type_, description, categories=None): self.categories = categories @property - def python_type(self): + def python_type(self) -> type: """Return the Python type corresponding to this Specifiable.""" return self.PYTHON_TYPE_MAP[self.type_] @property - def pandas_type(self): + def pandas_type(self) -> type: """Return the Pandas type corresponding to this Specifiable.""" return self.PANDAS_TYPE_MAP[self.type_] - def __repr__(self): + def __repr__(self) -> str: """Return detailed description of Specifiable.""" delimiter = " === " @@ -131,8 +135,17 @@ class Property(Specifiable): object: float("nan"), np.uint32: 0, } - - def __init__(self, type_, description, categories=None, *, ordered=False): + _default_value_override: Any + + def __init__( + self, + type_: Types, + description: str, + categories: List[str] = None, + *, + ordered: bool = False, + default_value: Optional[Any] = None, + ) -> None: """Create a new property specification. :param type_: An instance of ``Types`` giving the type of allowed values of this @@ -142,17 +155,53 @@ def __init__(self, type_, description, categories=None, *, ordered=False): ``Types.CATEGORICAL``. :param ordered: Whether categories are ordered if ``type_`` is ``Types.CATEGORICAL``. + :param default_value: The default value for the property. 
""" if type_ in [Types.SERIES, Types.DATA_FRAME]: raise TypeError("Property cannot be of type SERIES or DATA_FRAME.") + super().__init__(type_, description, categories) self.ordered = ordered + # Use _default_value setter method to set property initial value + self._default_value = default_value @property - def _default_value(self): - return self.PANDAS_TYPE_DEFAULT_VALUE_MAP[self.pandas_type] + def _default_value(self) -> Any: + """ + Default value for this property, which will be used to fill the respective columns + of the population dataframe, for example. + + If not explicitly set, it will fall back on the ``PANDAS_TYPE_DEFAULT_TYPE_MAP``. + If a value is provided, it must: + + - Be of the corresponding TYPE for the property. + - If ``type_`` is ``Types.CATEGORICAL``, it must also be a possible category. + """ + return ( + self.PANDAS_TYPE_DEFAULT_VALUE_MAP[self.pandas_type] + if self._default_value_override is None + else self._default_value_override + ) - def create_series(self, name, size): + @_default_value.setter + def _default_value(self, new_val: Any) -> None: + if new_val is not None: + # Check for valid category + if self.type_ is Types.CATEGORICAL: + if new_val not in self.categories: + raise ValueError( + f"Value {new_val} is not a valid category, so cannot be set as the default." + ) + # If not categorical, check for valid data type for default + elif not isinstance(new_val, self.python_type): + raise ValueError( + f"Trying to set a default value of type {type(new_val).__name__}, " + f"which is different from Property's type of {type(self.python_type).__name__}." + ) + # Outside block so that providing new_val = None reverts to Property-wide default. + self._default_value_override = new_val + + def create_series(self, name: str, size: int) -> pd.Series: """Create a Pandas Series for this property. The values will be left uninitialised. @@ -201,48 +250,47 @@ class attribute on a subclass. # Subclasses can override this to declare the set of initialisation dependencies # Declares modules that need to be registered in simulation and initialised before # this module - INIT_DEPENDENCIES = frozenset() + INIT_DEPENDENCIES: FrozenSet[str] = frozenset() # Subclasses can override this to declare the set of optional init. dependencies # Declares modules that need to be registered in simulation and initialised before # this module if they are present, but are not required otherwise - OPTIONAL_INIT_DEPENDENCIES = frozenset() + OPTIONAL_INIT_DEPENDENCIES: FrozenSet[str] = frozenset() # Subclasses can override this to declare the set of additional dependencies # Declares any modules that need to be registered in simulation in addition to those # in INIT_DEPENDENCIES to allow running simulation - ADDITIONAL_DEPENDENCIES = frozenset() + ADDITIONAL_DEPENDENCIES: FrozenSet[str] = frozenset() # Subclasses can override this to declare the set of modules that this module can be # used in place of as a dependency - ALTERNATIVE_TO = frozenset() + ALTERNATIVE_TO: FrozenSet[str] = frozenset() # Subclasses can override this set to add metadata tags to their class # See tlo.methods.Metadata class - METADATA = {} + METADATA: FrozenSet[Metadata] = frozenset() - # Subclasses can override this set to declare the causes death that this module contributes to + # Subclasses can override this dict to declare the causes death that this module contributes to # This is a dict of the form { None: """Construct a new disease module ready to be included in a simulation. 
Initialises an empty parameters dictionary and module-specific random number @@ -255,7 +303,7 @@ def __init__(self, name=None): self.name = name or self.__class__.__name__ self.sim: Optional[Simulation] = None - def load_parameters_from_dataframe(self, resource: pd.DataFrame): + def load_parameters_from_dataframe(self, resource: pd.DataFrame) -> None: """Automatically load parameters from resource dataframe, updating the class parameter dictionary Goes through parameters dict self.PARAMETERS and updates the self.parameters with values @@ -316,7 +364,7 @@ def load_parameters_from_dataframe(self, resource: pd.DataFrame): # Save the values to the parameters self.parameters[parameter_name] = parameter_value - def read_parameters(self, data_folder): + def read_parameters(self, data_folder: str | Path) -> None: """Read parameter values from file, if required. Must be implemented by subclasses. @@ -326,23 +374,41 @@ def read_parameters(self, data_folder): """ raise NotImplementedError - def initialise_population(self, population): + def initialise_population(self, population: Population) -> None: """Set our property values for the initial population. - Must be implemented by subclasses. - This method is called by the simulation when creating the initial population, and is responsible for assigning initial values, for every individual, of those properties 'owned' by this module, i.e. those declared in its PROPERTIES dictionary. + By default, all ``Property``s in ``self.PROPERTIES`` will have + their columns in the population dataframe set to the default value. + + Modules that wish to implement this behaviour do not need to implement this method, + it will be inherited automatically. Modules that wish to perform additional steps + during the initialise_population stage should reimplement this method and call + + ```python + super().initialise_population(population=population) + ``` + + at the beginning of the method, then proceed with their additional steps. Modules that + do not wish to inherit this default behaviour should re-implement initialise_population + without the call to ``super()`` above. + TODO: We probably need to declare somehow which properties we 'read' here, so the simulation knows what order to initialise modules in! - :param population: the population of individuals + :param population: The population of individuals in the simulation. """ - raise NotImplementedError + df = population.props + + for property_name, property in self.PROPERTIES.items(): + df.loc[df.is_alive, property_name] = ( + property._default_value + ) - def initialise_simulation(self, sim): + def initialise_simulation(self, sim: Simulation) -> None: """Get ready for simulation start. Must be implemented by subclasses. @@ -353,7 +419,7 @@ def initialise_simulation(self, sim): """ raise NotImplementedError - def pre_initialise_population(self): + def pre_initialise_population(self) -> None: """Carry out any work before any populations have been initialised This optional method allows access to all other registered modules, before any of @@ -361,7 +427,7 @@ def pre_initialise_population(self): when a module's properties rely upon information from other modules. """ - def on_birth(self, mother_id, child_id): + def on_birth(self, mother_id: int, child_id: int) -> None: """Initialise our properties for a newborn individual. Must be implemented by subclasses. 
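With the new ``default_value`` argument and the inherited ``initialise_population`` behaviour described above, a module can rely on the default column-filling and only add any person-specific steps on top. A small sketch; the module and property names are hypothetical:

    from tlo import Module, Property, Types

    class MyExampleModule(Module):
        PROPERTIES = {
            "mm_status": Property(
                Types.CATEGORICAL,
                "Current status",
                categories=["none", "mild", "severe"],
                default_value="none",
            ),
            "mm_score": Property(Types.REAL, "A running score", default_value=0.0),
        }

        def initialise_population(self, population):
            # Fill every declared property column with its default value first...
            super().initialise_population(population)
            # ...then do any extra, person-specific initialisation here.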
@@ -373,6 +439,6 @@ def on_birth(self, mother_id, child_id): """ raise NotImplementedError - def on_simulation_end(self): + def on_simulation_end(self) -> None: """This is called after the simulation has ended. Modules do not need to declare this.""" diff --git a/src/tlo/dependencies.py b/src/tlo/dependencies.py index 8003b44328..03a847d315 100644 --- a/src/tlo/dependencies.py +++ b/src/tlo/dependencies.py @@ -57,6 +57,67 @@ def get_all_dependencies( ) +def get_missing_dependencies( + module_instances: Iterable[Module], + get_dependencies: DependencyGetter = get_all_dependencies, +) -> Set[str]: + """Get the set of missing required dependencies if any from an iterable of modules. + + :param module_instances: Iterable of ``Module`` subclass instances to get missing + dependencies for. + :param get_dependencies: Callable which extracts the set of dependencies to check + for from a module instance. Defaults to extracting all dependencies. + :return: Set of ``Module`` subclass names corresponding to missing dependencies. + """ + module_instances = list(module_instances) + modules_present = {type(module).__name__ for module in module_instances} + modules_present_are_alternatives_to = set.union( + # Force conversion to set to avoid errors when using set.union with frozenset + *(set(module.ALTERNATIVE_TO) for module in module_instances) + ) + modules_required = set.union( + *(set(get_dependencies(module, modules_present)) for module in module_instances) + ) + + missing_dependencies = modules_required - modules_present + return ( + missing_dependencies - modules_present_are_alternatives_to + ) + + +def initialise_missing_dependencies(modules: Iterable[Module], **module_kwargs) -> Set[Module]: + """Get list of initialised instances of any missing dependencies for an iterable of modules. + + :param modules: Iterable of ``Module`` subclass instances to get instances of missing + dependencies for. + :param module_kwargs: Any keyword arguments to use when initialising missing + module dependencies. + :return: Set of ``Module`` subclass instances corresponding to missing dependencies. 
+ """ + module_class_map: Mapping[str, Type[Module]] = get_module_class_map(set()) + all_module_instances: list[Module] = list(modules) + + def add_missing_module_instances(modules: list[Module], all_missing_module_names: set[str]) -> None: + """ add missing module instances to all_module_instances list + :param modules: Iterable of registered modules + :param all_missing_module_names: Set of missing module names + """ + missing_dependencies: set[str] = get_missing_dependencies( + modules, get_all_dependencies + ) + if len(missing_dependencies) > 0: + all_missing_module_names |= missing_dependencies + missing_module_instances: list[Module] = [ + module_class_map[dependency](**module_kwargs) + for dependency in missing_dependencies + ] + modules.extend(missing_module_instances) + add_missing_module_instances(modules, all_missing_module_names) + + add_missing_module_instances(all_module_instances, set()) + return set(all_module_instances) - set(modules) + + def get_all_required_dependencies( module: Union[Module, Type[Module]], module_names_present: Optional[Set[str]] = None @@ -76,7 +137,7 @@ def get_all_required_dependencies( def topologically_sort_modules( module_instances: Iterable[Module], - get_dependencies: DependencyGetter = get_init_dependencies, + get_dependencies: DependencyGetter = get_init_dependencies ) -> Generator[Module, None, None]: """Generator which yields topological sort of modules based on their dependencies. @@ -120,6 +181,7 @@ def depth_first_search(module): dependencies = get_dependencies( module_instance_map[module], module_instance_map.keys() ) + for dependency in sorted(dependencies): if dependency not in module_instance_map: alternatives_with_instances = [ @@ -264,23 +326,12 @@ def check_dependencies_present( :raises ModuleDependencyError: Raised if any dependencies are missing. """ - module_instances = list(module_instances) - modules_present = {type(module).__name__ for module in module_instances} - modules_present_are_alternatives_to = set.union( - # Force conversion to set to avoid errors when using set.union with frozenset - *(set(module.ALTERNATIVE_TO) for module in module_instances) - ) - modules_required = set.union( - *(set(get_dependencies(module, modules_present)) for module in module_instances) + missing_dependencies = get_missing_dependencies( + module_instances, get_dependencies ) - missing_dependencies = modules_required - modules_present - missing_dependencies_without_alternatives_present = ( - missing_dependencies - modules_present_are_alternatives_to - ) - if not missing_dependencies_without_alternatives_present == set(): - + if len(missing_dependencies) > 0: raise ModuleDependencyError( 'One or more required dependency is missing from the module list and no ' 'alternative to this / these modules are available either: ' - f'{missing_dependencies_without_alternatives_present}' + f'{missing_dependencies}' ) diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py index 674035ad98..e51a95fe74 100644 --- a/src/tlo/methods/consumables.py +++ b/src/tlo/methods/consumables.py @@ -54,13 +54,13 @@ def __init__(self, self._prob_item_codes_available = None # Data on the probability of each item_code being available self._is_available = None # Dict of sets giving the set of item_codes available, by facility_id self._is_unknown_item_available = None # Whether an unknown item is available, by facility_id - self._not_recognised_item_codes = set() # The item codes requested but which are not recognised. 
+        self._not_recognised_item_codes = defaultdict(set)  # The item codes requested but which are not recognised.

        # Save designations
        self._item_code_designations = item_code_designations

        # Save all item_codes that are defined and pd.Series with probs of availability from ResourceFile
-        self.item_codes, self._processed_consumables_data = \
+        self.item_codes, self._processed_consumables_data = \
            self._process_consumables_data(availability_data=availability_data)

        # Set the availability based on the argument provided (this can be updated later after the class is initialised)
@@ -199,7 +199,8 @@ def _determine_default_return_value(cons_availability, default_return_value):

    def _request_consumables(self,
                             facility_info: 'FacilityInfo',  # noqa: F821
-                             item_codes: dict,
+                             essential_item_codes: dict,
+                             optional_item_codes: Optional[dict] = None,
                             to_log: bool = True,
                             treatment_id: Optional[str] = None
                             ) -> dict:
@@ -208,40 +209,52 @@ def _request_consumables(self,
        :param facility_info: The facility_info from which the request for consumables originates
        :param item_codes: dict of the form {<item_code>: <quantity>} for the items requested
+        :param optional_item_codes: dict of the form {<item_code>: <quantity>} for the optional items requested
        :param to_log: whether the request is logged.
        :param treatment_id: the TREATMENT_ID of the HSI (which is entered to the log, if provided).
        :return: dict of the form {<item_code>: <bool>} indicating the availability of each item requested.
        """
+        # If optional_item_codes is None, treat it as an empty dictionary
+        optional_item_codes = optional_item_codes or {}
+        _all_item_codes = {**essential_item_codes, **optional_item_codes}

        # Issue warning if any item_code is not recognised.
-        if not self.item_codes.issuperset(item_codes.keys()):
-            self._not_recognised_item_codes.add((treatment_id, tuple(set(item_codes.keys()) - self.item_codes)))
+        not_recognised_item_codes = _all_item_codes.keys() - self.item_codes
+        if len(not_recognised_item_codes) > 0:
+            self._not_recognised_item_codes[treatment_id] |= not_recognised_item_codes

        # Look-up whether each of these items is available in this facility currently:
-        available = self._lookup_availability_of_consumables(item_codes=item_codes, facility_info=facility_info)
+        available = self._lookup_availability_of_consumables(item_codes=_all_item_codes, facility_info=facility_info)

        # Log the request and the outcome:
        if to_log:
-            items_available = {k: v for k, v in item_codes.items() if available[k]}
-            items_not_available = {k: v for k, v in item_codes.items() if not available[k]}
-            logger.info(key='Consumables',
-                        data={
-                            'TREATMENT_ID': (treatment_id if treatment_id is not None else ""),
-                            'Item_Available': str(items_available),
-                            'Item_NotAvailable': str(items_not_available),
-                        },
-                        # NB. Casting the data to strings because logger complains with dict of varying sizes/keys
-                        description="Record of each consumable item that is requested."
- ) - - self._summary_counter.record_availability(items_available=items_available, - items_not_available=items_not_available) + items_available = {k: v for k, v in _all_item_codes.items() if available[k]} + items_not_available = {k: v for k, v in _all_item_codes.items() if not available[k]} + + # Log items used if all essential items are available + items_used = items_available if all(available.get(k, False) for k in essential_item_codes) else {} + + logger.info( + key='Consumables', + data={ + 'TREATMENT_ID': treatment_id or "", + 'Item_Available': str(items_available), + 'Item_NotAvailable': str(items_not_available), + 'Item_Used': str(items_used), + }, + description="Record of requested and used consumable items." + ) + self._summary_counter.record_availability( + items_available=items_available, + items_not_available=items_not_available, + items_used=items_used, + ) # Return the result of the check on availability return available def _lookup_availability_of_consumables(self, - facility_info: 'FacilityInfo', # noqa: F821 + facility_info: 'FacilityInfo', # noqa: F821 item_codes: dict ) -> dict: """Lookup whether a particular item_code is in the set of available items for that facility (in @@ -266,24 +279,22 @@ def _lookup_availability_of_consumables(self, def on_simulation_end(self): """Do tasks at the end of the simulation. - + Raise warnings and enter to log about item_codes not recognised. """ - if self._not_recognised_item_codes: + if len(self._not_recognised_item_codes) > 0: + not_recognised_item_codes = { + treatment_id if treatment_id is not None else "": sorted(codes) + for treatment_id, codes in self._not_recognised_item_codes.items() + } warnings.warn( UserWarning( - f"Item_Codes were not recognised./n" - f"{self._not_recognised_item_codes}" + f"Item_Codes were not recognised.\n{not_recognised_item_codes}" ) ) logger.info( key="item_codes_not_recognised", - data={ - _treatment_id if _treatment_id is not None else "": list( - _item_codes - ) - for _treatment_id, _item_codes in self._not_recognised_item_codes - }, + data=not_recognised_item_codes, ) def on_end_of_year(self): @@ -364,10 +375,11 @@ def _reset_internal_stores(self) -> None: self._items = { 'Available': defaultdict(int), - 'NotAvailable': defaultdict(int) + 'NotAvailable': defaultdict(int), + 'Used': defaultdict(int), } - def record_availability(self, items_available: dict, items_not_available: dict) -> None: + def record_availability(self, items_available: dict, items_not_available: dict, items_used: dict) -> None: """Add information about the availability of requested items to the running summaries.""" # Record items that were available @@ -378,6 +390,10 @@ def record_availability(self, items_available: dict, items_not_available: dict) for _item, _num in items_not_available.items(): self._items['NotAvailable'][_item] += _num + # Record items that were used + for _item, _num in items_used.items(): + self._items['Used'][_item] += _num + def write_to_log_and_reset_counters(self): """Log summary statistics and reset the data structures.""" @@ -388,6 +404,7 @@ def write_to_log_and_reset_counters(self): data={ "Item_Available": self._items['Available'], "Item_NotAvailable": self._items['NotAvailable'], + "Item_Used": self._items['Used'], }, ) diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py index ab6c633f4c..09cb394804 100644 --- a/src/tlo/methods/contraception.py +++ b/src/tlo/methods/contraception.py @@ -1146,12 +1146,12 @@ def __init__(self, module, person_id, new_contraceptive): 
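The consumables changes above split a request into essential and optional items, and only record items as used when every essential item is available. The selection rule in isolation, with plain dicts and illustrative item codes:

    essential = {101: 1, 102: 2}   # item_code -> quantity the HSI cannot run without
    optional = {201: 1}            # item_code -> quantity that is merely nice to have
    available = {101: True, 102: True, 201: False}

    all_items = {**essential, **optional}
    items_available = {k: v for k, v in all_items.items() if available[k]}
    items_not_available = {k: v for k, v in all_items.items() if not available[k]}
    # Items only count as "used" when every essential item was available:
    items_used = items_available if all(available[k] for k in essential) else {}
    print(items_used)  # {101: 1, 102: 2}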
self.TREATMENT_ID = "Contraception_Routine" self.ACCEPTED_FACILITY_LEVEL = _facility_level - - @property - def EXPECTED_APPT_FOOTPRINT(self): - """Return the expected appt footprint based on contraception method and whether the HSI has been rescheduled.""" - person_id = self.target current_method = self.sim.population.props.loc[person_id].co_contraception + self.EXPECTED_APPT_FOOTPRINT = self._get_appt_footprint(current_method) + + + def _get_appt_footprint(self, current_method: str): + """Return the appointment footprint based on contraception method and whether the HSI has been rescheduled.""" if self._number_of_times_run > 0: # if it is to re-schedule due to unavailable consumables return self.make_appt_footprint({}) # if to switch to a method @@ -1165,9 +1165,11 @@ def EXPECTED_APPT_FOOTPRINT(self): elif self.new_contraceptive in ['male_condom', 'other_modern', 'pill']: return self.make_appt_footprint({'PharmDispensing': 1}) else: + warnings.warn( + "Assumed empty footprint for Contraception Routine appt because couldn't find actual case.", + stacklevel=3, + ) return self.make_appt_footprint({}) - warnings.warn(UserWarning("Assumed empty footprint for Contraception Routine appt because couldn't find" - "actual case.")) def apply(self, person_id, squeeze_factor): """If the relevant consumable is available, do change in contraception and log it""" @@ -1266,6 +1268,8 @@ def apply(self, person_id, squeeze_factor): ): self.reschedule() + return self._get_appt_footprint(current_method) + def post_apply_hook(self): self._number_of_times_run += 1 diff --git a/src/tlo/methods/epi.py b/src/tlo/methods/epi.py index 1cdf8a1612..0ad0c75c1f 100644 --- a/src/tlo/methods/epi.py +++ b/src/tlo/methods/epi.py @@ -182,7 +182,10 @@ def initialise_simulation(self, sim): sim.schedule_event(EpiLoggingEvent(self), sim.date + DateOffset(years=1)) # HPV vaccine given from 2018 onwards - sim.schedule_event(HpvScheduleEvent(self), Date(2018, 1, 1)) + if self.sim.date.year < 2018: + sim.schedule_event(HpvScheduleEvent(self), Date(2018, 1, 1)) + else: + sim.schedule_event(HpvScheduleEvent(self), Date(self.sim.date.year, 1, 1)) # Look up item codes for consumables self.get_item_codes() diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py index 5645d55e34..cc8c0f8cca 100644 --- a/src/tlo/methods/epilepsy.py +++ b/src/tlo/methods/epilepsy.py @@ -100,6 +100,15 @@ def __init__(self, name=None, resourcefilepath=None): 'daly_wt_epilepsy_seizure_free': Parameter( Types.REAL, 'disability weight for less severe epilepsy' 'controlled phase - code 862' ), + 'prob_start_anti_epilep_when_seizures_detected_in_generic_first_appt': Parameter( + Types.REAL, 'probability that someone who has had a seizure is started on anti-epileptics. This is ' + 'calibrated to induce the correct proportion of persons with epilepsy currently receiving ' + 'anti-epileptics.' + ), + 'max_num_of_failed_attempts_before_defaulting': Parameter( + Types.INT, 'maximum number of time an HSI can be repeated if the relevant essential consumables are not ' + 'available.' + ), } """ @@ -406,8 +415,14 @@ def do_at_generic_first_appt_emergency( **kwargs, ) -> None: if "seizures" in symptoms: - event = HSI_Epilepsy_Start_Anti_Epileptic(person_id=person_id, module=self) - schedule_hsi_event(event, priority=0, topen=self.sim.date) + # Determine if treatment will start - depends on probability of prescribing, which is calibrated to + # induce the right proportion of persons with epilepsy receiving treatment. 
+ + prob_start = self.parameters['prob_start_anti_epilep_when_seizures_detected_in_generic_first_appt'] + + if self.rng.random_sample() < prob_start: + event = HSI_Epilepsy_Start_Anti_Epileptic(person_id=person_id, module=self) + schedule_hsi_event(event, priority=0, topen=self.sim.date) class EpilepsyEvent(RegularEvent, PopulationScopeEventMixin): @@ -576,12 +591,17 @@ def apply(self, population): cum_deaths = (~df.is_alive).sum() + # Proportion of those with infrequent or frequent seizures currently on anti-epileptics + prop_freq_or_infreq_seiz_on_antiep = status_groups[2:].ep_antiep.sum() / status_groups[2:].is_alive.sum() \ + if status_groups[2:].is_alive.sum() > 0 else 0 + logger.info(key='epilepsy_logging', data={ 'prop_seiz_stat_0': status_groups['prop_seiz_stats'].iloc[0], 'prop_seiz_stat_1': status_groups['prop_seiz_stats'].iloc[1], 'prop_seiz_stat_2': status_groups['prop_seiz_stats'].iloc[2], 'prop_seiz_stat_3': status_groups['prop_seiz_stats'].iloc[3], + 'prop_freq_or_infreq_seiz_on_antiep': prop_freq_or_infreq_seiz_on_antiep, 'prop_antiepilep_seiz_stat_0': status_groups['prop_seiz_stat_on_anti_ep'].iloc[0], 'prop_antiepilep_seiz_stat_1': status_groups['prop_seiz_stat_on_anti_ep'].iloc[1], 'prop_antiepilep_seiz_stat_2': status_groups['prop_seiz_stat_on_anti_ep'].iloc[2], @@ -608,6 +628,9 @@ def __init__(self, module, person_id): self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1}) self.ACCEPTED_FACILITY_LEVEL = '1b' + self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters['max_num_of_failed_attempts_before_defaulting'] + self._counter_of_failed_attempts_due_to_unavailable_medicines = 0 + def apply(self, person_id, squeeze_factor): df = self.sim.population.props hs = self.sim.modules["HealthSystem"] @@ -639,8 +662,12 @@ def apply(self, person_id, squeeze_factor): priority=0 ) - else: + elif ( + self._counter_of_failed_attempts_due_to_unavailable_medicines + < self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING + ): # If no medicine is available, run this HSI again next month + self._counter_of_failed_attempts_due_to_unavailable_medicines += 1 self.module.sim.modules['HealthSystem'].schedule_hsi_event(hsi_event=self, topen=self.sim.date + pd.DateOffset(months=1), tclose=None, @@ -652,7 +679,7 @@ class HSI_Epilepsy_Follow_Up(HSI_Event, IndividualScopeEventMixin): def __init__(self, module, person_id): super().__init__(module, person_id=person_id) - self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = 2 + self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters['max_num_of_failed_attempts_before_defaulting'] self._DEFAULT_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1}) self._REPEATED_APPT_FOOTPRINT = self.make_appt_footprint({'PharmDispensing': 1}) diff --git a/src/tlo/methods/equipment.py b/src/tlo/methods/equipment.py index bf0d6fc0ae..62776fb3ad 100644 --- a/src/tlo/methods/equipment.py +++ b/src/tlo/methods/equipment.py @@ -221,16 +221,16 @@ def write_to_log(self) -> None: mfl = self.master_facilities_list - def set_of_keys_or_empty_set(x: Union[set, dict]): - if isinstance(x, set): - return x - elif isinstance(x, dict): - return set(x.keys()) + def sorted_keys_or_empty_list(x: Union[dict, None]) -> list: + if isinstance(x, dict): + return sorted(x.keys()) else: - return set() + return [] set_of_equipment_ever_used_at_each_facility_id = pd.Series({ - fac_id: set_of_keys_or_empty_set(self._record_of_equipment_used_by_facility_id.get(fac_id, set())) + fac_id: sorted_keys_or_empty_list( + 
self._record_of_equipment_used_by_facility_id.get(fac_id) + ) for fac_id in mfl['Facility_ID'] }, name='EquipmentEverUsed').astype(str) diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py index cf51b4c0ab..5c6b2022e1 100644 --- a/src/tlo/methods/healthsystem.py +++ b/src/tlo/methods/healthsystem.py @@ -165,7 +165,7 @@ class HealthSystem(Module): 'use_funded_or_actual_staffing': Parameter( Types.STRING, "If `actual`, then use the numbers and distribution of staff estimated to be available" " currently; If `funded`, then use the numbers and distribution of staff that are " - "potentially available. If 'funded_plus`, then use a dataset in which the allocation of " + "potentially available. If `funded_plus`, then use a dataset in which the allocation of " "staff to facilities is tweaked so as to allow each appointment type to run at each " "facility_level in each district for which it is defined. N.B. This parameter is " "over-ridden if an argument is provided to the module initialiser.", @@ -775,6 +775,9 @@ def initialise_simulation(self, sim): # whilst the actual scaling will only take effect from 2011 onwards. sim.schedule_event(DynamicRescalingHRCapabilities(self), Date(sim.date)) + # Schedule the logger to occur at the start of every year + sim.schedule_event(HealthSystemLogger(self), Date(sim.date.year, 1, 1)) + def on_birth(self, mother_id, child_id): self.bed_days.on_birth(self.sim.population.props, mother_id, child_id) @@ -936,22 +939,21 @@ def setup_daily_capabilities(self, use_funded_or_actual_staffing): This is called when the value for `use_funded_or_actual_staffing` is set - at the beginning of the simulation and when the assumption when the underlying assumption for `use_funded_or_actual_staffing` is updated""" # * Store 'DailyCapabilities' in correct format and using the specified underlying assumptions - self._daily_capabilities = self.format_daily_capabilities(use_funded_or_actual_staffing) + self._daily_capabilities, self._daily_capabilities_per_staff = self.format_daily_capabilities(use_funded_or_actual_staffing) # Also, store the set of officers with non-zero daily availability # (This is used for checking that scheduled HSI events do not make appointment requiring officers that are # never available.) self._officers_with_availability = set(self._daily_capabilities.index[self._daily_capabilities > 0]) - def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Series: + def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple[pd.Series,pd.Series]: """ - This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to include - every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity + This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to: + 1. include every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity is available. - - It also give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type) - + 2. Give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type) (This is so that its easier to track where demands are being placed where there is no capacity) + 3. Compute daily capabilities per staff. This will be used to compute staff count in a way that is independent of assumed efficiency. """ # Get the capabilities data imported (according to the specified underlying assumptions). 
@@ -959,6 +961,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}'] ) capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'}) # neaten + + # Create new column where capabilities per staff are computed + capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count'] + # Create dataframe containing background information about facility and officer types facility_ids = self.parameters['Master_Facilities_List']['Facility_ID'].values @@ -978,7 +984,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se # Merge in information about facility from Master Facilities List mfl = self.parameters['Master_Facilities_List'] capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left') - + + # Create a copy of this to store staff counts + capabilities_per_staff_ex = capabilities_ex.copy() + # Merge in information about officers # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']] # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left') @@ -991,6 +1000,13 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se how='left', ) capabilities_ex = capabilities_ex.fillna(0) + + capabilities_per_staff_ex = capabilities_per_staff_ex.merge( + capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']], + on=['Facility_ID', 'Officer_Type_Code'], + how='left', + ) + capabilities_per_staff_ex = capabilities_per_staff_ex.fillna(0) # Give the standard index: capabilities_ex = capabilities_ex.set_index( @@ -999,6 +1015,14 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se + '_Officer_' + capabilities_ex['Officer_Type_Code'] ) + + # Give the standard index: + capabilities_per_staff_ex = capabilities_per_staff_ex.set_index( + 'FacilityID_' + + capabilities_ex['Facility_ID'].astype(str) + + '_Officer_' + + capabilities_ex['Officer_Type_Code'] + ) # Rename 'Total_Minutes_Per_Day' capabilities_ex = capabilities_ex.rename(columns={'Total_Mins_Per_Day': 'Total_Minutes_Per_Day'}) @@ -1006,9 +1030,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> pd.Se # Checks assert abs(capabilities_ex['Total_Minutes_Per_Day'].sum() - capabilities['Total_Mins_Per_Day'].sum()) < 1e-7 assert len(capabilities_ex) == len(facility_ids) * len(officer_type_codes) + assert len(capabilities_per_staff_ex) == len(facility_ids) * len(officer_type_codes) # return the pd.Series of `Total_Minutes_Per_Day' indexed for each type of officer at each facility - return capabilities_ex['Total_Minutes_Per_Day'] + return capabilities_ex['Total_Minutes_Per_Day'], capabilities_per_staff_ex['Mins_Per_Day_Per_Staff'] def _rescale_capabilities_to_capture_effective_capability(self): # Notice that capabilities will only be expanded through this process @@ -1030,6 +1055,11 @@ def _rescale_capabilities_to_capture_effective_capability(self): ) if rescaling_factor > 1 and rescaling_factor != float("inf"): self._daily_capabilities[officer] *= rescaling_factor + + # We assume that increased daily capabilities is a result of each staff performing more + # daily patient facing time per day than contracted (or equivalently performing appts more + # efficiently). 
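The new ``Mins_Per_Day_Per_Staff`` series makes it possible to recover a staff count that is unaffected by the effective-capability rescaling above, because numerator and denominator are scaled by the same factor. A toy pandas illustration; the values and index entries are made up:

    import pandas as pd

    total_mins = pd.Series({
        "FacilityID_0_Officer_DCSA": 480.0,
        "FacilityID_0_Officer_Nursing_and_Midwifery": 1440.0,
    })
    mins_per_staff = pd.Series({
        "FacilityID_0_Officer_DCSA": 480.0,
        "FacilityID_0_Officer_Nursing_and_Midwifery": 480.0,
    })
    staff_count = total_mins / mins_per_staff  # 1.0 and 3.0 staff respectively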
+ self._daily_capabilities_per_staff[officer] *= rescaling_factor def update_consumables_availability_to_represent_merging_of_levels_1b_and_2(self, df_original): """To represent that facility levels '1b' and '2' are merged together under the label '2', we replace the @@ -2653,6 +2683,11 @@ def _reset_internal_stores(self) -> None: self._appts_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} # <--Same as `self._appts` but also split by facility_level + # Log HSI_Events that have a non-blank appointment footprint + self._no_blank_appt_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s with non-blank footprint + self._no_blank_appt_appts = defaultdict(int) # As above, but for `HSI_Event`s that with non-blank footprint + self._no_blank_appt_by_level = {_level: defaultdict(int) for _level in ('0', '1a', '1b', '2', '3', '4')} + # Log HSI_Events that never ran to monitor shortcoming of Health System self._never_ran_treatment_ids = defaultdict(int) # As above, but for `HSI_Event`s that never ran self._never_ran_appts = defaultdict(int) # As above, but for `HSI_Event`s that have never ran @@ -2686,6 +2721,13 @@ def record_hsi_event(self, self._appts[appt_type] += number self._appts_by_level[level][appt_type] += number + # Count the non-blank appointment footprints + if len(appt_footprint): + self._no_blank_appt_treatment_ids[treatment_id] += 1 + for appt_type, number in appt_footprint: + self._no_blank_appt_appts[appt_type] += number + self._no_blank_appt_by_level[level][appt_type] += number + def record_never_ran_hsi_event(self, treatment_id: str, hsi_event_name: str, @@ -2730,6 +2772,15 @@ def write_to_log_and_reset_counters(self): } }, ) + logger_summary.info( + key="HSI_Event_non_blank_appt_footprint", + description="Same as for key 'HSI_Event' but limited to HSI_Event that have non-blank footprints", + data={ + "TREATMENT_ID": self._no_blank_appt_treatment_ids, + "Number_By_Appt_Type_Code": self._no_blank_appt_appts, + "Number_By_Appt_Type_Code_And_Level": self._no_blank_appt_by_level, + }, + ) # Log summary of HSI_Events that never ran logger_summary.info( @@ -2948,3 +2999,34 @@ def apply(self, population): f"Now using mode: " f"{self.module.mode_appt_constraints}" ) + + +class HealthSystemLogger(RegularEvent, PopulationScopeEventMixin): + """ This event runs at the start of each year and does any logging jobs for the HealthSystem module.""" + + def __init__(self, module): + super().__init__(module, frequency=DateOffset(years=1)) + + def apply(self, population): + """Things to do at the start of the year""" + self.log_number_of_staff() + + def log_number_of_staff(self): + """Write to the summary log with the counts of staff (by cadre/facility/level) taking into account: + * Any scaling of capabilities that has taken place, year-by-year, or cadre-by-cadre + * Any re-scaling that has taken place at the transition into Mode 2. 
+ """ + + hs = self.module # HealthSystem module + + # Compute staff counts from available capabilities (hs.capabilities_today) and daily capabilities per staff, + # both of which would have been rescaled to current efficiency levels if scale_to_effective_capabilities=True + # This returns the number of staff counts normalised by the self.capabilities_coefficient parameter + current_staff_count = dict((hs.capabilities_today/hs._daily_capabilities_per_staff).sort_index()) + + logger_summary.info( + key="number_of_hcw_staff", + description="The number of hcw_staff this year", + data=current_staff_count, + ) + diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py index d6455cc861..a8c621b6c1 100644 --- a/src/tlo/methods/hiv.py +++ b/src/tlo/methods/hiv.py @@ -1503,20 +1503,20 @@ def per_capita_testing_rate(self): df = self.sim.population.props - # get number of tests performed in last time period - if self.sim.date.year == 2011: - number_tests_new = df.hv_number_tests.sum() + if not self.stored_test_numbers: + # If it's the first year, set previous_test_numbers to 0 previous_test_numbers = 0 - else: + # For subsequent years, retrieve the last stored number previous_test_numbers = self.stored_test_numbers[-1] - # calculate number of tests now performed - cumulative, include those who have died - number_tests_new = df.hv_number_tests.sum() + # Calculate number of tests now performed - cumulative, include those who have died + number_tests_new = df.hv_number_tests.sum() + # Store the number of tests performed in this year for future reference self.stored_test_numbers.append(number_tests_new) - # number of tests performed in last time period + # Number of tests performed in the last time period number_tests_in_last_period = number_tests_new - previous_test_numbers # per-capita testing rate diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py index b76a865d2d..c252a40974 100644 --- a/src/tlo/methods/hsi_event.py +++ b/src/tlo/methods/hsi_event.py @@ -225,7 +225,8 @@ def get_consumables( # Checking the availability and logging: rtn = self.healthcare_system.consumables._request_consumables( - item_codes={**_item_codes, **_optional_item_codes}, + essential_item_codes=_item_codes, + optional_item_codes=_optional_item_codes, to_log=_to_log, facility_info=self.facility_info, treatment_id=self.TREATMENT_ID, diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py index 18c1987483..68ef59fcf0 100644 --- a/src/tlo/methods/rti.py +++ b/src/tlo/methods/rti.py @@ -2325,24 +2325,24 @@ def look_up_consumable_item_codes(self): # fractures.) 
} self.cons_item_codes['open_fracture_treatment'] = { - get_item_codes('Ceftriaxone 1g, PFR_each_CMST'): 2000, - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, + get_item_codes('Ceftriaxone 1g, PFR_each_CMST'): 2, + get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, get_item_codes('Suture pack'): 1, } self.cons_item_codes["open_fracture_treatment_additional_if_contaminated"] = { - get_item_codes('Metronidazole, injection, 500 mg in 100 ml vial'): 1500 + get_item_codes('Metronidazole, injection, 500 mg in 100 ml vial'): 3 } self.cons_item_codes['laceration_treatment_suture_pack'] = { - get_item_codes('Suture pack'): 0, + get_item_codes('Suture pack'): 1, } self.cons_item_codes['laceration_treatment_cetrimide_chlorhexidine'] = { - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 500, + get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, } self.cons_item_codes['burn_treatment_per_burn'] = { - get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 0, - get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 0, + get_item_codes("Gauze, absorbent 90cm x 40m_each_CMST"): 100, + get_item_codes('Cetrimide 15% + chlorhexidine 1.5% solution.for dilution _5_CMST'): 100, } self.cons_item_codes['ringers lactate for multiple burns'] = { get_item_codes("ringer's lactate (Hartmann's solution), 1000 ml_12_IDA"): 4000 @@ -2353,16 +2353,16 @@ def look_up_consumable_item_codes(self): get_item_codes("diclofenac sodium 25 mg, enteric coated_1000_IDA"): 300 } self.cons_item_codes['pain_management_moderate'] = { - get_item_codes("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 300 + get_item_codes("tramadol HCl 100 mg/2 ml, for injection_100_IDA"): 3 } self.cons_item_codes['pain_management_severe'] = { - get_item_codes("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 120 + get_item_codes("morphine sulphate 10 mg/ml, 1 ml, injection (nt)_10_IDA"): 12 } self.cons_item_codes['major_surgery'] = { # request a general anaesthetic get_item_codes("Halothane (fluothane)_250ml_CMST"): 100, # clean the site of the surgery - get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 500, + get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 600, # tools to begin surgery get_item_codes("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, # administer an IV @@ -2375,7 +2375,7 @@ def look_up_consumable_item_codes(self): # administer pain killer get_item_codes('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, # administer antibiotic - get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 1000, + get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 2, # equipment used by surgeon, gloves and facemask get_item_codes('Disposables gloves, powder free, 100 pieces per box'): 1, get_item_codes('surgical face mask, disp., with metal nose piece_50_IDA'): 1, @@ -2386,7 +2386,7 @@ def look_up_consumable_item_codes(self): # request a local anaesthetic get_item_codes("Halothane (fluothane)_250ml_CMST"): 100, # clean the site of the surgery - get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 500, + get_item_codes("Chlorhexidine 1.5% solution_5_CMST"): 300, # tools to begin surgery get_item_codes("Scalpel blade size 22 (individually wrapped)_100_CMST"): 1, # administer an IV @@ -2399,7 +2399,7 @@ def look_up_consumable_item_codes(self): # administer pain killer 
get_item_codes('Pethidine, 50 mg/ml, 2 ml ampoule'): 6, # administer antibiotic - get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 1000, + get_item_codes("Ampicillin injection 500mg, PFR_each_CMST"): 2, # equipment used by surgeon, gloves and facemask get_item_codes('Disposables gloves, powder free, 100 pieces per box'): 1, get_item_codes('surgical face mask, disp., with metal nose piece_50_IDA'): 1, diff --git a/src/tlo/methods/tb.py b/src/tlo/methods/tb.py index 623ee2e483..c067a78929 100644 --- a/src/tlo/methods/tb.py +++ b/src/tlo/methods/tb.py @@ -681,8 +681,13 @@ def get_consumables_for_dx_and_tx(self): # TB Sputum smear test # assume that if smear-positive, sputum smear test is 100% specific and sensitive - self.item_codes_for_consumables_required['sputum_test'] = \ - hs.get_item_codes_from_package_name("Microscopy Test") + self.item_codes_for_consumables_required['sputum_test'] = hs.get_item_code_from_item_name("ZN Stain") + self.item_codes_for_consumables_required['sputum_container'] = hs.get_item_code_from_item_name( + "Sputum container") + self.item_codes_for_consumables_required['slides'] = hs.get_item_code_from_item_name( + "Microscope slides, lime-soda-glass, pack of 50") + self.item_codes_for_consumables_required['gloves'] = hs.get_item_code_from_item_name( + "Gloves, exam, latex, disposable, pair") self.sim.modules['HealthSystem'].dx_manager.register_dx_test( tb_sputum_test_smear_positive=DxTest( @@ -690,7 +695,10 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=p["sens_sputum_smear_positive"], specificity=p["spec_sputum_smear_positive"], - item_codes=self.item_codes_for_consumables_required['sputum_test'] + item_codes=self.item_codes_for_consumables_required['sputum_test'], + optional_item_codes=[self.item_codes_for_consumables_required['sputum_container'], + self.item_codes_for_consumables_required['slides'], + self.item_codes_for_consumables_required['gloves']] ) ) self.sim.modules['HealthSystem'].dx_manager.register_dx_test( @@ -699,13 +707,16 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=0.0, specificity=1.0, - item_codes=self.item_codes_for_consumables_required['sputum_test'] + item_codes=self.item_codes_for_consumables_required['sputum_test'], + optional_item_codes=[self.item_codes_for_consumables_required['sputum_container'], + self.item_codes_for_consumables_required['slides'], + self.item_codes_for_consumables_required['gloves']] ) ) # TB GeneXpert self.item_codes_for_consumables_required['xpert_test'] = \ - hs.get_item_codes_from_package_name("Xpert test") + hs.get_item_code_from_item_name("Xpert") # sensitivity/specificity set for smear status of cases self.sim.modules["HealthSystem"].dx_manager.register_dx_test( @@ -714,7 +725,10 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=p["sens_xpert_smear_positive"], specificity=p["spec_xpert_smear_positive"], - item_codes=self.item_codes_for_consumables_required['xpert_test'] + item_codes=self.item_codes_for_consumables_required['xpert_test'], + optional_item_codes=[self.item_codes_for_consumables_required['sputum_container'], + self.item_codes_for_consumables_required['slides'], + self.item_codes_for_consumables_required['gloves']] ) ) self.sim.modules["HealthSystem"].dx_manager.register_dx_test( @@ -723,13 +737,17 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=p["sens_xpert_smear_negative"], specificity=p["spec_xpert_smear_negative"], - 
item_codes=self.item_codes_for_consumables_required['xpert_test'] + item_codes=self.item_codes_for_consumables_required['xpert_test'], + optional_item_codes=[self.item_codes_for_consumables_required['sputum_container'], + self.item_codes_for_consumables_required['slides'], + self.item_codes_for_consumables_required['gloves']] ) ) # TB Chest x-ray - self.item_codes_for_consumables_required['chest_xray'] = { - hs.get_item_code_from_item_name("X-ray"): 1} + self.item_codes_for_consumables_required['chest_xray'] = hs.get_item_code_from_item_name("X-ray") + self.item_codes_for_consumables_required['lead_apron'] = hs.get_item_code_from_item_name( + "Lead rubber x-ray protective aprons up to 150kVp 0.50mm_each_CMST") # sensitivity/specificity set for smear status of cases self.sim.modules["HealthSystem"].dx_manager.register_dx_test( @@ -738,7 +756,8 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=p["sens_xray_smear_positive"], specificity=p["spec_xray_smear_positive"], - item_codes=self.item_codes_for_consumables_required['chest_xray'] + item_codes=self.item_codes_for_consumables_required['chest_xray'], + optional_item_codes=self.item_codes_for_consumables_required['lead_apron'] ) ) self.sim.modules["HealthSystem"].dx_manager.register_dx_test( @@ -747,7 +766,8 @@ def get_consumables_for_dx_and_tx(self): target_categories=["active"], sensitivity=p["sens_xray_smear_negative"], specificity=p["spec_xray_smear_negative"], - item_codes=self.item_codes_for_consumables_required['chest_xray'] + item_codes=self.item_codes_for_consumables_required['chest_xray'], + optional_item_codes=self.item_codes_for_consumables_required['lead_apron'] ) ) @@ -1796,7 +1816,7 @@ def apply(self, person_id, squeeze_factor): if self.facility_level == "1a": self.sim.modules["HealthSystem"].schedule_hsi_event( hsi_event=HSI_Tb_ScreeningAndRefer( - person_id=person_id, module=self.module, facility_level="2" + person_id=person_id, module=self.module, facility_level="1b" ), topen=self.sim.date + DateOffset(days=1), tclose=None, diff --git a/src/tlo/scenario.py b/src/tlo/scenario.py index aa1be42aa9..f64325f9ec 100644 --- a/src/tlo/scenario.py +++ b/src/tlo/scenario.py @@ -73,6 +73,7 @@ def draw_parameters(self, draw_number, rng): from tlo import Date, Simulation, logging from tlo.analysis.utils import parse_log_file +from tlo.util import str_to_pandas_date logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -141,6 +142,16 @@ def parse_arguments(self, extra_arguments: List[str]) -> None: self.arguments = extra_arguments parser = argparse.ArgumentParser() + parser.add_argument( + "--resume-simulation", + type=str, + help="Directory containing suspended state files to resume simulation from", + ) + parser.add_argument( + "--suspend-date", + type=str_to_pandas_date, + help="Date to suspend the simulation at", + ) # add arguments from the subclass self.add_arguments(parser) @@ -382,20 +393,58 @@ def run_sample_by_number(self, output_directory, draw_number, sample_number): sample = self.get_sample(draw, sample_number) log_config = self.scenario.get_log_config(output_directory) - logger.info(key="message", data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}") - - sim = Simulation( - start_date=self.scenario.start_date, - seed=sample["simulation_seed"], - log_config=log_config + logger.info( + key="message", + data=f"Running draw {sample['draw_number']}, sample {sample['sample_number']}", ) - sim.register(*self.scenario.modules()) - if sample["parameters"] 
is not None: - self.override_parameters(sim, sample["parameters"]) - - sim.make_initial_population(n=self.scenario.pop_size) - sim.simulate(end_date=self.scenario.end_date) + # if user has specified a restore simulation, we load it from a pickle file + if ( + hasattr(self.scenario, "resume_simulation") + and self.scenario.resume_simulation is not None + ): + suspended_simulation_path = ( + Path(self.scenario.resume_simulation) + / str(draw_number) + / str(sample_number) + / "suspended_simulation.pickle" + ) + logger.info( + key="message", + data=f"Loading pickled suspended simulation from {suspended_simulation_path}", + ) + sim = Simulation.load_from_pickle(pickle_path=suspended_simulation_path, log_config=log_config) + else: + sim = Simulation( + start_date=self.scenario.start_date, + seed=sample["simulation_seed"], + log_config=log_config, + ) + sim.register(*self.scenario.modules()) + + if sample["parameters"] is not None: + self.override_parameters(sim, sample["parameters"]) + + sim.make_initial_population(n=self.scenario.pop_size) + sim.initialise(end_date=self.scenario.end_date) + + # if user has specified a suspend date, we run the simulation to that date and + # save it to a pickle file + if ( + hasattr(self.scenario, "suspend_date") + and self.scenario.suspend_date is not None + ): + sim.run_simulation_to(to_date=self.scenario.suspend_date) + suspended_simulation_path = Path(log_config["directory"]) / "suspended_simulation.pickle" + sim.save_to_pickle(pickle_path=suspended_simulation_path) + sim.close_output_file() + logger.info( + key="message", + data=f"Simulation suspended at {self.scenario.suspend_date} and saved to {suspended_simulation_path}", + ) + else: + sim.run_simulation_to(to_date=self.scenario.end_date) + sim.finalise() if sim.log_filepath is not None: outputs = parse_log_file(sim.log_filepath) diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py index 1853c76063..547edf1d23 100644 --- a/src/tlo/simulation.py +++ b/src/tlo/simulation.py @@ -1,58 +1,102 @@ """The main simulation controller.""" +from __future__ import annotations + import datetime import heapq import itertools import time from collections import OrderedDict from pathlib import Path -from typing import Dict, Optional, Union +from typing import TYPE_CHECKING, Optional import numpy as np +try: + import dill + + DILL_AVAILABLE = True +except ImportError: + DILL_AVAILABLE = False + from tlo import Date, Population, logging -from tlo.dependencies import check_dependencies_present, topologically_sort_modules +from tlo.dependencies import ( + check_dependencies_present, + initialise_missing_dependencies, + topologically_sort_modules, +) from tlo.events import Event, IndividualScopeEventMixin from tlo.progressbar import ProgressBar +if TYPE_CHECKING: + from tlo.core import Module + from tlo.logging.core import LogLevel + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -class Simulation: - """The main control centre for a simulation. +class SimulationPreviouslyInitialisedError(Exception): + """Exception raised when trying to initialise an already initialised simulation.""" - This class contains the core simulation logic and event queue, and holds - references to all the information required to run a complete simulation: - the population, disease modules, etc. - Key attributes include: +class SimulationNotInitialisedError(Exception): + """Exception raised when trying to run simulation before initialising.""" - `date` - The current simulation date. 
- `modules` - A list of the disease modules contributing to this simulation. +class Simulation: + """The main control centre for a simulation. + + This class contains the core simulation logic and event queue, and holds references + to all the information required to run a complete simulation: the population, + disease modules, etc. - `population` - The Population being simulated. + Key attributes include: - `rng` - The simulation-level random number generator. - Note that individual modules also have their own random number generator - with independent state. + :ivar date: The current simulation date. + :ivar modules: A dictionary of the disease modules used in this simulation, keyed + by the module name. + :ivar population: The population being simulated. + :ivar rng: The simulation-level random number generator. + + .. note:: + Individual modules also have their own random number generator with independent + state. """ - def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = None, - show_progress_bar=False): + def __init__( + self, + *, + start_date: Date, + seed: Optional[int] = None, + log_config: Optional[dict] = None, + show_progress_bar: bool = False, + resourcefilepath: Optional[Path] = None, + ): """Create a new simulation. - :param start_date: the date the simulation begins; must be given as - a keyword parameter for clarity - :param seed: the seed for random number generator. class will create one if not supplied - :param log_config: sets up the logging configuration for this simulation - :param show_progress_bar: whether to show a progress bar instead of the logger - output during the simulation + :param start_date: The date the simulation begins; must be given as + a keyword parameter for clarity. + :param seed: The seed for random number generator. class will create one if not + supplied + :param log_config: Dictionary specifying logging configuration for this + simulation. Can have entries: `filename` - prefix for log file name, final + file name will have a date time appended, if not present default is to not + output log to a file; `directory` - path to output directory to write log + file to, default if not specified is to output to the `outputs` folder; + `custom_levels` - dictionary to set logging levels, '*' can be used as a key + for all registered modules; `suppress_stdout` - if `True`, suppresses + logging to standard output stream (default is `False`). + :param show_progress_bar: Whether to show a progress bar instead of the logger + output during the simulation. + :param resourcefilepath: Path to resource files folder. Assign ``None` if no + path is provided. + + .. note:: + The `custom_levels` entry in `log_config` argument can be used to disable + logging on all disease modules by setting a high level to `*`, and then + enabling logging on one module of interest by setting a low level, for + example ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}``. 
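# [Editor's illustrative sketch - not part of the diff.] A minimal construction of a
# Simulation using the log_config entries documented above; the filename, output
# directory and resource path below are assumptions for illustration only.
from pathlib import Path
from tlo import Date, Simulation, logging

sim = Simulation(
    start_date=Date(2010, 1, 1),
    seed=0,
    log_config={
        "filename": "example_run",            # final file name gets a timestamp appended
        "directory": "./outputs",
        "custom_levels": {
            "*": logging.CRITICAL,            # silence all modules by default...
            "tlo.methods.hiv": logging.INFO,  # ...but keep one module of interest
        },
    },
    resourcefilepath=Path("./resources"),
)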
""" # simulation self.date = self.start_date = start_date @@ -63,6 +107,7 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non self.population: Optional[Population] = None self.show_progress_bar = show_progress_bar + self.resourcefilepath = resourcefilepath # logging if log_config is None: @@ -72,27 +117,40 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non # random number generator - seed_from = 'auto' if seed is None else 'user' + seed_from = "auto" if seed is None else "user" self._seed = seed self._seed_seq = np.random.SeedSequence(seed) logger.info( - key='info', - data=f'Simulation RNG {seed_from} entropy = {self._seed_seq.entropy}' + key="info", + data=f"Simulation RNG {seed_from} entropy = {self._seed_seq.entropy}", ) self.rng = np.random.RandomState(np.random.MT19937(self._seed_seq)) - def _configure_logging(self, filename: str = None, directory: Union[Path, str] = "./outputs", - custom_levels: Dict[str, int] = None, suppress_stdout: bool = False): - """Configure logging, can write logging to a logfile in addition the default of stdout. - - Minimum custom levels for each logger can be specified for filtering out messages - - :param filename: Prefix for logfile name, final logfile will have a datetime appended + # Whether simulation has been initialised + self._initialised = False + + def _configure_logging( + self, + filename: Optional[str] = None, + directory: Path | str = "./outputs", + custom_levels: Optional[dict[str, LogLevel]] = None, + suppress_stdout: bool = False + ): + """Configure logging of simulation outputs. + + Can write log output to a file in addition the default of `stdout`. Mnimum + custom levels for each logger can be specified for filtering out messages. + + :param filename: Prefix for log file name, final file name will have a date time + appended. :param directory: Path to output directory, default value is the outputs folder. - :param custom_levels: dictionary to set logging levels, '*' can be used as a key for all registered modules. - This is likely to be used to disable all disease modules, and then enable one of interest - e.g. ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}`` - :param suppress_stdout: If True, suppresses logging to standard output stream (default is False) + :param custom_levels: Dictionary to set logging levels, '*' can be used as a key + for all registered modules. This is likely to be used to disable logging on + all disease modules by setting a high level, and then enable one of interest + by setting a low level, for example + ``{'*': logging.CRITICAL 'tlo.methods.hiv': logging.INFO}``. + :param suppress_stdout: If `True`, suppresses logging to standard output stream + (default is `False`). :return: Path of the log file if a filename has been given. 
""" @@ -113,7 +171,7 @@ def _configure_logging(self, filename: str = None, directory: Union[Path, str] = self._custom_log_levels = custom_levels if filename and directory: - timestamp = datetime.datetime.now().strftime('%Y-%m-%dT%H%M%S') + timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H%M%S") log_path = Path(directory) / f"{filename}__{timestamp}.log" self.output_file = logging.set_output_file(log_path) logger.info(key='info', data=f'Log output: {log_path}') @@ -122,29 +180,45 @@ def _configure_logging(self, filename: str = None, directory: Union[Path, str] = return None @property - def log_filepath(self): + def log_filepath(self) -> Path: """The path to the log file, if one has been set.""" return self._log_filepath - def register(self, *modules, sort_modules=True, check_all_dependencies=True): + def register( + self, + *modules: Module, + sort_modules: bool = True, + check_all_dependencies: bool = True, + auto_register_dependencies: bool = False, + ) -> None: """Register one or more disease modules with the simulation. - :param modules: the disease module(s) to use as part of this simulation. + :param modules: The disease module(s) to use as part of this simulation. Multiple modules may be given as separate arguments to one call. :param sort_modules: Whether to topologically sort the modules so that any initialisation dependencies (specified by the ``INIT_DEPENDENCIES`` attribute) of a module are initialised before the module itself is. A - ``ModuleDependencyError`` exception will be raised if there are missing - initialisation dependencies or circular initialisation dependencies between - modules that cannot be resolved. If this flag is set to ``True`` there is - also a requirement that at most one instance of each module is registered - and ``MultipleModuleInstanceError`` will be raised if this is not the case. + :py:exc:`.ModuleDependencyError` exception will be raised if there are + missing initialisation dependencies or circular initialisation dependencies + between modules that cannot be resolved. If this flag is set to ``True`` + there is also a requirement that at most one instance of each module is + registered and :py:exc:`.MultipleModuleInstanceError` will be raised if this + is not the case. :param check_all_dependencies: Whether to check if all of each module's declared dependencies (that is, the union of the ``INIT_DEPENDENCIES`` and ``ADDITIONAL_DEPENDENCIES`` attributes) have been included in the set of - modules to be registered. A ``ModuleDependencyError`` exception will + modules to be registered. A :py:exc:`.ModuleDependencyError` exception will be raised if there are missing dependencies. + :param auto_register_dependencies: Whether to register missing module dependencies + or not. If this argument is set to True, all module dependencies will be + automatically registered. 
""" + if auto_register_dependencies: + modules = [ + *modules, + *initialise_missing_dependencies(modules, resourcefilepath=self.resourcefilepath) + ] + if sort_modules: modules = list(topologically_sort_modules(modules)) if check_all_dependencies: @@ -152,30 +226,32 @@ def register(self, *modules, sort_modules=True, check_all_dependencies=True): # Iterate over modules and per-module seed sequences spawned from simulation # level seed sequence for module, seed_seq in zip(modules, self._seed_seq.spawn(len(modules))): - assert module.name not in self.modules, f'A module named {module.name} has already been registered' + assert ( + module.name not in self.modules + ), f"A module named {module.name} has already been registered" # Seed the RNG for the registered module using spawned seed sequence logger.info( - key='info', + key="info", data=( - f'{module.name} RNG auto (entropy, spawn key) = ' - f'({seed_seq.entropy}, {seed_seq.spawn_key[0]})' - ) + f"{module.name} RNG auto (entropy, spawn key) = " + f"({seed_seq.entropy}, {seed_seq.spawn_key[0]})" + ), ) module.rng = np.random.RandomState(np.random.MT19937(seed_seq)) self.modules[module.name] = module module.sim = self - module.read_parameters('') + module.read_parameters("") if self._custom_log_levels: logging.set_logging_levels(self._custom_log_levels) - def make_initial_population(self, *, n): + def make_initial_population(self, *, n: int) -> None: """Create the initial population to simulate. - :param n: the number of individuals to create; must be given as - a keyword parameter for clarity + :param n: The number of individuals to create; must be given as + a keyword parameter for clarity. """ start = time.time() @@ -193,63 +269,46 @@ def make_initial_population(self, *, n): for module in self.modules.values(): start1 = time.time() module.initialise_population(self.population) - logger.debug(key='debug', data=f'{module.name}.initialise_population() {time.time() - start1} s') + logger.debug( + key="debug", + data=f"{module.name}.initialise_population() {time.time() - start1} s", + ) end = time.time() - logger.info(key='info', data=f'make_initial_population() {end - start} s') + logger.info(key="info", data=f"make_initial_population() {end - start} s") - def simulate(self, *, end_date): - """Simulation until the given end date + def initialise(self, *, end_date: Date) -> None: + """Initialise all modules in simulation. - :param end_date: when to stop simulating. Only events strictly before this - date will be allowed to occur. - Must be given as a keyword parameter for clarity. + :param end_date: Date to end simulation on - accessible to modules to allow + initialising data structures which may depend (in size for example) on the + date range being simulated. 
""" - start = time.time() + if self._initialised: + msg = "initialise method should only be called once" + raise SimulationPreviouslyInitialisedError(msg) + self.date = self.start_date self.end_date = end_date # store the end_date so that others can reference it - for module in self.modules.values(): module.initialise_simulation(self) + self._initialised = True - progress_bar = None - if self.show_progress_bar: - num_simulated_days = (end_date - self.start_date).days - progress_bar = ProgressBar( - num_simulated_days, "Simulation progress", unit="day") - progress_bar.start() - - while self.event_queue: - event, date = self.event_queue.next_event() - - if self.show_progress_bar: - simulation_day = (date - self.start_date).days - stats_dict = { - "date": str(date.date()), - "dataframe size": str(len(self.population.props)), - "queued events": str(len(self.event_queue)), - } - if "HealthSystem" in self.modules: - stats_dict["queued HSI events"] = str( - len(self.modules["HealthSystem"].HSI_EVENT_QUEUE) - ) - progress_bar.update(simulation_day, stats_dict=stats_dict) - - if date >= end_date: - self.date = end_date - break - self.fire_single_event(event, date) - - # The simulation has ended. - if self.show_progress_bar: - progress_bar.stop() + def finalise(self, wall_clock_time: Optional[float] = None) -> None: + """Finalise all modules in simulation and close logging file if open. + :param wall_clock_time: Optional argument specifying total time taken to + simulate, to be written out to log before closing. + """ for module in self.modules.values(): module.on_simulation_end() + if wall_clock_time is not None: + logger.info(key="info", data=f"simulate() {wall_clock_time} s") + self.close_output_file() - logger.info(key='info', data=f'simulate() {time.time() - start} s') - - # From Python logging.shutdown + def close_output_file(self) -> None: + """Close logging file if open.""" if self.output_file: + # From Python logging.shutdown try: self.output_file.acquire() self.output_file.flush() @@ -258,52 +317,121 @@ def simulate(self, *, end_date): pass finally: self.output_file.release() + self.output_file = None - def schedule_event(self, event, date): - """Schedule an event to happen on the given future date. + def _initialise_progress_bar(self, end_date: Date) -> ProgressBar: + num_simulated_days = (end_date - self.date).days + progress_bar = ProgressBar( + num_simulated_days, "Simulation progress", unit="day" + ) + progress_bar.start() + return progress_bar + + def _update_progress_bar(self, progress_bar: ProgressBar, date: Date) -> None: + simulation_day = (date - self.start_date).days + stats_dict = { + "date": str(date.date()), + "dataframe size": str(len(self.population.props)), + "queued events": str(len(self.event_queue)), + } + if "HealthSystem" in self.modules: + stats_dict["queued HSI events"] = str( + len(self.modules["HealthSystem"].HSI_EVENT_QUEUE) + ) + progress_bar.update(simulation_day, stats_dict=stats_dict) + + def run_simulation_to(self, *, to_date: Date) -> None: + """Run simulation up to a specified date. - :param event: the Event to schedule - :param date: when the event should happen + Unlike :py:meth:`simulate` this method does not initialise or finalise + simulation and the date simulated to can be any date before or equal to + simulation end date. + + :param to_date: Date to simulate up to but not including - must be before or + equal to simulation end date specified in call to :py:meth:`initialise`. 
""" - assert date >= self.date, 'Cannot schedule events in the past' + if not self._initialised: + msg = "Simulation must be initialised before calling run_simulation_to" + raise SimulationNotInitialisedError(msg) + if to_date > self.end_date: + msg = f"to_date {to_date} after simulation end date {self.end_date}" + raise ValueError(msg) + if self.show_progress_bar: + progress_bar = self._initialise_progress_bar(to_date) + while ( + len(self.event_queue) > 0 and self.event_queue.date_of_next_event < to_date + ): + event, date = self.event_queue.pop_next_event_and_date() + if self.show_progress_bar: + self._update_progress_bar(progress_bar, date) + self.fire_single_event(event, date) + self.date = to_date + if self.show_progress_bar: + progress_bar.stop() - assert 'TREATMENT_ID' not in dir(event), \ - 'This looks like an HSI event. It should be handed to the healthsystem scheduler' - assert (event.__str__().find('HSI_') < 0), \ - 'This looks like an HSI event. It should be handed to the healthsystem scheduler' + def simulate(self, *, end_date: Date) -> None: + """Simulate until the given end date + + :param end_date: When to stop simulating. Only events strictly before this + date will be allowed to occur. Must be given as a keyword parameter for + clarity. + """ + start = time.time() + self.initialise(end_date=end_date) + self.run_simulation_to(to_date=end_date) + self.finalise(time.time() - start) + + def schedule_event(self, event: Event, date: Date) -> None: + """Schedule an event to happen on the given future date. + + :param event: The event to schedule. + :param date: wWen the event should happen. + """ + assert date >= self.date, "Cannot schedule events in the past" + + assert "TREATMENT_ID" not in dir( + event + ), "This looks like an HSI event. It should be handed to the healthsystem scheduler" + assert ( + event.__str__().find("HSI_") < 0 + ), "This looks like an HSI event. It should be handed to the healthsystem scheduler" assert isinstance(event, Event) self.event_queue.schedule(event=event, date=date) - def fire_single_event(self, event, date): + def fire_single_event(self, event: Event, date: Date) -> None: """Fires the event once for the given date - :param event: :py:class:`Event` to fire - :param date: the date of the event + :param event: :py:class:`Event` to fire. + :param date: The date of the event. """ self.date = date event.run() - def do_birth(self, mother_id): + def do_birth(self, mother_id: int) -> int: """Create a new child person. We create a new person in the population and then call the `on_birth` method in all modules to initialise the child's properties. - :param mother_id: the maternal parent - :return: the new child + :param mother_id: Row index label of the maternal parent. + :return: Row index label of the new child. """ child_id = self.population.do_birth() for module in self.modules.values(): module.on_birth(mother_id, child_id) return child_id - def find_events_for_person(self, person_id: int): + def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]: """Find the events in the queue for a particular person. - :param person_id: the person_id of interest - :returns list of tuples (date_of_event, event) for that person_id in the queue. - - NB. This is for debugging and testing only - not for use in real simulations as it is slow + + :param person_id: The row index of the person of interest. + :return: List of tuples `(date_of_event, event)` for that `person_id` in the + queue. + + .. note:: + This is for debugging and testing only. 
Not for use in real simulations as it + is slow. """ person_events = [] @@ -314,6 +442,40 @@ def find_events_for_person(self, person_id: int): return person_events + def save_to_pickle(self, pickle_path: Path) -> None: + """Save simulation state to a pickle file using :py:mod:`dill`. + + Requires :py:mod:`dill` to be importable. + + :param pickle_path: File path to save simulation state to. + """ + if not DILL_AVAILABLE: + raise RuntimeError("Cannot save to pickle as dill is not installed") + with open(pickle_path, "wb") as pickle_file: + dill.dump(self, pickle_file) + + @staticmethod + def load_from_pickle( + pickle_path: Path, log_config: Optional[dict] = None + ) -> Simulation: + """Load simulation state from a pickle file using :py:mod:`dill`. + + Requires :py:mod:`dill` to be importable. + + :param pickle_path: File path to load simulation state from. + :param log_config: New log configuration to override previous configuration. If + `None` previous configuration (including output file) will be retained. + + :returns: Loaded :py:class:`Simulation` object. + """ + if not DILL_AVAILABLE: + raise RuntimeError("Cannot load from pickle as dill is not installed") + with open(pickle_path, "rb") as pickle_file: + simulation = dill.load(pickle_file) + if log_config is not None: + simulation._log_filepath = simulation._configure_logging(**log_config) + return simulation + class EventQueue: """A simple priority queue for events. @@ -326,23 +488,32 @@ def __init__(self): self.counter = itertools.count() self.queue = [] - def schedule(self, event, date): + def schedule(self, event: Event, date: Date) -> None: """Schedule a new event. - :param event: the event to schedule - :param date: when it should happen + :param event: The event to schedule. + :param date: When it should happen. """ entry = (date, event.priority, next(self.counter), event) heapq.heappush(self.queue, entry) - def next_event(self): - """Get the earliest event in the queue. + def pop_next_event_and_date(self) -> tuple[Event, Date]: + """Get and remove the earliest event and corresponding date in the queue. - :returns: an (event, date) pair + :returns: An `(event, date)` pair. """ date, _, _, event = heapq.heappop(self.queue) return event, date - def __len__(self): - """:return: the length of the queue""" + @property + def date_of_next_event(self) -> Date: + """Get the date of the earliest event in queue without removing from queue. + + :returns: Date of next event in queue. + """ + date, *_ = self.queue[0] + return date + + def __len__(self) -> int: + """:return: The length of the queue.""" return len(self.queue) diff --git a/src/tlo/util.py b/src/tlo/util.py index 77924e4fa3..cbab8c4741 100644 --- a/src/tlo/util.py +++ b/src/tlo/util.py @@ -1,11 +1,13 @@ """This file contains helpful utility functions.""" import hashlib from collections import defaultdict -from typing import Dict, List, Optional, Set, Union +from pathlib import Path +from typing import Any, Dict, List, Optional, Set, Union import numpy as np import pandas as pd -from pandas import DateOffset +from pandas import DataFrame, DateOffset +from pandas._typing import DtypeArg from tlo import Population, Property, Types @@ -436,3 +438,96 @@ def get_person_id_to_inherit_from(child_id, mother_id, population_dataframe, rng return abs(mother_id) elif mother_id >= 0: return mother_id + + +def convert_excel_files_to_csv(folder: Path, files: Optional[list[str]] = None, *, delete_excel_files: bool = False) -> None: + """ convert Excel files to csv files. 
+ + :param folder: Folder containing Excel files. + :param files: List of Excel file names to convert to csv files. When `None`, all Excel files in the folder and + subsequent folders within this folder will be converted to csv files with Excel file name becoming + folder name and sheet names becoming csv file names. + :param delete_excel_files: When true, the Excel file we are generating csv files from will get deleted. + """ + # get path to Excel files + if files is None: + excel_file_paths = sorted(folder.rglob("*.xlsx")) + else: + excel_file_paths = [folder / file for file in files] + # exit function if no Excel file is given or found within the path + if excel_file_paths is None: + return + + for excel_file_path in excel_file_paths: + sheet_dataframes: dict[Any, DataFrame] = pd.read_excel(excel_file_path, sheet_name=None) + excel_file_directory: Path = excel_file_path.with_suffix("") + # Create a container directory for per sheet CSVs + if excel_file_directory.exists(): + print(f"Directory {excel_file_directory} already exists") + else: + excel_file_directory.mkdir() + # Write a CSV for each worksheet + for sheet_name, dataframe in sheet_dataframes.items(): + dataframe.to_csv(f'{excel_file_directory / sheet_name}.csv', index=False) + + if delete_excel_files: + # Remove no longer needed Excel file + Path(folder/excel_file_path).unlink() + + +def read_csv_files(folder: Path, + dtype: DtypeArg | dict[str, DtypeArg] | None = None, + files: str | int | list[str] | None = 0) -> DataFrame | dict[str, DataFrame]: + """ + A function to read CSV files in a similar way pandas reads Excel files (:py:func:`pandas.read_excel`). + + NB: Converting Excel files to csv files caused all columns that had no relevant data to simulation (i.e. + parameter descriptions or data references) to be named `Unnamed1, Unnamed2, ....., UnnamedN` in the csv files. + We are therefore using :py:func:`pandas.filter` to track all unnamed columns and silently drop them using + :py:func:`pandas.drop`. + + :param folder: Path to folder containing CSV files to read. + :param dtype: allows passing in a dictionary of datatypes in cases where you want different datatypes per column + :param files: preferred csv file name(s). This is the same as sheet names in Excel file. Note that if None(no files + selected) then all csv files in the containing folder will be read + + Please take note of the following behaviours: + ----------------------------------------------- + - if files argument is initialised to zero(default) and the folder contains one or multiple files, + this method will return a dataframe. If the folder contain multiple files, it is good to + specify file names or initialise files argument with None to ensure correct files are selected + - if files argument is initialised to None and the folder contains one or multiple files, this method + will return a dataframe dictionary + - if the folder contains multiple files and files argument is initialised with one file name this + method will return a dataframe. 
it will return a dataframe dictionary when files argument is + initialised with a list of multiple file names + + """ + all_data: dict[str, DataFrame] = {} # dataframes dictionary + + def clean_dataframe(dataframes_dict: dict[str, DataFrame]) -> None: + """ silently drop all columns that have no relevant data to simulation (all columns with a name starting with + Unnamed + :param dataframes_dict: Dictionary of dataframes to clean + """ + for _key, dataframe in dataframes_dict.items(): + all_data[_key] = dataframe.drop(dataframe.filter(like='Unnamed'), axis=1) # filter and drop Unnamed columns + + return_dict = False # a flag that will determine whether the output should be a dictionary or a DatFrame + if isinstance(files, list): + return_dict = True + elif isinstance(files, int) or files is None: + return_dict = files is None + files = [f_name.stem for f_name in folder.glob("*.csv")] + elif isinstance(files, str): + files = [files] + else: + raise TypeError(f"Value passed for files argument {files} is not one of expected types.") + + for f_name in files: + all_data[f_name] = pd.read_csv((folder / f_name).with_suffix(".csv"), dtype=dtype) + # clean and return the dataframe dictionary + clean_dataframe(all_data) + # return a dictionary if return_dict flag is set to True else return a dataframe + return all_data if return_dict else next(iter(all_data.values())) + diff --git a/tests/bitset_handler/__init__.py b/tests/bitset_handler/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/bitset_handler/conftest.py b/tests/bitset_handler/conftest.py new file mode 100644 index 0000000000..41b6ab3e6f --- /dev/null +++ b/tests/bitset_handler/conftest.py @@ -0,0 +1,95 @@ +""" +Implements the fixtures required in +https://github.com/pandas-dev/pandas/blob/bdb509f95a8c0ff16530cedb01c2efc822c0d314/pandas/core/dtypes/dtypes.py, + +which allows us to run the pandas-provided test suite for custom dtypes. +Additional tests and fixtures can be defined on top of those listed in the link above, if we want to +run our own tests. +""" + +from typing import List, Set + +import numpy as np +import pytest +from numpy.random import PCG64, Generator +from numpy.typing import NDArray + +from tlo.bitset_handler.bitset_extension import BitsetArray, BitsetDtype, ElementType + + +@pytest.fixture(scope="session") +def _rng() -> Generator: + return Generator(PCG64(seed=0)) + + +@pytest.fixture(scope="session") +def _set_elements() -> Set[ElementType]: + return {"1", "2", "3", "4", "5", "a", "b", "c", "d", "e"} + + +@pytest.fixture(scope="session") +def dtype(_set_elements: Set[ElementType]) -> BitsetDtype: + return BitsetDtype(_set_elements) + + +@pytest.fixture(scope="session") +def _1st_3_entries() -> List[Set[ElementType]]: + """ + We will fix the first 3 entries of the data fixture, + which is helpful to ensure we have some explicit test + values that we can directly change if needed. + """ + return [ + {"1", "e"}, {"a", "d"}, {"2", "4", "5"}, + ] + +@pytest.fixture(scope="session") +def _raw_sets( + _1st_3_entries: List[Set[ElementType]], _rng: Generator, _set_elements: Set[ElementType] +) -> List[Set[ElementType]]: + """ + Length 100 list of sets, the first 3 of which are those in + the _1st_3_entries fixture. These sets will be used as the + 'raw_data' for the Bitset Extension test suite. 
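# [Editor's illustrative sketch - not part of the diff.] Referring back to
# `read_csv_files` in src/tlo/util.py above: a summary of the return-type behaviour its
# docstring describes. The folder and file names below are assumptions.
from pathlib import Path
from tlo.util import read_csv_files

folder = Path("resources/ResourceFile_Example")

df_default = read_csv_files(folder)                              # files=0 (default) -> single DataFrame
df_one = read_csv_files(folder, files="sheet_a")                 # single name -> single DataFrame
dfs_all = read_csv_files(folder, files=None)                     # None -> dict of all CSVs in folder
dfs_some = read_csv_files(folder, files=["sheet_a", "sheet_b"])  # list -> dict of DataFrames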
+ """ + set_entries = list(_1st_3_entries) + elements = list(_set_elements) + for _ in range(100-len(_1st_3_entries)): + set_entries.append( + { + elements[i] + for i in _rng.integers( + 0, len(elements), size=_rng.integers(0, len(elements)) + ) + } + ) + return set_entries + +@pytest.fixture(scope="session") +def _raw_data( + _raw_sets: List[Set[ElementType]], dtype: BitsetDtype +) -> NDArray[np.bytes_]: + data = np.zeros((100,), dtype=dtype.np_array_dtype) + for i, set_value in enumerate(_raw_sets): + data[i] = dtype.as_bytes(set_value) + return data + + +@pytest.fixture(scope="session") +def data( + _raw_data: NDArray[np.bytes_], dtype: BitsetDtype +) -> BitsetArray: + return BitsetArray(data=_raw_data, dtype=dtype, copy=True) + + +@pytest.fixture +def data_for_twos(dtype: BitsetDtype) -> None: + pytest.skip(f"{dtype} does not support divmod") + + +@pytest.fixture +def data_missing(dtype: BitsetDtype) -> np.ndarray: + data = np.zeros((2,), dtype=dtype.np_array_dtype) + data[0] = dtype.na_value + data[1] = dtype.as_bytes({"a"}) + return data diff --git a/tests/bitset_handler/test_bitset_pandas_dtype.py b/tests/bitset_handler/test_bitset_pandas_dtype.py new file mode 100644 index 0000000000..156f9e49e6 --- /dev/null +++ b/tests/bitset_handler/test_bitset_pandas_dtype.py @@ -0,0 +1,28 @@ +import re + +import pytest +from pandas.tests.extension.base import BaseDtypeTests + +from tlo.bitset_handler.bitset_extension import BitsetDtype + + +class TestBitsetDtype(BaseDtypeTests): + """ + Setting the dtype fixture, above, to out BitsetDtype results in us inheriting + all default pandas tests for extension Dtypes. + + Additional tests can be added to this class if we so desire. + """ + + def test_construct_from_string_another_type_raises( + self, dtype: BitsetDtype + ) -> None: + """ + Reimplementation as the error message we expect is different from that provided + by base ``pandas`` implementation. + """ + msg = ( + "Need at least 2 (comma-separated) elements in string to construct bitset." + ) + with pytest.raises(TypeError, match=re.escape(msg)): + type(dtype).construct_from_string("another_type") diff --git a/tests/bitset_handler/test_bitset_set_like_interactions.py b/tests/bitset_handler/test_bitset_set_like_interactions.py new file mode 100644 index 0000000000..801703ce24 --- /dev/null +++ b/tests/bitset_handler/test_bitset_set_like_interactions.py @@ -0,0 +1,162 @@ +""" +Tests for set-like interactions with a pd.Series object of BitsetDtype. +""" +import operator +from typing import Any, Callable, Iterable, List, Set + +import pandas as pd +import pytest + +from tlo.bitset_handler.bitset_extension import BitsetDtype, CastableForPandasOps, ElementType + + +def seq_of_sets_to_series(sets: Iterable[Set[ElementType]], dtype: BitsetDtype) -> pd.Series: + """ + Casts a sequence of sets representing a single BitsetDtype to a + series with those entries of the appropriate dtype. 
+ """ + return pd.Series(data=sets, dtype=dtype, copy=True) + + +@pytest.fixture(scope="function") +def small_series(_1st_3_entries: List[Set[ElementType]], dtype: BitsetDtype): + """ + Recall that the first 3 entries are always fixed in confest; + repeating the values here just for ease of reference: + + {"1", "e"}, + {"a", "d"}, + {"2", "4", "5"}, + """ + return pd.Series(_1st_3_entries, dtype=dtype, copy=True) + + +@pytest.mark.parametrize( + ["op", "r_value", "expected"], + [ + pytest.param( + [operator.or_, operator.add, operator.sub], + set(), + [{"1", "e"}, {"a", "d"}, {"2", "4", "5"}], + id="ADD, OR, SUB w/ empty set", + ), + pytest.param( + [operator.or_, operator.add], + "a", + [{"1", "a", "e"}, {"a", "d"}, {"2", "4", "5", "a"}], + id="ADD, OR w/ scalar element", + ), + pytest.param( + [operator.or_, operator.add], + {"1", "2", "a", "d"}, + [ + {"1", "2", "a", "d", "e"}, + {"1", "2", "a", "d"}, + {"1", "2", "4", "5", "a", "d"}, + ], + id="ADD, OR w/ multiple-entry set", + ), + pytest.param( + operator.and_, + set(), + [set()] * 3, + id="AND w/ empty set", + ), + pytest.param( + operator.and_, + "a", + [set(), {"a"}, set()], + id="AND w/ scalar element", + ), + pytest.param( + operator.and_, + {"1", "a"}, + [{"1"}, {"a"}, set()], + id="AND w/ multiple-entry set", + ), + pytest.param( + [operator.eq, operator.le, operator.lt], + set(), + pd.Series([False, False, False], dtype=bool), + id="EQ, LE, LT w/ empty set", + ), + pytest.param( + [operator.eq, operator.le, operator.lt], + "a", + pd.Series([False, False, False], dtype=bool), + id="EQ, LE, LT w/ scalar element", + ), + pytest.param( + [operator.eq, operator.ge, operator.le], + {"1", "e"}, + pd.Series([True, False, False], dtype=bool), + id="EQ, GE, LE w/ multiple-entry set", + ), + pytest.param( + [operator.ge, operator.gt], + set(), + pd.Series([True, True, True], dtype=bool), + id="GE, GT w/ empty set", + ), + pytest.param( + [operator.ge, operator.gt], + "a", + pd.Series([False, True, False], dtype=bool), + id="GE, GT w/ scalar element", + ), + pytest.param( + [operator.gt, operator.lt], + {"1, e"}, + pd.Series([False, False, False], dtype=bool), + id="GT, LT w/ multiple-entry set", + ), + pytest.param( + operator.sub, + "a", + [{"1", "e"}, {"d"}, {"2", "4", "5"}], + id="SUB w/ scalar element", + ), + pytest.param( + operator.sub, + {"1", "2", "d", "e"}, + [set(), {"a"}, {"4", "5"}], + id="SUB w/ multiple-entry set", + ), + ], +) +def test_series_operation_with_value( + small_series: pd.Series, + dtype: BitsetDtype, + op: List[Callable[[Any, Any], Any]] | Callable[[Any, Any], Any], + r_value: CastableForPandasOps, + expected: List[Set[ElementType]] | pd.Series +) -> None: + """ + The expected value can be passed in as either a list of sets that will be + converted to the appropriate pd.Series of bitsets, or as an explicit pd.Series + of booleans (which is used when testing the comparison operations ==, <=, etc). + + If r_value is a scalar, the test will run once using the scalar as the r_value, + and then again using the cast of the scalar to a set of one element as the r_value. + - In cases such as this, the two results are expected to be the same, + which saves us verbiage in the list of test cases above. 
+ """ + expected = ( + seq_of_sets_to_series(expected, dtype) + if isinstance(expected, list) + else expected + ) + + if not isinstance(op, list): + op = [op] + if isinstance(r_value, ElementType): + r_values = [r_value, {r_value}] + else: + r_values = [r_value] + + for operation in op: + for r_v in r_values: + result = operation(small_series, r_v) + assert ( + expected == result + ).all(), f"Series do not match after operation {operation.__name__} with {r_v} on the right." diff --git a/tests/conftest.py b/tests/conftest.py index 47d6c3fa16..33b463343a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -34,4 +34,4 @@ def pytest_collection_modifyitems(config, items): def pytest_generate_tests(metafunc): if "seed" in metafunc.fixturenames: - metafunc.parametrize("seed", metafunc.config.getoption("seed")) + metafunc.parametrize("seed", metafunc.config.getoption("seed"), scope="session") diff --git a/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx b/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx new file mode 100644 index 0000000000..84edbd2636 --- /dev/null +++ b/tests/resources/ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af1a6a6aa24a7de385efdf1564da3e3abfbba9fe467d92212b5c87b127e899f6 +size 10714 diff --git a/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle b/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle new file mode 100644 index 0000000000..896ce51bf6 --- /dev/null +++ b/tests/resources/probability_premature_death/0/0/tlo.methods.demography.pickle @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2cddd2f691393fc27e990170f76ff12a2962d3fbee986deee459a6eb4996fd7 +size 243603 diff --git a/tests/test_analysis.py b/tests/test_analysis.py index 2e77a87e06..4bf4b6a1cb 100644 --- a/tests/test_analysis.py +++ b/tests/test_analysis.py @@ -1,4 +1,5 @@ import os +import textwrap from pathlib import Path from typing import List @@ -18,6 +19,7 @@ get_parameters_for_improved_healthsystem_and_healthcare_seeking, get_parameters_for_status_quo, get_root_path, + merge_log_files, mix_scenarios, order_of_coarse_appt, order_of_short_treatment_ids, @@ -698,3 +700,99 @@ def check_log(log): sim = Simulation(start_date=Date(2010, 1, 1), seed=seed, log_config=log_config) check_log(run_simulation_and_cause_one_death(sim)) + +def test_merge_log_files(tmp_path): + log_file_path_1 = tmp_path / "log_file_1" + log_file_path_1.write_text( + textwrap.dedent( + """\ + {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null} + {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]} + {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"} + {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]} + {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"} + {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]} + {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"} + {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]} + {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"} + 
{"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]} + {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]} + """ + ) + ) + log_file_path_2 = tmp_path / "log_file_2" + log_file_path_2.write_text( + textwrap.dedent( + """\ + {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null} + {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]} + {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"} + {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]} + {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]} + {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"} + {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]} + {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"} + {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]} + """ + ) + ) + expected_merged_log_file_content = textwrap.dedent( + """\ + {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null} + {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]} + {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"} + {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]} + {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"} + {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]} + {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"} + {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]} + {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"} + {"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]} + {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]} + {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]} + {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]} + {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]} + {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]} + {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"} + {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]} + """ + ) + merged_log_file_path = tmp_path / "merged_log_file" + merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path) + merged_log_file_content = merged_log_file_path.read_text() + assert merged_log_file_content == expected_merged_log_file_content + + +def test_merge_log_files_with_inconsistent_headers_raises(tmp_path): + log_file_path_1 = tmp_path / "log_file_1" + log_file_path_1.write_text( + textwrap.dedent( + """\ + {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null} + {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]} + """ + ) + ) + log_file_path_2 = tmp_path / "log_file_2" + log_file_path_2.write_text( + textwrap.dedent( + """\ + {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "int"}, "description": null} + 
{"uuid": "b07", "date": "2010-01-04T00:00:00", "values": [1]} + """ + ) + ) + merged_log_file_path = tmp_path / "merged_log_file" + with pytest.raises(RuntimeError, match="Inconsistent header lines"): + merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path) + + +def test_merge_log_files_inplace_raises(tmp_path): + log_file_path_1 = tmp_path / "log_file_1" + log_file_path_1.write_text("foo") + log_file_path_2 = tmp_path / "log_file_2" + log_file_path_2.write_text("bar") + with pytest.raises(ValueError, match="output_path"): + merge_log_files(log_file_path_1, log_file_path_2, log_file_path_1) + with pytest.raises(ValueError, match="output_path"): + merge_log_files(log_file_path_1, log_file_path_2, log_file_path_2) diff --git a/tests/test_consumables.py b/tests/test_consumables.py index 6eee6dac38..c45f1532ed 100644 --- a/tests/test_consumables.py +++ b/tests/test_consumables.py @@ -61,12 +61,12 @@ def test_using_recognised_item_codes(seed): # Make requests for consumables (which would normally come from an instance of `HSI_Event`). rtn = cons._request_consumables( - item_codes={0: 1, 1: 1}, + essential_item_codes={0: 1, 1: 1}, facility_info=facility_info_0 ) assert {0: False, 1: True} == rtn - assert not cons._not_recognised_item_codes # No item_codes recorded as not recognised. + assert len(cons._not_recognised_item_codes) == 0 # No item_codes recorded as not recognised. def test_unrecognised_item_code_is_recorded(seed): @@ -88,12 +88,12 @@ def test_unrecognised_item_code_is_recorded(seed): # Make requests for consumables (which would normally come from an instance of `HSI_Event`). rtn = cons._request_consumables( - item_codes={99: 1}, + essential_item_codes={99: 1}, facility_info=facility_info_0 ) assert isinstance(rtn[99], bool) - assert cons._not_recognised_item_codes # Some item_codes recorded as not recognised. + assert len(cons._not_recognised_item_codes) > 0 # Some item_codes recorded as not recognised. 
# Check warning is issued at end of simulation with pytest.warns(UserWarning) as recorded_warnings: @@ -128,7 +128,8 @@ def test_consumables_availability_options(seed): cons.on_start_of_day(date=date) assert _expected_result == cons._request_consumables( - item_codes={_item_code: 1 for _item_code in all_items_request}, to_log=False, facility_info=facility_info_0 + essential_item_codes={_item_code: 1 for _item_code in all_items_request}, + to_log=False, facility_info=facility_info_0 ) @@ -153,7 +154,8 @@ def request_item(cons, item_code: Union[list, int]): item_code = [item_code] return all(cons._request_consumables( - item_codes={_i: 1 for _i in item_code}, to_log=False, facility_info=facility_info_0 + essential_item_codes={_i: 1 for _i in item_code}, + to_log=False, facility_info=facility_info_0 ).values()) rng = get_rng(seed) @@ -250,7 +252,7 @@ def test_consumables_available_at_right_frequency(seed): for _ in range(n_trials): cons.on_start_of_day(date=date) rtn = cons._request_consumables( - item_codes=requested_items, + essential_item_codes=requested_items, facility_info=facility_info_0, ) for _i in requested_items: @@ -273,6 +275,47 @@ def is_obs_frequency_consistent_with_expected_probability(n_obs, n_trials, p): p=average_availability_of_known_items) +@pytest.mark.parametrize("p_known_items, expected_items_used", [ + # Test 1 + ({0: 0.0, 1: 1.0, 2: 1.0, 3: 1.0}, {}), + # Test 2 + ({0: 1.0, 1: 1.0, 2: 0.0, 3: 1.0}, {0: 5, 1: 10, 3: 2}) +]) +def test_items_used_includes_only_available_items(seed, p_known_items, expected_items_used): + """ + Test that 'items_used' includes only items that are available. + Items should only be logged if the essential items are ALL available + If essential items are available, then optional items can be logged as items_used if available + Test 1: should not have any items_used as essential item 0 is not available + Test 2: should have essential items logged as items_used, but optional item 2 is not available + """ + + data = create_dummy_data_for_cons_availability( + intrinsic_availability=p_known_items, + months=[1], + facility_ids=[0] + ) + rng = get_rng(seed) + date = datetime.datetime(2010, 1, 1) + + cons = Consumables(availability_data=data, rng=rng) + + # Define essential and optional item codes + essential_item_codes = {0: 5, 1: 10} # these must match parameters above + optional_item_codes = {2: 7, 3: 2} + + cons.on_start_of_day(date=date) + cons._request_consumables( + essential_item_codes=essential_item_codes, + optional_item_codes=optional_item_codes, + facility_info=facility_info_0, + ) + + # Access items used from the Consumables summary counter + items_used = getattr(cons._summary_counter, '_items', {}).get('Used') + assert items_used == expected_items_used, f"Expected items_used to be {expected_items_used}, but got {items_used}" + + def get_sim_with_dummy_module_registered(tmpdir=None, run=True, data=None): """Return an initialised simulation object with a Dummy Module registered. 
If the `data` argument is provided, the parameter in HealthSystem that holds the data on consumables availability is over-written.""" @@ -321,7 +364,7 @@ def initialise_simulation(self, sim): return sim -def get_dummy_hsi_event_instance(module, facility_id=None): +def get_dummy_hsi_event_instance(module, facility_id=None, to_log=False): """Make an HSI Event that runs for person_id=0 in a particular facility_id and requests consumables, and for which its parent is the identified module.""" @@ -340,7 +383,7 @@ def apply(self, person_id, squeeze_factor): """Requests all recognised consumables.""" self.get_consumables( item_codes=list(self.sim.modules['HealthSystem'].consumables.item_codes), - to_log=True, + to_log=to_log, return_individual_results=False ) @@ -446,7 +489,7 @@ def schedule_hsi_that_will_request_consumables(sim): # Schedule the HSI event for person_id=0 sim.modules['HealthSystem'].schedule_hsi_event( - hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0), + hsi_event=get_dummy_hsi_event_instance(module=sim.modules['DummyModule'], facility_id=0, to_log=True), topen=sim.start_date, tclose=None, priority=0 @@ -500,12 +543,12 @@ def test_every_declared_consumable_for_every_possible_hsi_using_actual_data(recw facility_id=_facility_id ) for _item_code in item_codes: - hsi_event.get_consumables(item_codes=_item_code) + hsi_event.get_consumables(item_codes=_item_code, to_log=False) sim.modules['HealthSystem'].on_simulation_end() - # Check that no warnings raised or item_codes recorded as being not recogised. - assert not sim.modules['HealthSystem'].consumables._not_recognised_item_codes + # Check that no warnings raised or item_codes recorded as being not recognised. + assert len(sim.modules['HealthSystem'].consumables._not_recognised_item_codes) == 0 assert not any_warnings_about_item_code(recwarn) diff --git a/tests/test_equipment.py b/tests/test_equipment.py index 1167023aa8..e7b8f03ccc 100644 --- a/tests/test_equipment.py +++ b/tests/test_equipment.py @@ -1,5 +1,6 @@ """This file contains all the tests to do with Equipment.""" import os +from ast import literal_eval from pathlib import Path from typing import Dict @@ -259,7 +260,7 @@ def all_equipment_ever_used(log: Dict) -> set: (at any facility).""" s = set() for i in log["EquipmentEverUsed_ByFacilityID"]['EquipmentEverUsed']: - s.update(eval(i)) + s.update(literal_eval(i)) return s # * An HSI that declares no use of any equipment (logs should be empty). 
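# [Editor's illustrative sketch - not part of the diff.] The point of the
# eval -> ast.literal_eval swap made in test_equipment.py above: the logged
# 'EquipmentEverUsed' values are string representations of Python sets, which
# literal_eval parses safely without executing arbitrary code (the value shown is
# illustrative).
from ast import literal_eval

logged_value = "{1, 17, 23}"
equipment_ids = literal_eval(logged_value)   # -> {1, 17, 23}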
@@ -474,7 +475,7 @@ def initialise_simulation(self, sim): # Read log to find what equipment used df = parse_log_file(sim.log_filepath)["tlo.methods.healthsystem.summary"]['EquipmentEverUsed_ByFacilityID'] df = df.drop(index=df.index[~df['Facility_Level'].isin(item_code_needed_at_each_level.keys())]) - df['EquipmentEverUsed'] = df['EquipmentEverUsed'].apply(eval).apply(list) + df['EquipmentEverUsed'] = df['EquipmentEverUsed'].apply(literal_eval) # Check that equipment used at each level matches expectations assert item_code_needed_at_each_level == df.groupby('Facility_Level')['EquipmentEverUsed'].sum().apply(set).to_dict() diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py index 6568d1df56..6eeabc4995 100644 --- a/tests/test_healthsystem.py +++ b/tests/test_healthsystem.py @@ -952,7 +952,7 @@ def apply(self, person_id, squeeze_factor): } == set(detailed_hsi_event.columns) assert {'date', 'Frac_Time_Used_Overall', 'Frac_Time_Used_By_Facility_ID', 'Frac_Time_Used_By_OfficerType', } == set(detailed_capacity.columns) - assert {'date', 'TREATMENT_ID', 'Item_Available', 'Item_NotAvailable' + assert {'date', 'TREATMENT_ID', 'Item_Available', 'Item_NotAvailable', 'Item_Used' } == set(detailed_consumables.columns) bed_types = sim.modules['HealthSystem'].bed_days.bed_types @@ -1019,6 +1019,9 @@ def dict_all_close(dict_1, dict_2): assert summary_consumables['Item_NotAvailable'].apply(pd.Series).sum().to_dict() == \ detailed_consumables['Item_NotAvailable'].apply( lambda x: {f'{k}': v for k, v in eval(x).items()}).apply(pd.Series).sum().to_dict() + assert summary_consumables['Item_Used'].apply(pd.Series).sum().to_dict() == \ + detailed_consumables['Item_Used'].apply( + lambda x: {f'{k}': v for k, v in eval(x).items()}).apply(pd.Series).sum().to_dict() # - Bed-Days (bed-type by bed-type and year by year) for _bed_type in bed_types: @@ -2570,3 +2573,69 @@ def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bo caps_scaling_by_both_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=True) assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_level_with_rescaling assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling + + +def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir): + """Run the simulation with an HSI_Event that may have a blank_footprint and examine the healthsystem.summary logger. + * If the footprint is blank, the HSI event should be recorded in the usual loggers but not the 'no_blank' logger + * If the footprint is non-blank, the HSI event should be recorded in the usual and the 'no_blank' loggers. + """ + + def run_simulation_and_return_healthsystem_summary_log(tmpdir: Path, blank_footprint: bool) -> dict: + """Return the `healthsystem.summary` logger for a simulation. In that simulation, there is HSI_Event run on the + first day of the simulation and its `EXPECTED_APPT_FOOTPRINT` may or may not be blank. 
The simulation is run for one + year in order that the summary logger is active (it runs annually).""" + + class HSI_Dummy(HSI_Event, IndividualScopeEventMixin): + def __init__(self, module, person_id, _is_footprint_blank): + super().__init__(module, person_id=person_id) + self.TREATMENT_ID = 'Dummy' + self.ACCEPTED_FACILITY_LEVEL = '0' + self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({}) if blank_footprint \ + else self.make_appt_footprint({'ConWithDCSA': 1}) + + def apply(self, person_id, squeeze_factor): + pass + + class DummyModule(Module): + METADATA = {Metadata.DISEASE_MODULE} + + def read_parameters(self, data_folder): + pass + + def initialise_population(self, population): + pass + + def initialise_simulation(self, sim): + hsi_event = HSI_Dummy(module=self, person_id=0, _is_footprint_blank=blank_footprint) + sim.modules['HealthSystem'].schedule_hsi_event(hsi_event=hsi_event, topen=sim.date, priority=0) + + start_date = Date(2010, 1, 1) + sim = Simulation(start_date=start_date, seed=0, log_config={'filename': 'tmp', 'directory': tmpdir}) + sim.register( + demography.Demography(resourcefilepath=resourcefilepath), + healthsystem.HealthSystem(resourcefilepath=resourcefilepath, mode_appt_constraints=0), + DummyModule(), + # Disable sorting + checks to avoid error due to missing dependencies + sort_modules=False, + check_all_dependencies=False + ) + sim.make_initial_population(n=100) + sim.simulate(end_date=sim.start_date + pd.DateOffset(years=1)) + + return parse_log_file(sim.log_filepath)['tlo.methods.healthsystem.summary'] + # When the footprint is blank: + log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=True) + assert log['HSI_Event']['TREATMENT_ID'].iloc[0] == {'Dummy': 1} # recorded in usual logger + assert log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0] == {} # not recorded in 'non-blank' logger + + # When the footprint is non-blank: + log = run_simulation_and_return_healthsystem_summary_log(tmpdir, blank_footprint=False) + assert not log['HSI_Event'].empty + assert 'TREATMENT_ID' in log['HSI_Event'].columns + assert 'TREATMENT_ID' in log['HSI_Event_non_blank_appt_footprint'].columns + assert( log['HSI_Event']['TREATMENT_ID'].iloc[0] + == log['HSI_Event_non_blank_appt_footprint']['TREATMENT_ID'].iloc[0] + == {'Dummy': 1} + # recorded in both the usual and the 'non-blank' logger + ) diff --git a/tests/test_life_expectancy.py b/tests/test_life_expectancy.py index 2465580f65..0a77f02310 100644 --- a/tests/test_life_expectancy.py +++ b/tests/test_life_expectancy.py @@ -1,10 +1,15 @@ import datetime import os +import pickle from pathlib import Path +import numpy as np import pandas as pd -from tlo.analysis.life_expectancy import get_life_expectancy_estimates +from tlo.analysis.life_expectancy import ( + get_life_expectancy_estimates, + get_probability_of_premature_death, +) def test_get_life_expectancy(): @@ -33,3 +38,57 @@ def test_get_life_expectancy(): assert sorted(rtn_full.index.to_list()) == ["F", "M"] assert list(rtn_full.columns.names) == ['draw', 'run'] assert rtn_full.columns.levels[1].to_list() == [0, 1] + + +def test_probability_premature_death(tmpdir, age_before_which_death_is_defined_as_premature: int = 70): + """ + Test the calculation of the probability of premature death from a simulated cohort. + + This function loads results from a dummy cohort (N = 100, with 37 F and 63 M) simulation where all individuals start + at age 0. 
The simulation was then run for 70 years (2010 - 2080), during which individuals could die but nobody + could be born. In this dummy data set, 6 F die and 23 M die prematurely, giving a probability of premature death as + 0.16 and 0.37, respectively. The premature deaths amongst these individuals is then the number that have died + before the age of 70 (default value). + This test uses the calculates the probability of premature death separately for males and females using the + data from this simulated run and the function get_probability_of_premature_death. + It then compares these simulated probabilities against the total number of deaths before the age of 70 (default) + that occurred in the simulated cohort. + """ + # load results from a dummy cohort where everyone starts at age 0. + target_period = (datetime.date(2010, 1, 1), datetime.date(2080, 12, 31)) + + results_folder_dummy_results = Path(os.path.dirname(__file__)) / 'resources' / 'probability_premature_death' + pickled_file = os.path.join(results_folder_dummy_results, '0', '0', 'tlo.methods.demography.pickle') + + # - Compute 'manually' from raw data + with open(pickled_file, 'rb') as file: + demography_data = pickle.load(file) + initial_popsize = {'F': demography_data['population']['female'][0], 'M': demography_data['population']['male'][0]} + deaths_total = demography_data['death'][['sex', 'age']] + num_premature_deaths = deaths_total.loc[deaths_total['age'] < age_before_which_death_is_defined_as_premature] \ + .groupby('sex') \ + .size() \ + .to_dict() + prob_premature_death = {s: num_premature_deaths[s] / initial_popsize[s] for s in ("M", "F")} + + # - Compute using utility function + probability_premature_death_summary = get_probability_of_premature_death( + results_folder=results_folder_dummy_results, + target_period=target_period, + summary=True, + ) + + # Confirm both methods gives the same answer + # (Absolute tolerance of this test is reasonably large (1%) as small assumptions made in the calculation of the + # cumulative probability of death in each age-group mean that the manual computation done here and the calculation + # performed in the utility function are not expected to agree perfectly.) 
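# [Editor's illustrative check - not part of the diff.] Worked arithmetic for the
# cohort figures quoted in the docstring above (counts taken from that docstring),
# consistent with the 1% tolerance used in the assertions below.
assert round(6 / 37, 2) == 0.16    # females: 6 premature deaths out of 37
assert round(23 / 63, 2) == 0.37   # males: 23 premature deaths out of 63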
+ assert np.isclose( + probability_premature_death_summary.loc["F"].loc[(0, 'mean')], + prob_premature_death['F'], + atol=0.01 + ) + assert np.isclose( + probability_premature_death_summary.loc["M"].loc[(0, 'mean')], + prob_premature_death['M'], + atol=0.01 + ) diff --git a/tests/test_module_dependencies.py b/tests/test_module_dependencies.py index ca5bf58482..8ed5b6811e 100644 --- a/tests/test_module_dependencies.py +++ b/tests/test_module_dependencies.py @@ -1,5 +1,4 @@ """Tests for automatic checking and ordering of method module dependencies.""" - import os from pathlib import Path from random import seed as set_seed @@ -8,7 +7,7 @@ import pytest -from tlo import Date, Module, Simulation +from tlo import Date, Module, Simulation, logging from tlo.dependencies import ( ModuleDependencyError, get_all_dependencies, @@ -17,6 +16,7 @@ get_module_class_map, topologically_sort_modules, ) +from tlo.methods import hiv, simplified_births try: resourcefilepath = Path(os.path.dirname(__file__)) / "../resources" @@ -28,7 +28,6 @@ simulation_end_date = Date(2010, 9, 1) simulation_initial_population = 1000 - module_class_map = get_module_class_map( excluded_modules={ "Module", @@ -51,7 +50,6 @@ def sim(seed): @pytest.fixture def dependent_module_pair(): - class Module1(Module): pass @@ -67,7 +65,7 @@ def dependent_module_chain(): type( f'Module{i}', (Module,), - {'INIT_DEPENDENCIES': frozenset({f'Module{i-1}'})} if i != 0 else {} + {'INIT_DEPENDENCIES': frozenset({f'Module{i - 1}'})} if i != 0 else {} ) for i in range(10) ] @@ -251,8 +249,8 @@ def test_module_dependencies_complete(sim, module_class): for module in module_class_map.values() # Skip test for NewbornOutcomes as long simulation needed for birth events to occur and dependencies to be used if module.__name__ not in { - 'NewbornOutcomes' - } + 'NewbornOutcomes' + } for dependency_name in sorted(get_all_required_dependencies(module)) ], ids=lambda pair: f"{pair[0].__name__}, {pair[1].__name__}" @@ -285,3 +283,76 @@ def test_module_dependencies_all_required(sim, module_and_dependency_pair): 'does not appear to be required to run simulation without errors and so ' f'should be removed from the dependencies of {module_class.__name__}.' ) + + +def test_auto_register_module_dependencies(tmpdir): + """ Check that module dependencies are registered as expected when the option to auto register modules in the + simulation is set to True """ + # configure logging + log_config = { + 'filename': 'LogFile', + 'directory': tmpdir, + 'custom_levels': { + '*': logging.CRITICAL, + 'tlo.methods.demography': logging.INFO + } + } + # set simulation start date + start_date = Date(2010, 1, 1) + + # register required modules for a simple simulation. We have included Hiv as it has some dependencies. We want + to test whether the dependencies can be automatically registered when the auto register argument of the simulation + is set to True + def register_disease_modules_manually(): + """ Test manually registering disease modules without including all dependencies, while leaving the + option to auto register missing dependencies set to False. 
This should fail with a ModuleDependencyError. """ + with pytest.raises(ModuleDependencyError, match='missing'): + # configure simulation + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + # the lines below should fail with missing dependencies + sim.register(hiv.Hiv(resourcefilepath=resourcefilepath)) + + def register_disease_modules_using_labour_modules_for_births(): + """ Test registering disease modules without including all dependencies and not using the simplified births + module, BUT setting to True the option to auto register missing dependencies. This should register all necessary + modules, including all labour modules """ + # configure simulation + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + # re-register modules with the auto-register-dependencies argument set to True and using labour modules for births + sim.register(hiv.Hiv(resourcefilepath=resourcefilepath), + auto_register_dependencies=True) + # get module dependencies + required_dependencies = get_all_required_dependencies(sim.modules["Hiv"]) + # check registered dependencies + registered_module_names = set(sim.modules.keys()) + # all required dependencies should be available in registered dependencies + assert required_dependencies <= registered_module_names + + def register_disease_modules_using_simplified_births_for_births(): + """ Test registering disease modules without including all dependencies, BUT setting to True the option to auto + register missing dependencies and using the simplified births module. This should register all necessary modules + except the labour modules, since we're using simplified births """ + # configure simulation + sim = Simulation(start_date=start_date, seed=0, log_config=log_config, resourcefilepath=resourcefilepath) + sim.register(hiv.Hiv(resourcefilepath=resourcefilepath), + simplified_births.SimplifiedBirths(resourcefilepath=resourcefilepath), + auto_register_dependencies=True + ) + # now that we're using simplified births we want to ensure that the alternative dependencies are not registered + alternative_dependencies = simplified_births.SimplifiedBirths.ALTERNATIVE_TO + # get registered modules + registered_module_names = set(sim.modules.keys()) + # no alternative dependency (labour modules) should get registered when using simplified births + for dependency in alternative_dependencies: + assert dependency not in registered_module_names, (f'{dependency} should not be registered when the simplified' + f' births module has been registered') + + # test registering disease modules manually (when not all dependencies are included and the auto register missing + # dependencies option is set to False) + register_disease_modules_manually() + + # test auto registering disease modules using labour modules for births + register_disease_modules_using_labour_modules_for_births() + + # test auto registering disease modules using the simplified births module for births + register_disease_modules_using_simplified_births_for_births() diff --git a/tests/test_simulation.py b/tests/test_simulation.py new file mode 100644 index 0000000000..c26b501c47 --- /dev/null +++ b/tests/test_simulation.py @@ -0,0 +1,323 @@ +from pathlib import Path +from typing import Dict, List + +import numpy as np +import pytest + +from tlo import Date, DateOffset, Module, Population, Simulation, logging +from tlo.analysis.utils import merge_log_files, parse_log_file +from tlo.methods.fullmodel import fullmodel +from tlo.methods.healthsystem import HSI_Event, 
HSIEventQueueItem +from tlo.simulation import ( + EventQueue, + SimulationNotInitialisedError, + SimulationPreviouslyInitialisedError, +) + + +def _check_basic_simulation_attributes_equal( + simulation_1: Simulation, simulation_2: Simulation +) -> None: + for attribute in [ + "start_date", + "end_date", + "date", + "show_progress_bar", + "_custom_log_levels", + "_seed", + "_initialised", + ]: + assert getattr(simulation_1, attribute) == getattr(simulation_2, attribute) + + +def _nested_dict_are_equal(nested_dict_1: dict, nested_dict_2: dict) -> bool: + for key, value in nested_dict_1.items(): + if key not in nested_dict_2: + return False + if isinstance(value, np.ndarray): + if not np.all(value == nested_dict_2[key]): + return False + elif isinstance(value, dict): + if not _nested_dict_are_equal(value, nested_dict_2[key]): + return False + elif value != nested_dict_2[key]: + return False + return True + + +def _check_random_state_equal( + rng_1: np.random.RandomState, rng_2: np.random.RandomState +) -> None: + rng_state_1 = rng_1.get_state(legacy=False) + rng_state_2 = rng_2.get_state(legacy=False) + assert _nested_dict_are_equal(rng_state_1, rng_state_2) + + +def _check_population_equal(population_1: Population, population_2: Population) -> None: + assert population_1.initial_size == population_2.initial_size + assert population_1.new_row.equals(population_2.new_row) + assert population_1.new_rows.equals(population_2.new_rows) + assert population_1.next_person_id == population_2.next_person_id + assert population_1.props.equals(population_2.props) + + +def _check_modules_are_equal( + modules_dict_1: Dict[str, Module], modules_dict_2: Dict[str, Module] +) -> None: + for module_name, module_1 in modules_dict_1.items(): + assert module_name in modules_dict_2 + module_2 = modules_dict_2[module_name] + assert module_2.PARAMETERS == module_1.PARAMETERS + assert module_2.PROPERTIES == module_1.PROPERTIES + _check_random_state_equal(module_1.rng, module_2.rng) + + +def _check_event_queues_are_equal( + event_queue_1: EventQueue, event_queue_2: EventQueue +) -> None: + assert len(event_queue_1) == len(event_queue_2) + for (*date_priority_count_1, event_1), (*date_priority_count_2, event_2) in zip( + event_queue_1.queue, event_queue_2.queue + ): + assert date_priority_count_1 == date_priority_count_2 + if isinstance(event_1.target, Population): + # We don't check for equality of populations here as we do separately and + # it would create a lot of redundancy to check for every event + assert isinstance(event_2.target, Population) + else: + assert event_1.target == event_2.target + assert event_1.priority == event_2.priority + assert type(event_1.module) is type(event_2.module) # noqa: E721 + + +def _check_hsi_events_are_equal(hsi_event_1: HSI_Event, hsi_event_2: HSI_Event) -> None: + if isinstance(hsi_event_1.target, Population): + # We don't check for equality of populations here as we do separately and + # it would create a lot of redundancy to check for every HSI event + assert isinstance(hsi_event_2.target, Population) + else: + assert hsi_event_1.target == hsi_event_2.target + assert hsi_event_1.module.name == hsi_event_2.module.name + assert hsi_event_1.TREATMENT_ID == hsi_event_2.TREATMENT_ID + assert hsi_event_1.ACCEPTED_FACILITY_LEVEL == hsi_event_2.ACCEPTED_FACILITY_LEVEL + assert hsi_event_1.BEDDAYS_FOOTPRINT == hsi_event_2.BEDDAYS_FOOTPRINT + assert ( + hsi_event_1._received_info_about_bed_days + == hsi_event_2._received_info_about_bed_days + ) + assert hsi_event_1.expected_time_requests 
== hsi_event_2.expected_time_requests + assert hsi_event_1.facility_info == hsi_event_2.facility_info + + +def _check_hsi_event_queues_are_equal( + hsi_event_queue_1: List[HSIEventQueueItem], + hsi_event_queue_2: List[HSIEventQueueItem], +) -> None: + assert len(hsi_event_queue_1) == len(hsi_event_queue_2) + for hsi_event_queue_item_1, hsi_event_queue_item_2 in zip( + hsi_event_queue_1, hsi_event_queue_2 + ): + assert hsi_event_queue_item_1.priority == hsi_event_queue_item_2.priority + assert hsi_event_queue_item_1.topen == hsi_event_queue_item_2.topen + assert ( + hsi_event_queue_item_1.rand_queue_counter + == hsi_event_queue_item_2.rand_queue_counter + ) + assert hsi_event_queue_item_1.tclose == hsi_event_queue_item_2.tclose + _check_hsi_events_are_equal( + hsi_event_queue_item_1.hsi_event, hsi_event_queue_item_2.hsi_event + ) + + +def _check_simulations_are_equal( + simulation_1: Simulation, simulation_2: Simulation +) -> None: + _check_basic_simulation_attributes_equal(simulation_1, simulation_2) + _check_modules_are_equal(simulation_1.modules, simulation_2.modules) + _check_random_state_equal(simulation_1.rng, simulation_2.rng) + _check_event_queues_are_equal(simulation_1.event_queue, simulation_2.event_queue) + _check_hsi_event_queues_are_equal( + simulation_1.modules["HealthSystem"].HSI_EVENT_QUEUE, + simulation_2.modules["HealthSystem"].HSI_EVENT_QUEUE, + ) + _check_population_equal(simulation_1.population, simulation_2.population) + + +@pytest.fixture(scope="module") +def resource_file_path(): + return Path(__file__).parents[1] / "resources" + + +@pytest.fixture(scope="module") +def initial_population_size(): + return 5000 + + +@pytest.fixture(scope="module") +def start_date(): + return Date(2010, 1, 1) + + +@pytest.fixture(scope="module") +def end_date(start_date): + return start_date + DateOffset(days=180) + + +@pytest.fixture(scope="module") +def intermediate_date(start_date, end_date): + return start_date + (end_date - start_date) / 2 + + +@pytest.fixture(scope="module") +def logging_custom_levels(): + return {"*": logging.INFO} + + +def _simulation_factory( + output_directory, start_date, seed, resource_file_path, logging_custom_levels +): + log_config = { + "filename": "test", + "directory": output_directory, + "custom_levels": logging_custom_levels, + } + simulation = Simulation( + start_date=start_date, + seed=seed, + log_config=log_config, + ) + simulation.register( + *fullmodel( + resourcefilepath=resource_file_path, + ) + ) + return simulation + + +@pytest.fixture +def simulation(tmp_path, start_date, seed, resource_file_path, logging_custom_levels): + return _simulation_factory( + tmp_path, start_date, seed, resource_file_path, logging_custom_levels + ) + + +@pytest.fixture(scope="module") +def simulated_simulation( + tmp_path_factory, + start_date, + end_date, + seed, + resource_file_path, + initial_population_size, + logging_custom_levels, +): + tmp_path = tmp_path_factory.mktemp("simulated_simulation") + simulation = _simulation_factory( + tmp_path, start_date, seed, resource_file_path, logging_custom_levels + ) + simulation.make_initial_population(n=initial_population_size) + simulation.simulate(end_date=end_date) + return simulation + + +def test_save_to_pickle_creates_file(tmp_path, simulation): + pickle_path = tmp_path / "simulation.pkl" + simulation.save_to_pickle(pickle_path=pickle_path) + assert pickle_path.exists() + + +def test_save_load_pickle_after_initialising( + tmp_path, simulation, initial_population_size +): + 
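# Round-trip check: initialise (but do not run) the simulation, pickle it, reload it, and confirm the + # loaded copy matches the original. +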
simulation.make_initial_population(n=initial_population_size) + simulation.initialise(end_date=simulation.start_date) + pickle_path = tmp_path / "simulation.pkl" + simulation.save_to_pickle(pickle_path=pickle_path) + loaded_simulation = Simulation.load_from_pickle(pickle_path) + _check_simulations_are_equal(simulation, loaded_simulation) + + +def test_save_load_pickle_after_simulating(tmp_path, simulated_simulation): + pickle_path = tmp_path / "simulation.pkl" + simulated_simulation.save_to_pickle(pickle_path=pickle_path) + loaded_simulation = Simulation.load_from_pickle(pickle_path) + _check_simulations_are_equal(simulated_simulation, loaded_simulation) + + +def _check_parsed_logs_are_equal( + log_path_1: Path, + log_path_2: Path, + module_name_key_pairs_to_skip: set[tuple[str, str]], +) -> None: + logs_dict_1 = parse_log_file(log_path_1) + logs_dict_2 = parse_log_file(log_path_2) + assert logs_dict_1.keys() == logs_dict_2.keys() + for module_name in logs_dict_1.keys(): + module_logs_1 = logs_dict_1[module_name] + module_logs_2 = logs_dict_2[module_name] + assert module_logs_1.keys() == module_logs_2.keys() + for key in module_logs_1: + if key == "_metadata": + assert module_logs_1[key] == module_logs_2[key] + elif (module_name, key) not in module_name_key_pairs_to_skip: + assert module_logs_1[key].equals(module_logs_2[key]) + + +@pytest.mark.slow +def test_continuous_and_interrupted_simulations_equal( + tmp_path, + simulation, + simulated_simulation, + initial_population_size, + intermediate_date, + end_date, + logging_custom_levels, +): + simulation.make_initial_population(n=initial_population_size) + simulation.initialise(end_date=end_date) + simulation.run_simulation_to(to_date=intermediate_date) + pickle_path = tmp_path / "simulation.pkl" + simulation.save_to_pickle(pickle_path=pickle_path) + simulation.close_output_file() + log_config = { + "filename": "test_continued", + "directory": tmp_path, + "custom_levels": logging_custom_levels, + } + interrupted_simulation = Simulation.load_from_pickle(pickle_path, log_config) + interrupted_simulation.run_simulation_to(to_date=end_date) + interrupted_simulation.finalise() + _check_simulations_are_equal(simulated_simulation, interrupted_simulation) + merged_log_path = tmp_path / "concatenated.log" + merge_log_files( + simulation.log_filepath, interrupted_simulation.log_filepath, merged_log_path + ) + _check_parsed_logs_are_equal( + simulated_simulation.log_filepath, merged_log_path, {("tlo.simulation", "info")} + ) + + +def test_run_simulation_to_past_end_date_raises( + simulation, initial_population_size, end_date +): + simulation.make_initial_population(n=initial_population_size) + simulation.initialise(end_date=end_date) + with pytest.raises(ValueError, match="after simulation end date"): + simulation.run_simulation_to(to_date=end_date + DateOffset(days=1)) + + +def test_run_simulation_without_initialisation_raises( + simulation, initial_population_size, end_date +): + simulation.make_initial_population(n=initial_population_size) + with pytest.raises(SimulationNotInitialisedError): + simulation.run_simulation_to(to_date=end_date) + + +def test_initialise_simulation_twice_raises( + simulation, initial_population_size, end_date +): + simulation.make_initial_population(n=initial_population_size) + simulation.initialise(end_date=end_date) + with pytest.raises(SimulationPreviouslyInitialisedError): + simulation.initialise(end_date=end_date) diff --git a/tests/test_utils.py b/tests/test_utils.py index 02ae63b7ba..0e6b13d83b 100644 --- 
a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,6 +1,7 @@ """Unit tests for utility functions.""" import os import pickle +import shutil import string import types from pathlib import Path @@ -14,7 +15,7 @@ from tlo import Date, Simulation from tlo.analysis.utils import parse_log_file from tlo.methods import demography -from tlo.util import DEFAULT_MOTHER_ID +from tlo.util import DEFAULT_MOTHER_ID, convert_excel_files_to_csv, read_csv_files path_to_files = Path(os.path.dirname(__file__)) @@ -317,3 +318,183 @@ def check_hash_is_valid(dfh): # check hash differs for different dataframes if not dataframes[i].equals(dataframes[j]): assert df_hash != tlo.util.hash_dataframe(dataframes[j]) + + +def copy_files_to_temporal_directory_and_return_path(tmpdir): + """ Copy the resource files in tests/resources to a temporary directory and return its path + + :param tmpdir: path to a temporary directory + + """ + resource_filepath = path_to_files / 'resources' + tmpdir_resource_filepath = Path(tmpdir / 'resources') + shutil.copytree(resource_filepath, tmpdir_resource_filepath) + return tmpdir_resource_filepath + + +def test_pass_datatypes_to_read_csv_method(tmpdir): + """ Test passing column datatypes to the read csv method. The final column datatype should change to what has been passed """ + # copy and get resource files path in the temporary directory + path_to_tmpdir = Path(tmpdir) + sample_data = pd.DataFrame(data={'numbers1': [5, 6, 8, 4, 9, 6], 'numbers2': [19, 27, 53, 49, 75, 56]}, dtype=int) + sample_data.to_csv(tmpdir / 'sample_data.csv', index=False) + # read from the sample data file + read_sample_data = read_csv_files(path_to_tmpdir, files='sample_data') + # confirm column datatype is what was assigned + assert read_sample_data.numbers1.dtype == 'int' and read_sample_data.numbers2.dtype == 'int' + # define new datatypes + datatype = {'numbers1': int, 'numbers2': float} + # pass the new datatypes to the read csv method and confirm the datatype has changed to what has been declared now + assign_dtype = read_csv_files(path_to_tmpdir, files='sample_data', dtype=datatype) + assert assign_dtype.numbers1.dtype == 'int' and assign_dtype.numbers2.dtype == 'float' + + +def test_read_csv_file_method_passing_none_to_files_argument(tmpdir): + """ Test reading csv files with one file in the target resource folder and setting the files argument to None + + Expectations + 1. should return a dictionary + 2. the dictionary key name should match the file name + """ + # copy and get resource files path in the temporary directory + tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) + # choose an Excel file with one sheet in it and convert it to a csv file + convert_excel_files_to_csv(tmpdir_resource_filepath, files=['ResourceFile_load-parameters.xlsx']) + # get the folder containing the newly converted csv file and check the expected behavior + this_csv_resource_folder = tmpdir_resource_filepath/"ResourceFile_load-parameters" + file_names = [csv_file_path.stem for csv_file_path in this_csv_resource_folder.rglob("*.csv")] + one_csv_file_in_folder_dict = read_csv_files(this_csv_resource_folder, files=None) + assert isinstance(one_csv_file_in_folder_dict, dict) + assert set(one_csv_file_in_folder_dict.keys()) == set(file_names) + + +def test_read_csv_method_with_default_value_for_files_argument(tmpdir): + """ Test the read csv method when no file name(s) is supplied to the files argument + i) should return a dataframe of the first csv file in the folder. 
Similar to pd.read_excel returning + a dataframe of the first sheet in the file. + + :param tmpdir: path to a temporary directory + + """ + tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) + file_names = [csv_file_path.stem for csv_file_path in tmpdir_resource_filepath.rglob("*.csv")] + df_no_files = read_csv_files(tmpdir_resource_filepath) + first_file_in_folder_df = read_csv_files(tmpdir_resource_filepath, files=file_names[0]) + assert isinstance(df_no_files, pd.DataFrame) + pd.testing.assert_frame_equal(first_file_in_folder_df, df_no_files) + + +def test_read_csv_method_with_one_file(tmpdir): + """ Test the read csv method when one file name is supplied to the files argument. Should return a dataframe + :param tmpdir: path to a temporary directory + + """ + tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) + df = read_csv_files(tmpdir_resource_filepath, files='df_at_healthcareseeking') + assert isinstance(df, pd.DataFrame) + + +def test_read_csv_method_with_multiple_files(tmpdir): + """ Test the read csv method when multiple file names are supplied. + i) should return a dictionary. + ii) dictionary keys should match the supplied file names + iii) all dictionary values should be dataframes + + :param tmpdir: path to a temporary directory + + """ + tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) + file_names = ['df_at_healthcareseeking', 'df_at_init_of_lifestyle'] + df_dict = read_csv_files(tmpdir_resource_filepath, files=file_names) + assert isinstance(df_dict, dict) + assert set(df_dict.keys()) == set(file_names) + for _key, dataframe in df_dict.items(): + assert isinstance(dataframe, pd.DataFrame) + + +def test_read_csv_method_output_matches_previously_used_read_excel(tmpdir): + """ Check the read csv method produces the same output as reading the Excel file + :param tmpdir: path to a temporary directory + + """ + tmpdir_resource_filepath = copy_files_to_temporal_directory_and_return_path(tmpdir) + excel_file_path = Path(tmpdir_resource_filepath + / 'ResourceFile_test_convert_to_csv/ResourceFile_test_convert_to_csv.xlsx') + xls = pd.ExcelFile(excel_file_path) + sheet_names = xls.sheet_names + # convert the above Excel file into its csv equivalent. 
We will use the newly converted files to determine whether + # loading parameters from the Excel file gives the same result as loading parameters from the converted csv files + convert_excel_files_to_csv(folder=Path(tmpdir_resource_filepath / 'ResourceFile_test_convert_to_csv'), + files=[excel_file_path.name]) + + # read all sheets of the Excel file into a dictionary of dataframes + df_excel = pd.read_excel(xls, sheet_name=sheet_names) + + # read the newly converted csv files using the read_csv_files method + df_csv = read_csv_files(Path(str(excel_file_path).split('.')[0]), + files=sheet_names) + + # dictionary keys from both dataframe dictionaries should match + assert isinstance(df_excel, dict) and isinstance(df_csv, dict) + assert df_excel.keys() == df_csv.keys() + for key in df_excel: + assert df_excel[key].astype(str).equals(df_csv[key].astype(str)) + + +def test_convert_excel_files_method(tmpdir): + """ Test converting Excel files to their csv equivalents is done as expected + + 1) the Excel file name should become the name of the folder containing the newly converted csv files + 2) the Excel file sheet names should become the csv file names + 3) if files are given, the function should convert to csv only those given files in a folder + 4) if no files are given, all Excel files in the parent folder and subsequent folders within the parent folder + should get converted to csv files + + """ + + def check_logic_of_converting_excel_files_to_csv_files(folder: Path, files: list) -> None: + """ Check converting Excel files to csv files is done as expected + 1) check that a new directory to hold the newly created csv files has been created + 2) check that this new directory name matches the Excel file name it has been created from + 3) check csv files are created and that the csv names match the sheet names of the Excel file they + have been created from + """ + # check that convert_excel_files_to_csv has created a folder named `ResourceFile_load-parameters` (name of the Excel + # file) and a csv file named `parameter_values` (Excel file sheet name). + excel_file_paths = [folder / file for file in files] + + for excel_file_path in excel_file_paths: + xl = pd.ExcelFile(excel_file_path) + path_to_new_directory = excel_file_path.with_suffix("") + # a new folder should be created + assert path_to_new_directory.exists() and path_to_new_directory.is_dir() + # the new folder name should be the same as the Excel file name + assert excel_file_path.stem == path_to_new_directory.name + for sheet_name in xl.sheet_names: + path_to_new_file = Path(path_to_new_directory / f'{sheet_name}.csv') + # new csv file(s) should be created with name(s) resembling the sheet name(s) in the Excel file + assert path_to_new_file.exists() and path_to_new_file.is_file() + assert sheet_name == path_to_new_file.name.split('.')[0] + + + # get resource file path + resourcefilepath = path_to_files / 'resources' + tmpdir_resourcefilepath = Path(tmpdir/'resources') + shutil.copytree(resourcefilepath, tmpdir_resourcefilepath) + + # check convert to csv logic when a list of file name(s) is given + excel_file = ['ResourceFile_load-parameters.xlsx'] + convert_excel_files_to_csv(tmpdir_resourcefilepath, files=excel_file) + # check the new folder containing the csv file is created. 
The folder name and csv file name should resemble the supplied + # Excel file name and sheet name respectively + check_logic_of_converting_excel_files_to_csv_files(tmpdir_resourcefilepath, files=excel_file) + + # check convert to csv logic when no list of file name(s) is given + excel_files = [file for file in tmpdir_resourcefilepath.rglob("*.xlsx")] + if excel_files is None: + excel_files = excel_file + + convert_excel_files_to_csv(tmpdir_resourcefilepath) + # check behaviours are as expected. New folders containing csv files should be created with names resembling the + # Excel file they were created from + check_logic_of_converting_excel_files_to_csv_files(tmpdir_resourcefilepath, excel_files) diff --git a/tox.ini b/tox.ini index e2417422e8..d25f446a25 100644 --- a/tox.ini +++ b/tox.ini @@ -41,6 +41,13 @@ deps = pytest pytest-cov +[testenv:py3-latest] +deps = + pytest + pytest-xdist +commands = + {posargs:pytest -n auto -vv tests} + [testenv:spell] setenv = SPELLCHECK=1 @@ -59,20 +66,23 @@ deps = ; require setuptools_scm for getting version info setuptools_scm commands = - sphinx-apidoc -e -f -o {toxinidir}/docs/reference {toxinidir}/src/tlo + sphinx-apidoc -e -o {toxinidir}/docs/reference {toxinidir}/src/tlo ; Generate API documentation for TLO methods python docs/tlo_methods_rst.py ; Generate data sources page python docs/tlo_data_sources.py ; Generate contributors page python docs/tlo_contributors.py + ; Generate publications page + python docs/tlo_publications.py ; Generate resources files page python docs/tlo_resources.py ; Generate HSI events listing python src/tlo/analysis/hsi_events.py --output-file docs/_hsi_events.rst --output-format rst-list python src/tlo/analysis/hsi_events.py --output-file docs/hsi_events.csv --output-format csv - sphinx-build {posargs:-E} -b html docs dist/docs - -sphinx-build -b linkcheck docs dist/docs + ; Generate parameters listing + python docs/tlo_parameters.py {toxinidir}{/}resources {toxinidir}{/}docs{/}parameters.rst + sphinx-build -b html docs dist/docs [testenv:check] deps = @@ -130,9 +140,26 @@ commands = python {toxinidir}/src/scripts/automation/update_citation.py skip_install = true deps = pyyaml +[testenv:update-publications] +commands = python {toxinidir}/docs/tlo_publications.py --update-from-zotero +skip_install = true +deps = + pybtex + requests + [testenv:requirements] commands = pip-compile --output-file {toxinidir}/requirements/base.txt pip-compile --extra dev --output-file {toxinidir}/requirements/dev.txt skip_install = true deps = pip-tools + +[testenv:markslow] +deps = + -r{toxinidir}/requirements/base.txt + pytest + pytest-json-report + redbaron +commands = + pytest tests --json-report --json-report-file {toxinidir}/test-report.json --json-report-omit collectors log traceback streams warnings + python {toxinidir}/src/scripts/automation/mark_slow_tests.py --json-test-report-path test-report.json
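For orientation, the new tests in tests/test_utils.py above exercise two helpers now imported from tlo.util: read_csv_files and convert_excel_files_to_csv. The sketch below shows how they might be used together, following the call signatures used in those tests; the workbook name ResourceFile_example.xlsx, the sheet name other_sheet, the folder path and the dtype mapping are illustrative placeholders only (parameter_values is the sheet name mentioned in the tests for ResourceFile_load-parameters.xlsx).

    from pathlib import Path

    from tlo.util import convert_excel_files_to_csv, read_csv_files

    resource_folder = Path('resources')  # hypothetical folder containing ResourceFile_example.xlsx

    # Convert one workbook: a folder named after the workbook is created, holding one csv file per sheet.
    convert_excel_files_to_csv(resource_folder, files=['ResourceFile_example.xlsx'])

    # Read a single named csv back as a DataFrame, optionally forcing column dtypes (column name is illustrative).
    params = read_csv_files(resource_folder / 'ResourceFile_example', files='parameter_values',
                            dtype={'value': float})

    # Read several csv files at once: returns a dict of DataFrames keyed by file name.
    tables = read_csv_files(resource_folder / 'ResourceFile_example',
                            files=['parameter_values', 'other_sheet'])

In the tests above the same pattern is applied to copies of the real resource files made in a temporary directory, so the repository's resources folder is never modified.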