From a83db42bc3bf7c99e4aa6ee5ff290578dfaa6ecc Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:07:28 +0100
Subject: [PATCH 01/14] Enable all pycodestyle error rules in Ruff

---
 pyproject.toml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index f18a736844..3ce298c1ae 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -123,6 +123,10 @@ markers = ["group2", "slow"]
 target-version = "py38"
 line-length = 120
 
+[tool.ruff.lint]
+select = ["E", "F"]
+per-file-ignores = {"src/scripts/**" = ["E501"]}
+
 [tool.setuptools.packages.find]
 where = ["src"]
 

From 46676cd377c2affb55493d8f0bcbb9b4cb8ce7ce Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:08:12 +0100
Subject: [PATCH 02/14] Reformat lines to fix max line length violations

---
 src/tlo/methods/alri.py           |  3 +-
 src/tlo/methods/contraception.py  |  4 +-
 src/tlo/methods/equipment.py      | 88 ++++++++++++++++---------------
 src/tlo/methods/hiv.py            | 27 +++++++---
 src/tlo/methods/hsi_event.py      |  8 +--
 src/tlo/methods/rti.py            |  8 ++-
 src/tlo/methods/symptommanager.py |  8 +--
 src/tlo/simulation.py             | 11 ++--
 tests/test_equipment.py           |  6 ++-
 tests/test_healthsystem.py        | 12 +++--
 tests/test_malaria.py             |  7 ++-
 11 files changed, 111 insertions(+), 71 deletions(-)

diff --git a/src/tlo/methods/alri.py b/src/tlo/methods/alri.py
index 70ac14fe2d..f004e19819 100644
--- a/src/tlo/methods/alri.py
+++ b/src/tlo/methods/alri.py
@@ -3040,7 +3040,8 @@ def apply(self, person_id):
 
         assert 'fast_breathing_pneumonia' == \
                self.module.get_imci_classification_based_on_symptoms(
-                   child_is_younger_than_2_months=False, symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id)
+                   child_is_younger_than_2_months=False, 
+                   symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id)
                )
 
 
diff --git a/src/tlo/methods/contraception.py b/src/tlo/methods/contraception.py
index ab6c633f4c..4f223af58d 100644
--- a/src/tlo/methods/contraception.py
+++ b/src/tlo/methods/contraception.py
@@ -164,7 +164,9 @@ def read_parameters(self, data_folder):
         """Import the relevant sheets from the ResourceFile (excel workbook) and declare values for other parameters
         (CSV ResourceFile).
         """
-        workbook = pd.read_excel(Path(self.resourcefilepath) / 'contraception' / 'ResourceFile_Contraception.xlsx', sheet_name=None)
+        workbook = pd.read_excel(
+            Path(self.resourcefilepath) / 'contraception' / 'ResourceFile_Contraception.xlsx', sheet_name=None
+        )
 
         # Import selected sheets from the workbook as the parameters
         sheet_names = [
diff --git a/src/tlo/methods/equipment.py b/src/tlo/methods/equipment.py
index bf0d6fc0ae..7405a2dfb6 100644
--- a/src/tlo/methods/equipment.py
+++ b/src/tlo/methods/equipment.py
@@ -12,43 +12,44 @@
 
 
 class Equipment:
-    """This is the equipment class. It maintains a current record of the availability of equipment in the
-     health system. It is expected that this is instantiated by the :py:class:`~.HealthSystem` module.
-
-     The basic paradigm is that an :py:class:`~.HSI_Event` can declare equipment that is required for delivering the healthcare
-     service that the ``HSI_Event`` represents. The ``HSI_Event`` uses :py:meth:`HSI_event.add_equipment` to make these declarations,
-     with reference to the items of equipment that are defined in ``ResourceFile_EquipmentCatalogue.csv``. (These
-     declaration can be in the form of the descriptor or the equipment item code). These declarations can be used when
-     the ``HSI_Event`` is created but before it is run (in ``__init__``), or during execution of the ``HSI_Event`` (in :py:meth:`.HSI_Event.apply`).
-
-     As the ``HSI_Event`` can declare equipment that is required before it is run, the HealthSystem *can* use this to
-     prevent an ``HSI_Event`` running if the equipment declared is not available. Note that for equipment that is declared
-     whilst the ``HSI_Event`` is running, there are no checks on availability, and the ``HSI_Event`` is allowed to continue
-     running even if equipment is declared is not available. For this reason, the ``HSI_Event`` should declare equipment
-     that is *essential* for the healthcare service in its ``__init__`` method. If the logic inside the ``apply`` method
-     of the ``HSI_Event`` depends on the availability of equipment, then it can find the probability with which
-     item(s) will be available using :py:meth:`.HSI_Event.probability_equipment_available`.
-
-     The data on the availability of equipment data refers to the proportion of facilities in a district of a
-     particular level (i.e., the ``Facility_ID``) that do have that piece of equipment. In the model, we do not know
-     which actual facility the person is attending (there are many actual facilities grouped together into one
-     ``Facility_ID`` in the model). Therefore, the determination of whether equipment is available is made
-     probabilistically for the ``HSI_Event`` (i.e., the probability that the actual facility being attended by the
-     person has the equipment is represented by the proportion of such facilities that do have that equipment). It is
-     assumed that the probabilities of each item being available are independent of one other (so that the
-     probability of all items being available is the product of the probabilities for each item). This probabilistic
-     determination of availability is only done _once_ for the ``HSI_Event``: i.e., if the equipment is determined to
-     not be available for the instance of the ``HSI_Event``, then it will remain not available if the same event is
-     re-scheduled / re-entered into the ``HealthSystem`` queue. This represents that if the facility that a particular
-     person attends for the ``HSI_Event`` does not have the equipment available, then it will still not be available on
-     another day.
-
-     Where data on availability is not provided for an item, the probability of availability is inferred from the
-     average availability of other items in that facility ID. Likewise, the probability of an item being available
-     at a facility ID is inferred from the average availability of that item at other facilities. If an item code is
-     referred in ``add_equipment`` that is not recognised (not included in :py:attr:`catalogue`), a :py:exc:`UserWarning` is issued, but
-     that item is then silently ignored. If a facility ID is ever referred that is not recognised (not included in
-     :py:attr:`master_facilities_list`), an :py:exc:`AssertionError` is raised.
+    """
+    This is the equipment class. It maintains a current record of the availability of equipment in the health system. It
+    is expected that this is instantiated by the :py:class:`~.HealthSystem` module.
+
+    The basic paradigm is that an :py:class:`~.HSI_Event` can declare equipment that is required for delivering the
+    healthcare service that the ``HSI_Event`` represents. The ``HSI_Event`` uses :py:meth:`HSI_event.add_equipment` to
+    make these declarations, with reference to the items of equipment that are defined in
+    ``ResourceFile_EquipmentCatalogue.csv``. (These declarations can be in the form of the descriptor or the equipment
+    item code). These declarations can be used when the ``HSI_Event`` is created but before it is run (in ``__init__``),
+    or during execution of the ``HSI_Event`` (in :py:meth:`.HSI_Event.apply`).
+
+    As the ``HSI_Event`` can declare equipment that is required before it is run, the HealthSystem *can* use this to
+    prevent an ``HSI_Event`` running if the equipment declared is not available. Note that for equipment that is
+    declared whilst the ``HSI_Event`` is running, there are no checks on availability, and the ``HSI_Event`` is allowed
+    to continue running even if the equipment declared is not available. For this reason, the ``HSI_Event`` should
+    declare equipment that is *essential* for the healthcare service in its ``__init__`` method. If the logic inside the
+    ``apply`` method of the ``HSI_Event`` depends on the availability of equipment, then it can find the probability
+    with which item(s) will be available using :py:meth:`.HSI_Event.probability_equipment_available`.
+
+    The data on the availability of equipment refers to the proportion of facilities in a district of a particular
+    level (i.e., the ``Facility_ID``) that do have that piece of equipment. In the model, we do not know which actual
+    facility the person is attending (there are many actual facilities grouped together into one ``Facility_ID`` in the
+    model). Therefore, the determination of whether equipment is available is made probabilistically for the
+    ``HSI_Event`` (i.e., the probability that the actual facility being attended by the person has the equipment is
+    represented by the proportion of such facilities that do have that equipment). It is assumed that the probabilities
+    of each item being available are independent of one another (so that the probability of all items being available is
+    the product of the probabilities for each item). This probabilistic determination of availability is only done
+    *once* for the ``HSI_Event``: i.e., if the equipment is determined to not be available for the instance of the
+    ``HSI_Event``, then it will remain not available if the same event is re-scheduled / re-entered into the
+    ``HealthSystem`` queue. This represents that if the facility that a particular person attends for the ``HSI_Event``
+    does not have the equipment available, then it will still not be available on another day.
+
+    Where data on availability is not provided for an item, the probability of availability is inferred from the average
+    availability of other items in that facility ID. Likewise, the probability of an item being available at a facility
+    ID is inferred from the average availability of that item at other facilities. If an item code is referred in
+    ``add_equipment`` that is not recognised (not included in :py:attr:`catalogue`), a :py:exc:`UserWarning` is issued,
+    but that item is then silently ignored. If a facility ID is ever referred that is not recognised (not included in
+    :py:attr:`master_facilities_list`), an :py:exc:`AssertionError` is raised.
 
     :param catalogue: The database of all recognised item_codes.
     :param data_availability: Specifies the probability with which each equipment (identified by an ``item_code``) is
@@ -56,9 +57,10 @@ class Equipment:
         and every facility ID in the :py:attr`master_facilities_list`.
     :param: rng: The random number generator object to use for random numbers.
     :param availability: Determines the mode availability of the equipment. If 'default' then use the availability
-        specified in :py:attr:`data_availability`; if 'none', then let no equipment be ever be available; if 'all', then all
-        equipment is always available.
-    :param master_facilities_list: The :py:class:`~pandas.DataFrame` with the line-list of all the facilities in the health system.
+        specified in :py:attr:`data_availability`; if 'none', then let no equipment ever be available; if 'all', then
+        all equipment is always available.
+    :param master_facilities_list: The :py:class:`~pandas.DataFrame` with the line-list of all the facilities in the
+        health system.
     """
 
     def __init__(
@@ -90,7 +92,6 @@ def __init__(
         # {facility_id: {item_code: count}}.
         self._record_of_equipment_used_by_facility_id = defaultdict(Counter)
 
-
     def on_simulation_end(self):
         """Things to do when the simulation ends:
          * Log (to the summary logger) the equipment that has been used.
@@ -117,7 +118,10 @@ def _get_equipment_availability_probabilities(self) -> pd.Series:
         calculation if the equipment availability change event occurs during the simulation.
         """
         dat = self.data_availability.set_index(
-            [self.data_availability["Facility_ID"].astype(np.int64), self.data_availability["Item_Code"].astype(np.int64)]
+            [
+                self.data_availability["Facility_ID"].astype(np.int64),
+                self.data_availability["Item_Code"].astype(np.int64),
+            ]
         )["Pr_Available"]
 
         # Confirm that there is an estimate for every item_code at every facility_id
diff --git a/src/tlo/methods/hiv.py b/src/tlo/methods/hiv.py
index d6455cc861..a8ddbb4446 100644
--- a/src/tlo/methods/hiv.py
+++ b/src/tlo/methods/hiv.py
@@ -1121,10 +1121,14 @@ def update_parameters_for_program_scaleup(self):
         # prep poll for AGYW - target to the highest risk
         # increase retention to 75% for FSW and AGYW
         p["prob_prep_for_agyw"] = scaled_params["prob_prep_for_agyw"]
-        p["probability_of_being_retained_on_prep_every_3_months"] = scaled_params["probability_of_being_retained_on_prep_every_3_months"]
+        p["probability_of_being_retained_on_prep_every_3_months"] = scaled_params[
+            "probability_of_being_retained_on_prep_every_3_months"
+        ]
 
         # perfect retention on ART
-        p["probability_of_being_retained_on_art_every_3_months"] = scaled_params["probability_of_being_retained_on_art_every_3_months"]
+        p["probability_of_being_retained_on_art_every_3_months"] = scaled_params[
+            "probability_of_being_retained_on_art_every_3_months"
+        ]
 
         # increase probability of VMMC after hiv test
         p["prob_circ_after_hiv_test"] = scaled_params["prob_circ_after_hiv_test"]
@@ -2899,11 +2903,20 @@ def get_drugs(self, age_of_person):
         if age_of_person < p["ART_age_cutoff_young_child"]:
             # Formulation for young children
             drugs_available = self.get_consumables(
-                item_codes={self.module.item_codes_for_consumables_required[
-                                'First line ART regimen: young child']: dispensation_days * 2},
-                optional_item_codes={self.module.item_codes_for_consumables_required[
-                                         'First line ART regimen: young child: cotrimoxazole']: dispensation_days * 240},
-                return_individual_results=True)
+                item_codes={
+                    self.module.item_codes_for_consumables_required[
+                        "First line ART regimen: young child"
+                    ]: dispensation_days
+                    * 2
+                },
+                optional_item_codes={
+                    self.module.item_codes_for_consumables_required[
+                        "First line ART regimen: young child: cotrimoxazole"
+                    ]: dispensation_days
+                    * 240
+                },
+                return_individual_results=True,
+            )
 
         elif age_of_person <= p["ART_age_cutoff_older_child"]:
             # Formulation for older children
diff --git a/src/tlo/methods/hsi_event.py b/src/tlo/methods/hsi_event.py
index b76a865d2d..506681f2a3 100644
--- a/src/tlo/methods/hsi_event.py
+++ b/src/tlo/methods/hsi_event.py
@@ -289,10 +289,10 @@ def is_all_declared_equipment_available(self) -> bool:
         """Returns ``True`` if all the (currently) declared items of equipment are available. This is called by the
         ``HealthSystem`` module before the HSI is run and so is looking only at those items that are declared when this
         instance was created. The evaluation of whether equipment is available is only done *once* for this instance of
-        the event: i.e., if the equipment is not available for the instance of this ``HSI_Event``, then it will remain not
-        available if the same event is re-scheduled/re-entered into the HealthSystem queue. This is representing that
-        if the facility that a particular person attends for the ``HSI_Event`` does not have the equipment available, then
-        it will also not be available on another day."""
+        the event: i.e., if the equipment is not available for the instance of this ``HSI_Event``, then it will remain
+        not available if the same event is re-scheduled/re-entered into the HealthSystem queue. This is representing
+        that if the facility that a particular person attends for the ``HSI_Event`` does not have the equipment
+        available, then it will also not be available on another day."""
 
         if self._is_all_declared_equipment_available is None:
             # Availability has not already been evaluated: determine availability
diff --git a/src/tlo/methods/rti.py b/src/tlo/methods/rti.py
index 18c1987483..9ba7c98861 100644
--- a/src/tlo/methods/rti.py
+++ b/src/tlo/methods/rti.py
@@ -3955,7 +3955,9 @@ def __init__(self, module, person_id):
         self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'AccidentsandEmerg': 1})
         self.ACCEPTED_FACILITY_LEVEL = '1b'
         self._number_of_times_this_event_has_run = 0
-        self._maximum_number_times_event_should_run = self.module.parameters['maximum_number_of_times_HSI_events_should_run']
+        self._maximum_number_times_event_should_run = self.module.parameters[
+            "maximum_number_of_times_HSI_events_should_run"
+        ]
 
     def apply(self, person_id, squeeze_factor):
         df = self.sim.population.props
@@ -4840,7 +4842,9 @@ def __init__(self, module, person_id):
         self.ACCEPTED_FACILITY_LEVEL = '1b'
         self.BEDDAYS_FOOTPRINT = self.make_beddays_footprint({})
         self._number_of_times_this_event_has_run = 0
-        self._maximum_number_times_event_should_run = self.module.parameters['maximum_number_of_times_HSI_events_should_run']
+        self._maximum_number_times_event_should_run = self.module.parameters[
+            "maximum_number_of_times_HSI_events_should_run"
+        ]
 
         p = self.module.parameters
         self.prob_perm_disability_with_treatment_severe_TBI = p['prob_perm_disability_with_treatment_severe_TBI']
diff --git a/src/tlo/methods/symptommanager.py b/src/tlo/methods/symptommanager.py
index 67389e283e..bbbb251d85 100644
--- a/src/tlo/methods/symptommanager.py
+++ b/src/tlo/methods/symptommanager.py
@@ -475,10 +475,10 @@ def has_what(
         This is a helper function that will give a list of strings for the symptoms that a _single_ person
         is currently experiencing.
 
-        If working in a `tlo.population.IndividualProperties` context, one can pass the context object
-        instead of supplying the person's DataFrame index.
-        Note that at least one of these inputs must be passed as a keyword argument however.
-        In the event that both arguments are passed, the individual_details argument takes precedence over the person_id.
+        If working in a `tlo.population.IndividualProperties` context, one can pass the context object instead of
+        supplying the person's DataFrame index. Note that at least one of these inputs must be passed as a keyword
+        argument however. In the event that both arguments are passed, the individual_details argument takes precedence
+        over the person_id.
 
         Optionally can specify disease_module_name to limit to the symptoms caused by that disease module.
 
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 4b1f34abed..aeb457ad44 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -75,7 +75,6 @@ def __init__(self, *, start_date: Date, seed: int = None, log_config: dict = Non
             log_config = {}
         self._custom_log_levels = None
         self._log_filepath = self._configure_logging(**log_config)
-        
 
         # random number generator
         seed_from = 'auto' if seed is None else 'user'
@@ -132,7 +131,13 @@ def log_filepath(self):
         """The path to the log file, if one has been set."""
         return self._log_filepath
 
-    def register(self, *modules, sort_modules=True, check_all_dependencies=True, auto_register_dependencies: bool = False):
+    def register(
+        self,
+        *modules,
+        sort_modules=True,
+        check_all_dependencies=True,
+        auto_register_dependencies: bool = False,
+    ):
         """Register one or more disease modules with the simulation.
 
         :param modules: the disease module(s) to use as part of this simulation.
@@ -151,7 +156,7 @@ def register(self, *modules, sort_modules=True, check_all_dependencies=True, aut
             modules to be registered. A ``ModuleDependencyError`` exception will
             be raised if there are missing dependencies.
         :param auto_register_dependencies: Whether to register missing module dependencies or not. If this argument is
-         set to True, all module dependencies will be automatically registered.
+            set to True, all module dependencies will be automatically registered.
         """
         if auto_register_dependencies:
             modules = [
diff --git a/tests/test_equipment.py b/tests/test_equipment.py
index 1167023aa8..887feeb3f1 100644
--- a/tests/test_equipment.py
+++ b/tests/test_equipment.py
@@ -241,7 +241,6 @@ def initialise_simulation(self, sim):
     return parse_log_file(sim.log_filepath)["tlo.methods.healthsystem.summary"]
 
 
-
 def test_equipment_use_is_logged(seed, tmpdir):
     """Check that an HSI that after an HSI is run, the logs reflect the use of the equipment (and correctly record the
      name of the HSI and the facility_level at which ran).
@@ -477,4 +476,7 @@ def initialise_simulation(self, sim):
     df['EquipmentEverUsed'] = df['EquipmentEverUsed'].apply(eval).apply(list)
 
     # Check that equipment used at each level matches expectations
-    assert item_code_needed_at_each_level == df.groupby('Facility_Level')['EquipmentEverUsed'].sum().apply(set).to_dict()
+    assert (
+        item_code_needed_at_each_level
+        == df.groupby("Facility_Level")["EquipmentEverUsed"].sum().apply(set).to_dict()
+    )
diff --git a/tests/test_healthsystem.py b/tests/test_healthsystem.py
index ca26316758..a3afe4ef2a 100644
--- a/tests/test_healthsystem.py
+++ b/tests/test_healthsystem.py
@@ -2565,8 +2565,12 @@ def get_capabilities(yearly_scaling: bool, scaling_by_level: bool, rescaling: bo
     assert caps_scaling_by_both > caps_only_scaling_by_year
 
     # - When there is also rescaling as we go from Mode 2 into Mode 1
-    caps_only_scaling_by_level_with_rescaling = get_capabilities(yearly_scaling=False, scaling_by_level=True, rescaling=True)
-    caps_only_scaling_by_year_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=False, rescaling=True)
+    caps_only_scaling_by_level_with_rescaling = get_capabilities(
+        yearly_scaling=False, scaling_by_level=True, rescaling=True
+    )
+    caps_only_scaling_by_year_with_rescaling = get_capabilities(
+        yearly_scaling=True, scaling_by_level=False, rescaling=True
+    )
     caps_scaling_by_both_with_rescaling = get_capabilities(yearly_scaling=True, scaling_by_level=True, rescaling=True)
     assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_level_with_rescaling
     assert caps_scaling_by_both_with_rescaling > caps_only_scaling_by_year_with_rescaling
@@ -2580,8 +2584,8 @@ def test_logging_of_only_hsi_events_with_non_blank_footprints(tmpdir):
 
     def run_simulation_and_return_healthsystem_summary_log(tmpdir: Path, blank_footprint: bool) -> dict:
         """Return the `healthsystem.summary` logger for a simulation. In that simulation, there is HSI_Event run on the
-        first day of the simulation and its `EXPECTED_APPT_FOOTPRINT` may or may not be blank. The simulation is run for one
-        year in order that the summary logger is active (it runs annually)."""
+        first day of the simulation and its `EXPECTED_APPT_FOOTPRINT` may or may not be blank. The simulation is run for
+        one year in order that the summary logger is active (it runs annually)."""
 
         class HSI_Dummy(HSI_Event, IndividualScopeEventMixin):
             def __init__(self, module, person_id, _is_footprint_blank):
diff --git a/tests/test_malaria.py b/tests/test_malaria.py
index 2b16da0000..9e8191fc2c 100644
--- a/tests/test_malaria.py
+++ b/tests/test_malaria.py
@@ -517,7 +517,12 @@ def test_individual_testing_and_treatment(sim):
     pollevent.run()
 
     assert not pd.isnull(df.at[person_id, "ma_date_symptoms"])
-    assert set(sim.modules['SymptomManager'].has_what(person_id=person_id)) == {"fever", "headache", "vomiting", "stomachache"}
+    assert set(sim.modules["SymptomManager"].has_what(person_id=person_id)) == {
+        "fever",
+        "headache",
+        "vomiting",
+        "stomachache",
+    }
 
     # check rdt is scheduled
     date_event, event = [

From 14f71b90c9f6438319015e2f3571f537965299d8 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:09:44 +0100
Subject: [PATCH 03/14] Enable Ruff isort rules

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 3ce298c1ae..cee3f79e95 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,7 +124,7 @@ target-version = "py38"
 line-length = 120
 
 [tool.ruff.lint]
-select = ["E", "F"]
+select = ["E", "F", "I"]
 per-file-ignores = {"src/scripts/**" = ["E501"]}
 
 [tool.setuptools.packages.find]

From 64e93cd4b4f2c8628c2ac425679882fc7d3089b8 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:11:57 +0100
Subject: [PATCH 04/14] Fix Ruff isort rule violation

---
 src/scripts/hiv/projections_jan2023/analysis_full_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/scripts/hiv/projections_jan2023/analysis_full_model.py b/src/scripts/hiv/projections_jan2023/analysis_full_model.py
index 2386ea56c6..067b29b1c8 100644
--- a/src/scripts/hiv/projections_jan2023/analysis_full_model.py
+++ b/src/scripts/hiv/projections_jan2023/analysis_full_model.py
@@ -5,7 +5,6 @@
 
 import datetime
 import pickle
-# import random
 from pathlib import Path
 
 from tlo import Date, Simulation, logging

From 2aacfcd963be476ce427762a0476dc3fc47e7924 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:12:11 +0100
Subject: [PATCH 05/14] Remove isort from checks as redundant

---
 tox.ini | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index 2c8355b7a4..52cae5ed0c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -86,7 +86,6 @@ deps =
     ; require pytest so pylint can lint tests
     pytest
     pygments
-    isort
     twine
     pyyaml
     cffconvert
@@ -96,7 +95,6 @@ commands =
     ; ignore that _version.py file generated by setuptools_scm is not tracked by VCS
     check-manifest --ignore **/_version.py {toxinidir}
     ruff check src tests
-    isort --check-only --diff src tests
     pylint src tests
     python {toxinidir}/src/scripts/automation/update_citation.py --check
     cffconvert --validate

From d7725617087d20002f6fe2bb276899654aedc553 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Mon, 23 Sep 2024 15:33:07 +0100
Subject: [PATCH 06/14] Enable pycodestyle warning rules

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index cee3f79e95..ccb78892b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -124,7 +124,7 @@ target-version = "py38"
 line-length = 120
 
 [tool.ruff.lint]
-select = ["E", "F", "I"]
+select = ["E", "F", "I", "W"]
 per-file-ignores = {"src/scripts/**" = ["E501"]}
 
 [tool.setuptools.packages.find]

From 23b30abd5d12c6746f34fdd20335d747b6dd180e Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Tue, 24 Sep 2024 13:14:04 +0100
Subject: [PATCH 07/14] Remove instances of trailing whitespace + whitespace on
 new lines

---
 src/scripts/profiling/run_profiling.py   | 2 +-
 src/scripts/profiling/scale_run.py       | 6 +++---
 src/scripts/task_runner/generate_html.py | 4 ++--
 src/tlo/analysis/utils.py                | 2 +-
 src/tlo/core.py                          | 6 +++---
 src/tlo/logging/helpers.py               | 4 ++--
 src/tlo/methods/alri.py                  | 2 +-
 src/tlo/methods/consumables.py           | 2 +-
 src/tlo/methods/demography.py            | 2 +-
 src/tlo/population.py                    | 2 +-
 tests/test_healthburden.py               | 4 ++--
 tests/test_symptommanager.py             | 2 +-
 12 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py
index 882894d6af..20386fe13b 100644
--- a/src/scripts/profiling/run_profiling.py
+++ b/src/scripts/profiling/run_profiling.py
@@ -295,7 +295,7 @@ def run_profiling(
         print(f"Writing {output_ipysession_file}", end="...", flush=True)
         scale_run_session.save(output_ipysession_file)
         print("done")
-        
+
     if write_flat_html:
         output_html_file = output_dir / f"{output_name}.flat.html"
         console_renderer = ConsoleRenderer(
diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py
index 735d1e7ba3..c4dfa57e46 100644
--- a/src/scripts/profiling/scale_run.py
+++ b/src/scripts/profiling/scale_run.py
@@ -26,10 +26,10 @@ def save_arguments_to_json(arguments_dict: dict, output_path: Path):
     with open(output_path, "w") as f:
         json.dump(
             {
-                k: str(v) if isinstance(v, Path) else v 
+                k: str(v) if isinstance(v, Path) else v
                 for k, v in arguments_dict.items()
-            }, 
-            f, 
+            },
+            f,
             indent=4
         )
 
diff --git a/src/scripts/task_runner/generate_html.py b/src/scripts/task_runner/generate_html.py
index b9116c57f2..1a9b4a385b 100644
--- a/src/scripts/task_runner/generate_html.py
+++ b/src/scripts/task_runner/generate_html.py
@@ -101,8 +101,8 @@ def get_html_for_commit(commit_dir: Path) -> str:
 <body>
 <h1>$title</h1>
 <p style="font-size: small;">
-    This page was generated on $generated_time. The 
-    <a href="https://github.com/UCL/TLOmodel/actions/workflows/calibration.yaml">calibration workflow</a> runs every 
+    This page was generated on $generated_time. The
+    <a href="https://github.com/UCL/TLOmodel/actions/workflows/calibration.yaml">calibration workflow</a> runs every
     night on the latest new commit on the master branch. <a href="#" id="toggleIncomplete">toggle incomplete</a>
 </p>
 $body
diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py
index 2808ab57a5..4a71c8500b 100644
--- a/src/tlo/analysis/utils.py
+++ b/src/tlo/analysis/utils.py
@@ -1131,7 +1131,7 @@ def get_parameters_for_status_quo() -> Dict:
             "equip_availability": "all",  # <--- NB. Existing calibration is assuming all equipment is available
         },
     }
-    
+
 def get_parameters_for_standard_mode2_runs() -> Dict:
     """
     Returns a dictionary of parameters and their updated values to indicate
diff --git a/src/tlo/core.py b/src/tlo/core.py
index 9fbbf08893..c6364458dc 100644
--- a/src/tlo/core.py
+++ b/src/tlo/core.py
@@ -170,7 +170,7 @@ def _default_value(self) -> Any:
         """
         Default value for this property, which will be used to fill the respective columns
         of the population dataframe, for example.
-        
+
         If not explicitly set, it will fall back on the ``PANDAS_TYPE_DEFAULT_TYPE_MAP``.
         If a value is provided, it must:
 
@@ -386,8 +386,8 @@ def initialise_population(self, population: Population) -> None:
 
         Modules that wish to implement this behaviour do not need to implement this method,
         it will be inherited automatically. Modules that wish to perform additional steps
-        during the initialise_population stage should reimplement this method and call 
-        
+        during the initialise_population stage should reimplement this method and call
+
         ```python
         super().initialise_population(population=population)
         ```
diff --git a/src/tlo/logging/helpers.py b/src/tlo/logging/helpers.py
index 99fc51c473..ec6239239c 100644
--- a/src/tlo/logging/helpers.py
+++ b/src/tlo/logging/helpers.py
@@ -59,11 +59,11 @@ def get_dataframe_row_as_dict_for_logging(
     columns: Optional[Iterable[str]] = None,
 ) -> dict:
     """Get row of a pandas dataframe in a format suitable for logging.
-    
+
     Retrieves entries for all or a subset of columns for a particular row in a dataframe
     and returns a dict keyed by column name, with values NumPy or pandas extension types
     which should be the same for all rows in dataframe.
-    
+
     :param dataframe: Population properties dataframe to get properties from.
     :param row_label: Unique index label identifying row in dataframe.
     :param columns: Set of column names to extract - if ``None``, the default, all
diff --git a/src/tlo/methods/alri.py b/src/tlo/methods/alri.py
index f004e19819..b9f3048071 100644
--- a/src/tlo/methods/alri.py
+++ b/src/tlo/methods/alri.py
@@ -3040,7 +3040,7 @@ def apply(self, person_id):
 
         assert 'fast_breathing_pneumonia' == \
                self.module.get_imci_classification_based_on_symptoms(
-                   child_is_younger_than_2_months=False, 
+                   child_is_younger_than_2_months=False,
                    symptoms=self.sim.modules['SymptomManager'].has_what(person_id=person_id)
                )
 
diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index 674035ad98..01264cb1f0 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -266,7 +266,7 @@ def _lookup_availability_of_consumables(self,
 
     def on_simulation_end(self):
         """Do tasks at the end of the simulation.
-         
+
         Raise warnings and enter to log about item_codes not recognised.
         """
         if self._not_recognised_item_codes:
diff --git a/src/tlo/methods/demography.py b/src/tlo/methods/demography.py
index e58f3895f4..3b4d20265e 100644
--- a/src/tlo/methods/demography.py
+++ b/src/tlo/methods/demography.py
@@ -134,7 +134,7 @@ def __init__(self, name=None, resourcefilepath=None, equal_allocation_by_distric
         ),
 
         'district_num_of_residence': Property(
-            Types.CATEGORICAL, 
+            Types.CATEGORICAL,
             'The district number in which the person is resident',
             categories=['SET_AT_RUNTIME']
         ),
diff --git a/src/tlo/population.py b/src/tlo/population.py
index 37f5fccfdf..26af2f3185 100644
--- a/src/tlo/population.py
+++ b/src/tlo/population.py
@@ -212,7 +212,7 @@ def individual_properties(
         In this case on exit from the ``with`` block in which the context is entered,
         any updates to the individual properties will be written back to the population
         dataframe.
-        
+
         Once the ``with`` block in which the context is entered has been exited the view
         returned will raise an error on any subsequent attempts at reading or writing
         properties.
diff --git a/tests/test_healthburden.py b/tests/test_healthburden.py
index 585549e9db..a01a00f585 100644
--- a/tests/test_healthburden.py
+++ b/tests/test_healthburden.py
@@ -131,7 +131,7 @@ def test_arithmetic_of_disability_aggregation_calcs(seed):
     rfp = Path(os.path.dirname(__file__)) / '../resources'
 
     class ModuleWithPersonsAffected(Module):
-        
+
         def __init__(self, persons_affected, name=None):
             super().__init__(name=name)
             self.persons_affected = persons_affected
@@ -249,7 +249,7 @@ def report_daly_values(self):
         DiseaseThatCausesB(persons_affected=1),
         DiseaseThatCausesAandB(persons_affected=2),
         # intentionally two instances of DiseaseThatCausesC
-        DiseaseThatCausesC(persons_affected=3, name='DiseaseThatCausesC1'),  
+        DiseaseThatCausesC(persons_affected=3, name='DiseaseThatCausesC1'),
         DiseaseThatCausesC(persons_affected=3, name='DiseaseThatCausesC2'),
         DiseaseThatCausesNothing(),
         # Disable sorting to allow registering multiple instances of DiseaseThatCausesC
diff --git a/tests/test_symptommanager.py b/tests/test_symptommanager.py
index 73ea7619d0..05981a8867 100644
--- a/tests/test_symptommanager.py
+++ b/tests/test_symptommanager.py
@@ -237,7 +237,7 @@ def test_has_what_via_individual_properties(seed, supply_disease_module: bool):
     5     0           1           0
     6     0           0           1
     7     0           0           0
-    
+
     We will then assert that has_what returns the expected symptoms for the
     individuals, and that supplying either the person_id keyword or the
     individual_properties keyword gives the same answer.

From e1b1ef88c6d4e6724a2448cf39242b34fab0fa4c Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Tue, 24 Sep 2024 13:14:51 +0100
Subject: [PATCH 08/14] Ignore pycodestyle warning rules in scripts

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ccb78892b2..574a66f364 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -125,7 +125,7 @@ line-length = 120
 
 [tool.ruff.lint]
 select = ["E", "F", "I", "W"]
-per-file-ignores = {"src/scripts/**" = ["E501"]}
+per-file-ignores = {"src/scripts/**" = ["E501", "W"]}
 
 [tool.setuptools.packages.find]
 where = ["src"]

From e7d71bb8cdd6a700c7ed700eb1aae11be496e403 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Tue, 24 Sep 2024 13:23:32 +0100
Subject: [PATCH 09/14] Remove trailing whitespace in consumables log record
 logic

---
 src/tlo/methods/consumables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tlo/methods/consumables.py b/src/tlo/methods/consumables.py
index eee490b669..c631a15560 100644
--- a/src/tlo/methods/consumables.py
+++ b/src/tlo/methods/consumables.py
@@ -272,7 +272,7 @@ def on_simulation_end(self):
         """
         if len(self._not_recognised_item_codes) > 0:
             not_recognised_item_codes = {
-                treatment_id if treatment_id is not None else "": sorted(codes) 
+                treatment_id if treatment_id is not None else "": sorted(codes)
                 for treatment_id, codes in self._not_recognised_item_codes.items()
             }
             warnings.warn(

From ccac4969321013b384542848f7a080555249696f Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Thu, 21 Nov 2024 17:26:45 +0000
Subject: [PATCH 10/14] Move long inlined log file content to external files

---
 .../merge_log_files/expected_merged.txt       | 17 ++++
 .../inconsistent_headers_source_1.txt         |  2 +
 .../inconsistent_headers_source_2.txt         |  2 +
 tests/resources/merge_log_files/source_1.txt  | 11 +++
 tests/resources/merge_log_files/source_2.txt  |  9 +++
 tests/test_analysis.py                        | 81 ++-----------------
 6 files changed, 48 insertions(+), 74 deletions(-)
 create mode 100644 tests/resources/merge_log_files/expected_merged.txt
 create mode 100644 tests/resources/merge_log_files/inconsistent_headers_source_1.txt
 create mode 100644 tests/resources/merge_log_files/inconsistent_headers_source_2.txt
 create mode 100644 tests/resources/merge_log_files/source_1.txt
 create mode 100644 tests/resources/merge_log_files/source_2.txt

diff --git a/tests/resources/merge_log_files/expected_merged.txt b/tests/resources/merge_log_files/expected_merged.txt
new file mode 100644
index 0000000000..3aa7b38e12
--- /dev/null
+++ b/tests/resources/merge_log_files/expected_merged.txt
@@ -0,0 +1,17 @@
+{"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
+{"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
+{"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
+{"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
+{"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
+{"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
+{"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
+{"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
+{"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
+{"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
+{"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
+{"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
+{"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
+{"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
+{"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]}
+{"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"}
+{"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]}
diff --git a/tests/resources/merge_log_files/inconsistent_headers_source_1.txt b/tests/resources/merge_log_files/inconsistent_headers_source_1.txt
new file mode 100644
index 0000000000..bc9dc7d19c
--- /dev/null
+++ b/tests/resources/merge_log_files/inconsistent_headers_source_1.txt
@@ -0,0 +1,2 @@
+{"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
+{"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
diff --git a/tests/resources/merge_log_files/inconsistent_headers_source_2.txt b/tests/resources/merge_log_files/inconsistent_headers_source_2.txt
new file mode 100644
index 0000000000..04a03fb907
--- /dev/null
+++ b/tests/resources/merge_log_files/inconsistent_headers_source_2.txt
@@ -0,0 +1,2 @@
+{"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "int"}, "description": null}
+{"uuid": "b07", "date": "2010-01-04T00:00:00", "values": [1]}
diff --git a/tests/resources/merge_log_files/source_1.txt b/tests/resources/merge_log_files/source_1.txt
new file mode 100644
index 0000000000..9f23afa12f
--- /dev/null
+++ b/tests/resources/merge_log_files/source_1.txt
@@ -0,0 +1,11 @@
+{"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
+{"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
+{"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
+{"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
+{"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
+{"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
+{"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
+{"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
+{"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
+{"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
+{"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
diff --git a/tests/resources/merge_log_files/source_2.txt b/tests/resources/merge_log_files/source_2.txt
new file mode 100644
index 0000000000..780edbeed0
--- /dev/null
+++ b/tests/resources/merge_log_files/source_2.txt
@@ -0,0 +1,9 @@
+{"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
+{"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
+{"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
+{"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
+{"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
+{"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
+{"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]}
+{"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"}
+{"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]}
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 0f42b2d851..99ab2f4982 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -1,5 +1,4 @@
 import os
-import textwrap
 from pathlib import Path
 from typing import List
 
@@ -688,61 +687,10 @@ def check_log(log):
 
 
 def test_merge_log_files(tmp_path):
-    log_file_path_1 = tmp_path / "log_file_1"
-    log_file_path_1.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
-            {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-            {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
-            {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-            {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
-            {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
-            {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
-            {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
-            {"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
-            {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
-            """
-        )
-    )
-    log_file_path_2 = tmp_path / "log_file_2"
-    log_file_path_2.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
-            {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-            {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
-            {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
-            {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-            {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]}
-            {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"}
-            {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]}
-            """
-        )
-    )
-    expected_merged_log_file_content = textwrap.dedent(
-        """\
-        {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-        {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
-        {"uuid": "0b3", "type": "header", "module": "m1", "key": "a", "level": "INFO", "columns": {"msg": "str"}, "description": "A"}
-        {"uuid": "0b3", "date": "2010-01-01T00:00:00", "values": ["1"]}
-        {"uuid": "ed4", "type": "header", "module": "m2", "key": "b", "level": "INFO", "columns": {"msg": "str"}, "description": "B"}
-        {"uuid": "ed4", "date": "2010-01-02T00:00:00", "values": ["2"]}
-        {"uuid": "477", "type": "header", "module": "m2", "key": "c", "level": "INFO", "columns": {"msg": "str"}, "description": "C"}
-        {"uuid": "477", "date": "2010-01-02T00:00:00", "values": ["3"]}
-        {"uuid": "b5c", "type": "header", "module": "m2", "key": "d", "level": "INFO", "columns": {"msg": "str"}, "description": "D"}
-        {"uuid": "b5c", "date": "2010-01-03T00:00:00", "values": ["4"]}
-        {"uuid": "477", "date": "2010-01-03T00:00:00", "values": ["5"]}
-        {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": ["6"]}
-        {"uuid": "ed4", "date": "2010-01-04T00:00:00", "values": ["7"]}
-        {"uuid": "ed4", "date": "2010-01-05T00:00:00", "values": ["8"]}
-        {"uuid": "0b3", "date": "2010-01-06T00:00:00", "values": ["9"]}
-        {"uuid": "a19", "type": "header", "module": "m3", "key": "e", "level": "INFO", "columns": {"msg": "str"}, "description": "E"}
-        {"uuid": "a19", "date": "2010-01-03T00:00:00", "values": ["10"]}
-        """
-    )
+    resources_directory = Path(__file__).parent / "resources" / "merge_log_files"
+    log_file_path_1 = resources_directory / "source_1.txt"
+    log_file_path_2 = resources_directory / "source_2.txt"
+    expected_merged_log_file_content = (resources_directory / "expected_merged.txt").read_text()
     merged_log_file_path = tmp_path / "merged_log_file"
     merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path)
     merged_log_file_content = merged_log_file_path.read_text()
@@ -750,24 +698,9 @@ def test_merge_log_files(tmp_path):
 
 
 def test_merge_log_files_with_inconsistent_headers_raises(tmp_path):
-    log_file_path_1 = tmp_path / "log_file_1"
-    log_file_path_1.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "str"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-01T00:00:00", "values": ["0"]}
-            """
-        )
-    )
-    log_file_path_2 = tmp_path / "log_file_2"
-    log_file_path_2.write_text(
-        textwrap.dedent(
-            """\
-            {"uuid": "b07", "type": "header", "module": "m0", "key": "info", "level": "INFO", "columns": {"msg": "int"}, "description": null}
-            {"uuid": "b07", "date": "2010-01-04T00:00:00", "values": [1]}
-            """
-        )
-    )
+    resources_directory = Path(__file__).parent / "resources" / "merge_log_files"
+    log_file_path_1 = resources_directory / "inconsistent_headers_source_1.txt"
+    log_file_path_2 = resources_directory / "inconsistent_headers_source_2.txt"
     merged_log_file_path = tmp_path / "merged_log_file"
     with pytest.raises(RuntimeError, match="Inconsistent header lines"):
         merge_log_files(log_file_path_1, log_file_path_2, merged_log_file_path)

From 66752a3dde8fcf44b509ad36286b0e04cd08b204 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Thu, 21 Nov 2024 17:32:48 +0000
Subject: [PATCH 11/14] Fix white space / line length violations in bitset
 handler files

---
 src/tlo/bitset_handler/bitset_extension.py | 40 +++++++++++++---------
 tests/bitset_handler/conftest.py           |  2 +-
 2 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/src/tlo/bitset_handler/bitset_extension.py b/src/tlo/bitset_handler/bitset_extension.py
index 92d7af734f..55ee3f98be 100644
--- a/src/tlo/bitset_handler/bitset_extension.py
+++ b/src/tlo/bitset_handler/bitset_extension.py
@@ -46,16 +46,16 @@
 
 class BitsetDtype(ExtensionDtype):
     """
-    A Bitset is represented by a fixed-width string, whose characters are each a uint8.
-    Elements of the set map 1:1 to these characters.
+    A Bitset is represented by a fixed-width string, whose characters are each a uint8. Elements of the set map 1:1 to
+    these characters.
 
-    If the elements set is indexed starting from 0, then:
-    - The quotient of these indices (modulo 8) is the character within the string that contains the bit representing the element,
-    - The remainder (modulo 8) is the index within said character that represents the element itself.
+    If the elements set is indexed starting from 0, then:
+    - The quotient of these indices (modulo 8) is the string character holding the bit that represents the element,
+    - The remainder (modulo 8) is the index within said character that represents the element itself.
 
     The element map takes an element of the bitset as a key, and returns a tuple whose first element is the
-    corresponding string-character index, and the latter the uint8 representation of the element within that
-    string character.
+    corresponding string-character index, and the latter the uint8 representation of the element within that string
+    character.
     """
     _element_map: Dict[ElementType, Tuple[int, np.uint8]]
     _elements: Tuple[ElementType]
@@ -71,7 +71,7 @@ def construct_from_string(cls, string: str) -> BitsetDtype:
         """
         Construct an instance of this class by passing in a string of the form
         that str(<instance of this class>) produces.
-        
+
         That is, given a string of the form
         bitset(#elements): e1, e2, e3, ...
 
@@ -101,7 +101,8 @@ def construct_from_string(cls, string: str) -> BitsetDtype:
             iterable_values = tuple(s.strip() for s in string.split(","))
         if n_elements is not None and len(iterable_values) != n_elements:
             raise ValueError(
-                f"Requested bitset with {n_elements} elements, but provided {len(iterable_values)} elements: {iterable_values}"
+                f"Requested bitset with {n_elements} elements, "
+                f"but provided {len(iterable_values)} elements: {iterable_values}"
             )
         return BitsetDtype(s.strip() for s in string.split(","))
 
@@ -351,7 +352,7 @@ def _uint8_view(self) -> NDArray[np.bytes_]:
         Each row ``i`` of this view corresponds to a bitset stored in this array.
         The value at index ``i, j`` in this view is the ``uint8`` that represents
         character ``j`` in ``self._data[i]``, which can have bitwise operations
-        performed on it.  
+        performed on it.
         """
         return self._data.view(self._uint8_view_format)
 
@@ -463,17 +464,17 @@ def __cast_to_uint8(self, other: CastableForPandasOps) -> NDArray[np.uint8]:
 
         Scalar elements:
             Cast to single-element sets, then treated as set.
-        
+
         Sets:
             Are converted to the (array of) uint8s that represents the set.
-        
+
         ``np.ndarray``s of ``np.uint8``
             Are returned if they have the same number of columns as ``self._uint8_view``.
-        
+
         ``np.ndarray``s of ``np.dtype("Sx")``
             If ``x`` corresponds to the same fixed-width as ``self.dtype.np_array_dtype``, are cast
             to the corresponding ``np.uint8`` view, like ``self._uint8_view`` is from ``self._data``.
-        
+
         BitsetArrays
             Return their ``_uint8_view`` attribute.
         """
@@ -508,13 +509,17 @@ def __cast_to_uint8(self, other: CastableForPandasOps) -> NDArray[np.uint8]:
             cast = self.dtype.as_uint8_array(other)
         return cast
 
-    def __comparison_op(self, other: CastableForPandasOps, op: Callable[[Set[ElementType], Set[ElementType]], bool]) -> BooleanArray:
+    def __comparison_op(
+        self,
+        other: CastableForPandasOps,
+        op: Callable[[Set[ElementType], Set[ElementType]], bool],
+    ) -> BooleanArray:
         """
         Abstract method for strict and non-strict comparison operations.
 
         Notably, __eq__ does not redirect here since it is more efficient for us to convert
         the single value to a bytestring and use numpy array comparison.
-        
+
         For the other set comparison methods however, it's easier as a first implementation
         for us to convert to sets and run the set operations.  If there was a Pythonic way
         of doing "bitwise less than" and "bitwise greater than", we could instead take the
@@ -678,7 +683,8 @@ def copy(self) -> BitsetArray:
 
     def isna(self) -> NDArray:
         """
-        TODO: This isn't a great way to express missing data, but equally a bitset doesn't really ever contain missing data...
+        TODO: This isn't a great way to express missing data, but equally a bitset doesn't really ever contain
+        missing data...
         """
         return np.isnan(self._data)
 
diff --git a/tests/bitset_handler/conftest.py b/tests/bitset_handler/conftest.py
index 41b6ab3e6f..5cd111ae62 100644
--- a/tests/bitset_handler/conftest.py
+++ b/tests/bitset_handler/conftest.py
@@ -1,5 +1,5 @@
 """
-Implements the fixtures required in 
+Implements the fixtures required in
 https://github.com/pandas-dev/pandas/blob/bdb509f95a8c0ff16530cedb01c2efc822c0d314/pandas/core/dtypes/dtypes.py,
 
 which allows us to run the pandas-provided test suite for custom dtypes.

From 29731990b4ae2ecf411b5f126cef906e2adfde01 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Thu, 21 Nov 2024 17:33:15 +0000
Subject: [PATCH 12/14] Trailing white space autofixes

---
 src/tlo/methods/healthsystem.py | 12 ++++++------
 src/tlo/simulation.py           |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 5c6b2022e1..1dbd47fb63 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -961,7 +961,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple
                 self.parameters[f'Daily_Capabilities_{use_funded_or_actual_staffing}']
         )
         capabilities = capabilities.rename(columns={'Officer_Category': 'Officer_Type_Code'})  # neaten
-        
+
         # Create new column where capabilities per staff are computed
         capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count']
 
@@ -984,10 +984,10 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple
         # Merge in information about facility from Master Facilities List
         mfl = self.parameters['Master_Facilities_List']
         capabilities_ex = capabilities_ex.merge(mfl, on='Facility_ID', how='left')
-        
+
         # Create a copy of this to store staff counts
         capabilities_per_staff_ex = capabilities_ex.copy()
-        
+
         # Merge in information about officers
         # officer_types = self.parameters['Officer_Types_Table'][['Officer_Type_Code', 'Officer_Type']]
         # capabilities_ex = capabilities_ex.merge(officer_types, on='Officer_Type_Code', how='left')
@@ -1000,7 +1000,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple
             how='left',
         )
         capabilities_ex = capabilities_ex.fillna(0)
-        
+
         capabilities_per_staff_ex = capabilities_per_staff_ex.merge(
             capabilities[['Facility_ID', 'Officer_Type_Code', 'Mins_Per_Day_Per_Staff']],
             on=['Facility_ID', 'Officer_Type_Code'],
@@ -1015,7 +1015,7 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple
             + '_Officer_'
             + capabilities_ex['Officer_Type_Code']
         )
-        
+
         # Give the standard index:
         capabilities_per_staff_ex = capabilities_per_staff_ex.set_index(
             'FacilityID_'
@@ -1055,7 +1055,7 @@ def _rescale_capabilities_to_capture_effective_capability(self):
             )
             if rescaling_factor > 1 and rescaling_factor != float("inf"):
                 self._daily_capabilities[officer] *= rescaling_factor
-                
+
                 # We assume that increased daily capabilities is a result of each staff performing more
                 # daily patient facing time per day than contracted (or equivalently performing appts more
                 # efficiently).
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index cb9245ba7d..6ba216ae90 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -130,7 +130,7 @@ def __init__(
 
     def _configure_logging(
         self,
-        filename: Optional[str] = None, 
+        filename: Optional[str] = None,
         directory: Path | str = "./outputs",
         custom_levels: Optional[dict[str, LogLevel]] = None,
         suppress_stdout: bool = False

From 649442a417722668d94d76045afac98b1321fc38 Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Thu, 21 Nov 2024 17:36:39 +0000
Subject: [PATCH 13/14] Line length and white space manual fixes

---
 src/tlo/analysis/utils.py       |  2 +-
 src/tlo/methods/epilepsy.py     | 10 +++++++---
 src/tlo/methods/healthsystem.py | 13 +++++++------
 src/tlo/simulation.py           | 18 +++++++++---------
 src/tlo/util.py                 |  7 ++++---
 5 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/src/tlo/analysis/utils.py b/src/tlo/analysis/utils.py
index e605400332..1c9dd72cfb 100644
--- a/src/tlo/analysis/utils.py
+++ b/src/tlo/analysis/utils.py
@@ -89,7 +89,7 @@ def parse_log_file(log_filepath, level: int = logging.INFO):
 
 def merge_log_files(log_path_1: Path, log_path_2: Path, output_path: Path) -> None:
     """Merge two log files, skipping any repeated header lines.
-    
+
     :param log_path_1: Path to first log file to merge. Records from this log file will
         appear first in merged log file.
     :param log_path_2: Path to second log file to merge. Records from this log file will
diff --git a/src/tlo/methods/epilepsy.py b/src/tlo/methods/epilepsy.py
index cc8c0f8cca..db4775f2d5 100644
--- a/src/tlo/methods/epilepsy.py
+++ b/src/tlo/methods/epilepsy.py
@@ -628,7 +628,9 @@ def __init__(self, module, person_id):
         self.EXPECTED_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1})
         self.ACCEPTED_FACILITY_LEVEL = '1b'
 
-        self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters['max_num_of_failed_attempts_before_defaulting']
+        self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters[
+            "max_num_of_failed_attempts_before_defaulting"
+        ]
         self._counter_of_failed_attempts_due_to_unavailable_medicines = 0
 
     def apply(self, person_id, squeeze_factor):
@@ -679,8 +681,10 @@ class HSI_Epilepsy_Follow_Up(HSI_Event, IndividualScopeEventMixin):
     def __init__(self, module, person_id):
         super().__init__(module, person_id=person_id)
 
-        self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters['max_num_of_failed_attempts_before_defaulting']
-        self._DEFAULT_APPT_FOOTPRINT = self.make_appt_footprint({'Over5OPD': 1})
+        self._MAX_NUMBER_OF_FAILED_ATTEMPTS_BEFORE_DEFAULTING = module.parameters[
+            "max_num_of_failed_attempts_before_defaulting"
+        ]
+        self._DEFAULT_APPT_FOOTPRINT = self.make_appt_footprint({"Over5OPD": 1})
         self._REPEATED_APPT_FOOTPRINT = self.make_appt_footprint({'PharmDispensing': 1})
 
         self.TREATMENT_ID = "Epilepsy_Treatment_Followup"
diff --git a/src/tlo/methods/healthsystem.py b/src/tlo/methods/healthsystem.py
index 1dbd47fb63..845f63904a 100644
--- a/src/tlo/methods/healthsystem.py
+++ b/src/tlo/methods/healthsystem.py
@@ -939,7 +939,9 @@ def setup_daily_capabilities(self, use_funded_or_actual_staffing):
         This is called when the value for `use_funded_or_actual_staffing` is set - at the beginning of the simulation
          and when the assumption when the underlying assumption for `use_funded_or_actual_staffing` is updated"""
         # * Store 'DailyCapabilities' in correct format and using the specified underlying assumptions
-        self._daily_capabilities, self._daily_capabilities_per_staff = self.format_daily_capabilities(use_funded_or_actual_staffing)
+        self._daily_capabilities, self._daily_capabilities_per_staff = (
+            self.format_daily_capabilities(use_funded_or_actual_staffing)
+        )
 
         # Also, store the set of officers with non-zero daily availability
         # (This is used for checking that scheduled HSI events do not make appointment requiring officers that are
@@ -949,11 +951,12 @@ def setup_daily_capabilities(self, use_funded_or_actual_staffing):
     def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple[pd.Series,pd.Series]:
         """
         This will updates the dataframe for the self.parameters['Daily_Capabilities'] so as to:
-        1. include every permutation of officer_type_code and facility_id, with zeros against permutations where no capacity
-        is available.
+        1. include every permutation of officer_type_code and facility_id, with zeros against permutations where no
+        capacity is available.
         2. Give the dataframe an index that is useful for merging on (based on Facility_ID and Officer Type)
         (This is so that its easier to track where demands are being placed where there is no capacity)
-        3. Compute daily capabilities per staff. This will be used to compute staff count in a way that is independent of assumed efficiency.
+        3. Compute daily capabilities per staff. This will be used to compute staff count in a way that is independent
+        of assumed efficiency.
         """
 
         # Get the capabilities data imported (according to the specified underlying assumptions).
@@ -965,7 +968,6 @@ def format_daily_capabilities(self, use_funded_or_actual_staffing: str) -> tuple
         # Create new column where capabilities per staff are computed
         capabilities['Mins_Per_Day_Per_Staff'] = capabilities['Total_Mins_Per_Day']/capabilities['Staff_Count']
 
-
         # Create dataframe containing background information about facility and officer types
         facility_ids = self.parameters['Master_Facilities_List']['Facility_ID'].values
         officer_type_codes = set(self.parameters['Officer_Types_Table']['Officer_Category'].values)
@@ -3029,4 +3031,3 @@ def log_number_of_staff(self):
             description="The number of hcw_staff this year",
             data=current_staff_count,
         )
-
diff --git a/src/tlo/simulation.py b/src/tlo/simulation.py
index 6ba216ae90..3f9ce18ec8 100644
--- a/src/tlo/simulation.py
+++ b/src/tlo/simulation.py
@@ -57,8 +57,8 @@ class Simulation:
     :ivar modules: A dictionary of the disease modules used in this simulation, keyed
        by the module name.
     :ivar population: The population being simulated.
-    :ivar rng: The simulation-level random number generator. 
-    
+    :ivar rng: The simulation-level random number generator.
+
     .. note::
        Individual modules also have their own random number generator with independent
        state.
@@ -80,7 +80,7 @@ def __init__(
         :param seed: The seed for random number generator. class will create one if not
             supplied
         :param log_config: Dictionary specifying logging configuration for this
-            simulation. Can have entries: `filename` - prefix for log file name, final 
+            simulation. Can have entries: `filename` - prefix for log file name, final
             file name will have a date time appended, if not present default is to not
             output log to a file; `directory` - path to output directory to write log
             file to, default if not specified is to output to the `outputs` folder;
@@ -89,9 +89,9 @@ def __init__(
             logging to standard output stream (default is `False`).
         :param show_progress_bar: Whether to show a progress bar instead of the logger
             output during the simulation.
-        :param resourcefilepath: Path to resource files folder. Assign ``None` if no 
+        :param resourcefilepath: Path to resource files folder. Assign ``None`` if no
             path is provided.
-            
+
         .. note::
            The `custom_levels` entry in `log_config` argument can be used to disable
            logging on all disease modules by setting a high level to `*`, and then
@@ -136,7 +136,7 @@ def _configure_logging(
         suppress_stdout: bool = False
     ):
         """Configure logging of simulation outputs.
-         
+
         Can write log output to a file in addition the default of `stdout`. Mnimum
         custom levels for each logger can be specified for filtering out messages.
 
@@ -209,7 +209,7 @@ def register(
             modules to be registered. A :py:exc:`.ModuleDependencyError` exception will
             be raised if there are missing dependencies.
         :param auto_register_dependencies: Whether to register missing module dependencies
-            or not. If this argument is set to True, all module dependencies will be 
+            or not. If this argument is set to True, all module dependencies will be
             automatically registered.
         """
         if auto_register_dependencies:
@@ -423,7 +423,7 @@ def do_birth(self, mother_id: int) -> int:
 
     def find_events_for_person(self, person_id: int) -> list[tuple[Date, Event]]:
         """Find the events in the queue for a particular person.
-    
+
         :param person_id: The row index of the person of interest.
         :return: List of tuples `(date_of_event, event)` for that `person_id` in the
             queue.
@@ -463,7 +463,7 @@ def load_from_pickle(
 
         :param pickle_path: File path to load simulation state from.
         :param log_config: New log configuration to override previous configuration. If
-            `None` previous configuration (including output file) will be retained. 
+            `None` previous configuration (including output file) will be retained.
 
         :returns: Loaded :py:class:`Simulation` object.
         """
diff --git a/src/tlo/util.py b/src/tlo/util.py
index 168b1d41a1..cb7e3a9825 100644
--- a/src/tlo/util.py
+++ b/src/tlo/util.py
@@ -439,8 +439,10 @@ def get_person_id_to_inherit_from(child_id, mother_id, population_dataframe, rng
         return mother_id
 
 
-def convert_excel_files_to_csv(folder: Path, files: Optional[list[str]] = None, *, delete_excel_files: bool = False) -> None:
-    """ convert Excel files to csv files.
+def convert_excel_files_to_csv(
+    folder: Path, files: Optional[list[str]] = None, *, delete_excel_files: bool = False
+) -> None:
+    """Convert Excel files to csv files.
 
     :param folder: Folder containing Excel files.
     :param files: List of Excel file names to convert to csv files. When `None`, all Excel files in the folder and
@@ -509,4 +511,3 @@ def clean_dataframe(dataframes_dict: dict[str, DataFrame]) -> None:
     clean_dataframe(all_data)
     # If only one file loaded return dataframe directly rather than dict
     return next(iter(all_data.values())) if len(all_data) == 1 else all_data
-

From 050cdae0212a225ce8c5bbdae255332e4b5b104d Mon Sep 17 00:00:00 2001
From: Matt Graham <matthew.m.graham@gmail.com>
Date: Fri, 22 Nov 2024 09:21:29 +0000
Subject: [PATCH 14/14] Shorten too long docstring in tests

---
 tests/test_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_utils.py b/tests/test_utils.py
index 0e6b13d83b..2d9827d59e 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -333,7 +333,7 @@ def copy_files_to_temporal_directory_and_return_path(tmpdir):
 
 
 def test_pass_datatypes_to_read_csv_method(tmpdir):
-    """ test passing column datatypes to read csv method. Final column datatype should change to what has been passed """
+    """Test passing column datatypes to read csv method. Final column datatype should change to what has been passed."""
     # copy and get resource files path in the temporal directory
     path_to_tmpdir = Path(tmpdir)
     sample_data = pd.DataFrame(data={'numbers1': [5,6,8,4,9,6], 'numbers2': [19,27,53,49,75,56]}, dtype=int)