UCL · matt-graham · Mar 20, 2024 · Dec 15, 2023 · Dec 15, 2023 · Dec 15, 2023
diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml
@@ -31,17 +31,33 @@ jobs:
   set-variables:
     name: Create unique output file identifier and artifact name
     runs-on: ubuntu-latest
+    if: (github.event_name != 'issue_comment') || ((github.event_name == 'issue_comment') && (github.event.comment.body == '/run profiling'))
     outputs:
       profiling-output-dir: profiling_results/
       profiling-filename: ${{ steps.set-profiling-filename.outputs.name }}
       artifact-name: ${{ steps.set-artifact-name.outputs.name }}
-      profiling-on-sha: ${{ steps.set-github-info.outputs.sha }}
+      profiling-on-sha: ${{ steps.determine-correct-sha.outputs.result }}
       profiling-event-trigger: ${{ steps.set-github-info.outputs.event }}
     steps:
+      - id: determine-correct-sha
+        uses: actions/github-script@v7  # result is exposed as steps.determine-correct-sha.outputs.result
+        with:
+          result-encoding: string
+          script: |
+            if (!context.payload.issue?.pull_request) {  // payload has no issue, or the issue is not a PR
+              return context.sha;  // fall back to the SHA the workflow run was triggered on
+            }
+            const { data: pr } = await github.rest.pulls.get({  // fetch the PR the comment was made on
+              owner: context.issue.owner,
+              repo: context.issue.repo,
+              pull_number: context.issue.number,
+            });
+            return pr.head.sha;  // head commit of that pull request
+
       - id: set-profiling-filename
         name: Set profiling output file name
         run: |
-          echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
+          echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
 
       - id: set-artifact-name
         name: Set artifact name
@@ -51,7 +67,7 @@ jobs:
       - id: set-github-info
         name: Fix Git and GitHub information when passing between workflows
         run: |
-          echo "sha=${GITHUB_SHA}" >> "${GITHUB_OUTPUT}"
+          echo "sha=${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}"
           echo "event=${GITHUB_EVENT_NAME}" >> "${GITHUB_OUTPUT}"
 
   profile-on-comment:
@@ -65,6 +81,7 @@ jobs:
       commands: |
         tox -vv -e profile -- \
           --html \
+          --flat-html \
           --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
           --output-name ${{ needs.set-variables.outputs.profiling-filename }} \
           --additional-stats \
@@ -98,6 +115,7 @@ jobs:
         run: |
           tox -vv -e profile -- \
             --html \
+            --flat-html \
             --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \
             --output-name ${{ needs.set-variables.outputs.profiling-filename }} \
             --additional-stats \

diff --git a/pyproject.toml b/pyproject.toml
@@ -56,6 +56,7 @@ dev = [
     "pylint",
     "ruff",
     # Profiling
+    "ansi2html",
     "psutil",
     "pyinstrument>=4.3",
     # Building requirements files

diff --git a/requirements/dev.txt b/requirements/dev.txt
@@ -6,6 +6,8 @@
 #
 adal==1.2.7
     # via msrestazure
+ansi2html==1.9.1
+    # via tlo (pyproject.toml)
 astroid==3.0.0
     # via pylint
 azure-batch==14.0.0

diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py
@@ -9,10 +9,16 @@
 import numpy as np
 from psutil import disk_io_counters
 from pyinstrument import Profiler
-from pyinstrument.renderers import HTMLRenderer
+from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer
 from pyinstrument.session import Session
 from scale_run import save_arguments_to_json, scale_run
 
+try:
+    from ansi2html import Ansi2HTMLConverter
+    ANSI2HTML_AVAILABLE = True
+except ImportError:
+    ANSI2HTML_AVAILABLE = False
+
 from tlo import Simulation
 
 _PROFILING_RESULTS: Path = (Path(__file__).parents[3] / "profiling_results").resolve()
@@ -176,6 +182,7 @@ def run_profiling(
     output_name: str = "profiling",
     write_html: bool = False,
     write_pyisession: bool = False,
+    write_flat_html: bool = True,
     interval: float = 2e-1,
     initial_population: int = 50000,
     simulation_years: int = 5,
@@ -187,6 +194,12 @@ def run_profiling(
     Uses pyinstrument to profile the scale_run simulation,
     writing the output in the requested formats.
     """
+    if write_flat_html and not ANSI2HTML_AVAILABLE:
+        # Check if flat HTML output requested but ansi2html module not available at
+        # _start_ of function to avoid erroring after a potentially long profiling run
+        msg = "ansi2html required for flat HTML output."
+        raise ValueError(msg)
+
     additional_stats = dict(() if additional_stats is None else additional_stats)
 
     # Create the profiler to record the stack
@@ -253,7 +266,11 @@ def run_profiling(
         # Renderer initialisation options:
         # show_all: removes library calls where identifiable
         # timeline: if true, samples are left in chronological order rather than total time
-        html_renderer = HTMLRenderer(show_all=False, timeline=False)
+        html_renderer = HTMLRenderer(
+            show_all=False,
+            timeline=False,
+            processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
+        )
         print(f"Writing {output_html_file}", end="...", flush=True)
         with open(output_html_file, "w") as f:
             f.write(html_renderer.render(scale_run_session))
@@ -268,13 +285,29 @@ def run_profiling(
                 f"\tWas        : {additional_stats['html_output']}"
                 f"\tReplaced by: {output_html_file}"
             )
-        additional_stats["html_output"] = str(output_html_file)
+        additional_stats["html_output"] = str(output_html_file.name)
 
     if write_pyisession:
         output_ipysession_file = output_dir / f"{output_name}.pyisession"
         print(f"Writing {output_ipysession_file}", end="...", flush=True)
         scale_run_session.save(output_ipysession_file)
         print("done")
+
+    if write_flat_html:
+        output_html_file = output_dir / f"{output_name}.flat.html"
+        console_renderer = ConsoleRenderer(
+            show_all=False,
+            timeline=False,
+            color=True,
+            flat=True,
+            processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"}
+        )
+        converter = Ansi2HTMLConverter(title=output_name)
+        print(f"Writing {output_html_file}", end="...", flush=True)
+        with open(output_html_file, "w") as f:
+            f.write(converter.convert(console_renderer.render(scale_run_session)))
+        print("done")
+        additional_stats["flat_html_output"] = str(output_html_file.name)
 
     # Write the statistics file, main output
     output_stat_file = output_dir / f"{output_name}.stats.json"
@@ -329,6 +362,12 @@ def run_profiling(
         action="store_true",
         dest="write_pyisession",
     )
+    parser.add_argument(
+        "--flat-html",
+        action="store_true",
+        help="Write flat HTML output in addition to statistics output.",
+        dest="write_flat_html",
+    )
     parser.add_argument(
         "-i",
         "--interval-seconds",