From b27ba95d736db049d9a2a01168456b954fba2e8c Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Fri, 15 Dec 2023 15:31:00 +0000 Subject: [PATCH 1/8] Write file name not full path as html_output stat --- src/scripts/profiling/run_profiling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index 49410e2b54..c58801867d 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -268,7 +268,7 @@ def run_profiling( f"\tWas : {additional_stats['html_output']}" f"\tReplaced by: {output_html_file}" ) - additional_stats["html_output"] = str(output_html_file) + additional_stats["html_output"] = str(output_html_file.name) if write_pyisession: output_ipysession_file = output_dir / f"{output_name}.pyisession" From 9e2405b2487dcb070babe455b4c306a0aaa9f9ae Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Fri, 15 Dec 2023 15:31:38 +0000 Subject: [PATCH 2/8] Explicitly show tlo and hide pandas frames in profiling output --- src/scripts/profiling/run_profiling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index c58801867d..af5ece5ccd 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -253,7 +253,11 @@ def run_profiling( # Renderer initialisation options: # show_all: removes library calls where identifiable # timeline: if true, samples are left in chronological order rather than total time - html_renderer = HTMLRenderer(show_all=False, timeline=False) + html_renderer = HTMLRenderer( + show_all=False, + timeline=False, + processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"} + ) print(f"Writing {output_html_file}", end="...", flush=True) with open(output_html_file, "w") as f: f.write(html_renderer.render(scale_run_session)) From c0804db39b931fe380800a0e9b46e64539e2dd4d Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Fri, 15 Dec 2023 15:34:09 +0000 Subject: [PATCH 3/8] Add option for flat profiling output --- .github/workflows/run-profiling.yaml | 2 ++ pyproject.toml | 1 + requirements/dev.txt | 2 ++ src/scripts/profiling/run_profiling.py | 33 +++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index 8fe79f8dcf..864457a7a2 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -65,6 +65,7 @@ jobs: commands: | tox -vv -e profile -- \ --html \ + --flat-html \ --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \ --output-name ${{ needs.set-variables.outputs.profiling-filename }} \ --additional-stats \ @@ -97,6 +98,7 @@ jobs: run: | tox -vv -e profile -- \ --html \ + --flat-html \ --root-output-dir ${{ needs.set-variables.outputs.profiling-output-dir }} \ --output-name ${{ needs.set-variables.outputs.profiling-filename }} \ --additional-stats \ diff --git a/pyproject.toml b/pyproject.toml index 25aae73d33..638a45be0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,7 @@ dev = [ "pylint", "ruff", # Profiling + "ansi2html", "psutil", "pyinstrument>=4.3", # Building requirements files diff --git a/requirements/dev.txt b/requirements/dev.txt index d34d5cead1..efd4f0e3e8 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -6,6 +6,8 @@ # adal==1.2.7 # via msrestazure +ansi2html==1.9.1 + # via tlo (pyproject.toml) astroid==3.0.0 # via pylint azure-batch==14.0.0 diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index af5ece5ccd..88d62d88ef 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -9,10 +9,16 @@ import numpy as np from psutil import disk_io_counters from pyinstrument import Profiler -from pyinstrument.renderers import HTMLRenderer +from pyinstrument.renderers import ConsoleRenderer, HTMLRenderer from pyinstrument.session import Session from scale_run import save_arguments_to_json, scale_run +try: + from ansi2html import Ansi2HTMLConverter + ANSI2HTML_AVAILABLE = True +except ImportError: + ANSI2HTML_AVAILABLE = False + from tlo import Simulation _PROFILING_RESULTS: Path = (Path(__file__).parents[3] / "profiling_results").resolve() @@ -176,6 +182,7 @@ def run_profiling( output_name: str = "profiling", write_html: bool = False, write_pyisession: bool = False, + write_flat_html: bool = True, interval: float = 1e-1, initial_population: int = 50000, simulation_years: int = 5, @@ -279,6 +286,24 @@ def run_profiling( print(f"Writing {output_ipysession_file}", end="...", flush=True) scale_run_session.save(output_ipysession_file) print("done") + + if write_flat_html: + if not ANSI2HTML_AVAILABLE: + raise ValueError("ansi2html required for flat HTML output.") + output_html_file = output_dir / f"{output_name}.flat.html" + console_renderer = ConsoleRenderer( + show_all=False, + timeline=False, + color=True, + flat=True, + processor_options={"show_regex": ".*/tlo/.*", "hide_regex": ".*/pandas/.*"} + ) + converter = Ansi2HTMLConverter(title=output_name) + print(f"Writing {output_html_file}", end="...", flush=True) + with open(output_html_file, "w") as f: + f.write(converter.convert(console_renderer.render(scale_run_session))) + print("done") + additional_stats["flat_html_output"] = str(output_html_file.name) # Write the statistics file, main output output_stat_file = output_dir / f"{output_name}.stats.json" @@ -333,6 +358,12 @@ def run_profiling( action="store_true", dest="write_pyisession", ) + parser.add_argument( + "--flat-html", + action="store_true", + help="Write flat HTML output in addition to statistics output.", + dest="write_flat_html", + ) parser.add_argument( "-i", "--interval-seconds", From fcdb791a5320a91914c73aeddd7bc8d450a4f2c1 Mon Sep 17 00:00:00 2001 From: willGraham01 <1willgraham@gmail.com> Date: Tue, 19 Mar 2024 09:47:23 +0000 Subject: [PATCH 4/8] Only run profiling variable setup if we actually need to --- .github/workflows/run-profiling.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index 864457a7a2..af222aa05f 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -31,6 +31,7 @@ jobs: set-variables: name: Create unique output file identifier and artifact name runs-on: ubuntu-latest + if: (github.event_name != 'issue_comment') || ((github.event_name == 'issue_comment') && (github.event.comment.body == '/run profiling')) outputs: profiling-output-dir: profiling_results/ profiling-filename: ${{ steps.set-profiling-filename.outputs.name }} From 8e9808af58674d789c4f39c99ec3e0f695de50e2 Mon Sep 17 00:00:00 2001 From: willGraham01 <1willgraham@gmail.com> Date: Tue, 19 Mar 2024 09:50:53 +0000 Subject: [PATCH 5/8] Fix SHA that is reported on comment-triggered PRs --- .github/workflows/run-profiling.yaml | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index af222aa05f..dbd2da084a 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -36,13 +36,29 @@ jobs: profiling-output-dir: profiling_results/ profiling-filename: ${{ steps.set-profiling-filename.outputs.name }} artifact-name: ${{ steps.set-artifact-name.outputs.name }} - profiling-on-sha: ${{ steps.set-github-info.outputs.sha }} + profiling-on-sha: ${{ steps.sha.outputs.result }} profiling-event-trigger: ${{ steps.set-github-info.outputs.event }} steps: + - name: Get SHA of last commit on default branch if issue or pull-request branch + id: sha + uses: actions/github-script@v7 + with: + result-encoding: string + script: | + if (!context.payload.issue.pull_request) { + return context.sha; + }; + const { data: pr } = await github.rest.pulls.get({ + owner: context.issue.owner, + repo: context.issue.repo, + pull_number: context.issue.number, + }); + return pr.head.sha; + - id: set-profiling-filename name: Set profiling output file name run: | - echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${GITHUB_SHA}" >> "${GITHUB_OUTPUT}" + echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.sha.outputs.result }}" >> "${GITHUB_OUTPUT}" - id: set-artifact-name name: Set artifact name @@ -52,7 +68,7 @@ jobs: - id: set-github-info name: Fix Git and GitHub information when passing between workflows run: | - echo "sha=${GITHUB_SHA}" >> "${GITHUB_OUTPUT}" + echo "sha=${{ steps.sha.outputs.result }}" >> "${GITHUB_OUTPUT}" echo "event=${GITHUB_EVENT_NAME}" >> "${GITHUB_OUTPUT}" profile-on-comment: From 78ce790e6d22d7f2a08ceee3d4393d4b916d656f Mon Sep 17 00:00:00 2001 From: willGraham01 <1willgraham@gmail.com> Date: Tue, 19 Mar 2024 09:54:24 +0000 Subject: [PATCH 6/8] Slightly more verbose step name --- .github/workflows/run-profiling.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index dbd2da084a..e175099309 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -36,11 +36,10 @@ jobs: profiling-output-dir: profiling_results/ profiling-filename: ${{ steps.set-profiling-filename.outputs.name }} artifact-name: ${{ steps.set-artifact-name.outputs.name }} - profiling-on-sha: ${{ steps.sha.outputs.result }} + profiling-on-sha: ${{ steps.determine-correct-sha.outputs.result }} profiling-event-trigger: ${{ steps.set-github-info.outputs.event }} steps: - - name: Get SHA of last commit on default branch if issue or pull-request branch - id: sha + - id: determine-correct-sha uses: actions/github-script@v7 with: result-encoding: string @@ -58,7 +57,7 @@ jobs: - id: set-profiling-filename name: Set profiling output file name run: | - echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.sha.outputs.result }}" >> "${GITHUB_OUTPUT}" + echo "name=${GITHUB_EVENT_NAME}_${GITHUB_RUN_NUMBER}_${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}" - id: set-artifact-name name: Set artifact name @@ -68,7 +67,7 @@ jobs: - id: set-github-info name: Fix Git and GitHub information when passing between workflows run: | - echo "sha=${{ steps.sha.outputs.result }}" >> "${GITHUB_OUTPUT}" + echo "sha=${{ steps.determine-correct-sha.outputs.result }}" >> "${GITHUB_OUTPUT}" echo "event=${GITHUB_EVENT_NAME}" >> "${GITHUB_OUTPUT}" profile-on-comment: From 666462b34913465d6de7ac737e14f272908b2f62 Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Wed, 20 Mar 2024 10:53:49 +0000 Subject: [PATCH 7/8] Fail early when ansi2html not available and flat HTML output requested --- src/scripts/profiling/run_profiling.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index 88d62d88ef..f82e662286 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -194,6 +194,12 @@ def run_profiling( Uses pyinstrument to profile the scale_run simulation, writing the output in the requested formats. """ + if write_flat_html and not ANSI2HTML_AVAILABLE: + # Check if flat HTML output requested but ansi2html module not available at + # _start_ of function to avoid erroring after a potentially long profiling run + msg = "ansi2html required for flat HTML output." + raise ValueError(msg) + additional_stats = dict(() if additional_stats is None else additional_stats) # Create the profiler to record the stack @@ -288,8 +294,6 @@ def run_profiling( print("done") if write_flat_html: - if not ANSI2HTML_AVAILABLE: - raise ValueError("ansi2html required for flat HTML output.") output_html_file = output_dir / f"{output_name}.flat.html" console_renderer = ConsoleRenderer( show_all=False, From 9ad4aef086f3e0b7b39dc7b8135d3ea4dab4467c Mon Sep 17 00:00:00 2001 From: Matt Graham Date: Wed, 20 Mar 2024 11:57:14 +0000 Subject: [PATCH 8/8] Disable progress bar and log output by default during profiling runs --- .github/workflows/run-profiling.yaml | 6 ++++-- src/scripts/profiling/run_profiling.py | 15 ++++++++++++++- src/scripts/profiling/scale_run.py | 7 +++++++ 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run-profiling.yaml b/.github/workflows/run-profiling.yaml index d392a18f74..8721ce5ca8 100644 --- a/.github/workflows/run-profiling.yaml +++ b/.github/workflows/run-profiling.yaml @@ -86,7 +86,8 @@ jobs: --output-name ${{ needs.set-variables.outputs.profiling-filename }} \ --additional-stats \ sha=${{ needs.set-variables.outputs.profiling-on-sha }} \ - trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} + trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \ + --disable-log-output-to-stdout description: Profiled run of the model timeout-minutes: 8640 application-organization: UCL @@ -120,7 +121,8 @@ jobs: --output-name ${{ needs.set-variables.outputs.profiling-filename }} \ --additional-stats \ sha=${{ needs.set-variables.outputs.profiling-on-sha }} \ - trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} + trigger=${{ needs.set-variables.outputs.profiling-event-trigger }} \ + --disable-log-output-to-stdout ## Upload the output as an artifact so we can push it to the profiling repository - name: Save results as artifact diff --git a/src/scripts/profiling/run_profiling.py b/src/scripts/profiling/run_profiling.py index f4fcc39cab..30b46bb3b1 100644 --- a/src/scripts/profiling/run_profiling.py +++ b/src/scripts/profiling/run_profiling.py @@ -189,6 +189,8 @@ def run_profiling( simulation_months: int = 0, mode_appt_constraints: Literal[0, 1, 2] = 2, additional_stats: Optional[List[Tuple[str, str]]] = None, + show_progress_bar: bool = False, + disable_log_output_to_stdout: bool = False, ) -> None: """ Uses pyinstrument to profile the scale_run simulation, @@ -221,7 +223,7 @@ def run_profiling( "log_filename": "scale_run_profiling", "log_level": "WARNING", "parse_log_file": False, - "show_progress_bar": True, + "show_progress_bar": show_progress_bar, "seed": 0, "disable_health_system": False, "disable_spurious_symptoms": False, @@ -231,6 +233,7 @@ def run_profiling( "record_hsi_event_details": False, "ignore_warnings": True, "log_final_population_checksum": False, + "disable_log_output_to_stdout": disable_log_output_to_stdout, } output_arg_file = output_dir / f"{output_name}.args.json" @@ -421,6 +424,16 @@ def run_profiling( "as strings." ), ) + parser.add_argument( + "--show-progress-bar", + help="Show simulation progress bar during simulation rather than log output", + action="store_true", + ) + parser.add_argument( + "--disable-log-output-to-stdout", + help="Disable simulation log output being displayed in stdout stream", + action="store_true", + ) args = parser.parse_args() diff --git a/src/scripts/profiling/scale_run.py b/src/scripts/profiling/scale_run.py index a96e7999ec..735d1e7ba3 100644 --- a/src/scripts/profiling/scale_run.py +++ b/src/scripts/profiling/scale_run.py @@ -44,6 +44,7 @@ def scale_run( log_level: Literal["CRITICAL", "DEBUG", "FATAL", "WARNING", "INFO"] = "WARNING", parse_log_file: bool = False, show_progress_bar: bool = False, + disable_log_output_to_stdout: bool = False, seed: int = 0, disable_health_system: bool = False, disable_spurious_symptoms: bool = False, @@ -70,6 +71,7 @@ def scale_run( "filename": log_filename, "directory": output_dir, "custom_levels": {"*": getattr(logging, log_level)}, + "suppress_stdout": disable_log_output_to_stdout, } sim = Simulation( @@ -192,6 +194,11 @@ def scale_run( help="Show progress bar during simulation rather than log output", action="store_true", ) + parser.add_argument( + "--disable-log-output-to-stdout", + help="Disable log output being displayed in stdout stream", + action="store_true", + ) parser.add_argument( "--seed", help="Seed for base pseudo-random number generator",