diff --git a/README.md b/README.md index de3a17ad..b26bddf6 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ https://doi.org/10.1016/j.buildenv.2022.109166 ***Open Source Acknowledgments*** -For a detailed list of the open-source dependencies used in this project along with their respective licenses, please refer to [License Information](open-source-licences/README.md). This includes both the core dependencies specified in the project's requirements and their transitive dependencies. +For a detailed list of the open-source dependencies used in this project along with their respective licenses, please refer to [License Information](open-source-licences/README.md). This includes both the core dependencies specified in the project's requirements and their transitive dependencies. The information also features a distribution diagram of licenses and a brief description of each of them. @@ -147,7 +147,6 @@ voila caimira/apps/expert/caimira.ipynb --port=8080 Then visit http://localhost:8080. - ### Running the tests ``` @@ -155,6 +154,18 @@ pip install -e .[test] pytest ./caimira ``` +### Running the profiler + +The profiler is enabled in one of the following cases: +- the calculator app is running in `debug` mode +- the environment variable `CAIMIRA_PROFILER_ENABLED` is set to 1 + +When visiting http://localhost:8080/profiler, you can start a new session and choose between [PyInstrument](https://github.com/joerick/pyinstrument) or [cProfile](https://docs.python.org/3/library/profile.html#module-cProfile). The app includes two different profilers, mainly because they can give different information. + +Keep the profiler page open. Then, in another window, navigate to any page in CAiMIRA, for example generate a new report. Refresh the profiler page, and click on the `Report` link to see the profiler output. + +The sessions are stored in a local file in the `/tmp` folder. To share it across multiple web nodes, a shared storage should be added to all web nodes. The folder can be customized via the environment variable `CAIMIRA_PROFILER_CACHE_DIR`. + ### Building the whole environment for local development **Simulate the docker build that takes place on openshift with:** diff --git a/caimira/apps/calculator/__init__.py b/caimira/apps/calculator/__init__.py index 842f2de6..148dae62 100644 --- a/caimira/apps/calculator/__init__.py +++ b/caimira/apps/calculator/__init__.py @@ -25,6 +25,7 @@ from tornado.web import Application, RequestHandler, StaticFileHandler from tornado.httpclient import AsyncHTTPClient, HTTPRequest import tornado.log +from caimira.profiler import CaimiraProfiler, Profilers from caimira.store.data_registry import DataRegistry from caimira.store.data_service import DataService @@ -43,7 +44,53 @@ # increase the overall CAiMIRA version (found at ``caimira.__version__``). __version__ = "4.14.3" -LOG = logging.getLogger("APP") +LOG = logging.getLogger("Calculator") + + +class ProfilerPage(RequestHandler): + """Render the profiler page. + + This class does not inherit from BaseRequestHandler to avoid profiling the + profiler page itself. + """ + def get(self) -> None: + profiler = CaimiraProfiler() + + template_environment = self.settings["template_environment"] + template = template_environment.get_template("profiler.html.j2") + report = template.render( + user=AnonymousUser(), + active_page="Profiler", + xsrf_form_html=self.xsrf_form_html(), + is_active=profiler.is_active, + sessions=profiler.sessions, + ) + self.finish(report) + + def post(self) -> None: + profiler = CaimiraProfiler() + + if self.get_argument("start", None) is not None: + name = self.get_argument("name", None) + profiler_type = Profilers.from_str(self.get_argument("profiler_type", "")) + profiler.start_session(name, profiler_type) + elif self.get_argument("stop", None) is not None: + profiler.stop_session() + elif self.get_argument("clear", None) is not None: + profiler.clear_sessions() + + self.redirect(CaimiraProfiler.ROOT_URL) + + +class ProfilerReport(RequestHandler): + """Render the profiler HTML report.""" + def get(self, report_id) -> None: + profiler = CaimiraProfiler() + _, report_html = profiler.get_report(report_id) + if report_html: + self.finish(report_html) + else: + self.send_error(404) class BaseRequestHandler(RequestHandler): @@ -64,6 +111,22 @@ async def prepare(self): else: self.current_user = AnonymousUser() + profiler = CaimiraProfiler() + if profiler.is_active and not self.request.path.startswith(CaimiraProfiler.ROOT_URL): + self._request_profiler = profiler.start_profiler() + + def on_finish(self) -> None: + """Called at the end of the request.""" + profiler = CaimiraProfiler() + if profiler.is_active and self._request_profiler: + profiler.stop_profiler( + profiler=self._request_profiler, + uri=self.request.uri, + path=self.request.path, + query=self.request.query, + method=self.request.method, + ) + def write_error(self, status_code: int, **kwargs) -> None: template = self.settings["template_environment"].get_template( "error.html.j2") @@ -199,6 +262,7 @@ async def get(self) -> None: base_url = self.request.protocol + "://" + self.request.host report_generator: ReportGenerator = self.settings['report_generator'] executor = loky.get_reusable_executor(max_workers=self.settings['handler_worker_pool_size']) + report_task = executor.submit( report_generator.build_report, base_url, form, executor_factory=functools.partial( @@ -452,6 +516,13 @@ def make_app( 'filename': 'userguide.html.j2'}), ] + profiler = os.environ.get('CAIMIRA_PROFILER_ENABLED', 0) + if debug or profiler: + urls += [ + (get_root_url(CaimiraProfiler.ROOT_URL), ProfilerPage), + (get_root_url(r'{root_url}/(.*)'.format(root_url=CaimiraProfiler.ROOT_URL)), ProfilerReport), + ] + interface: str = os.environ.get('CAIMIRA_THEME', '') if interface != '' and (interface != '' and 'cern' not in interface): urls = list(filter(lambda i: i in base_urls, urls)) diff --git a/caimira/apps/calculator/__main__.py b/caimira/apps/calculator/__main__.py index 6bf16d30..5527e357 100644 --- a/caimira/apps/calculator/__main__.py +++ b/caimira/apps/calculator/__main__.py @@ -58,7 +58,12 @@ def main(): theme_dir = Path(theme_dir).absolute() assert theme_dir.exists() - app = make_app(debug=debug, APPLICATION_ROOT=args.app_root, calculator_prefix=args.prefix, theme_dir=theme_dir) + app = make_app( + debug=debug, + APPLICATION_ROOT=args.app_root, + calculator_prefix=args.prefix, + theme_dir=theme_dir + ) app.listen(args.port) IOLoop.current().start() diff --git a/caimira/apps/calculator/report_generator.py b/caimira/apps/calculator/report_generator.py index f4ab702d..f09f16ea 100644 --- a/caimira/apps/calculator/report_generator.py +++ b/caimira/apps/calculator/report_generator.py @@ -14,6 +14,7 @@ from caimira import models from caimira.apps.calculator import markdown_tools +from caimira.profiler import profile from caimira.store.data_registry import DataRegistry from ... import monte_carlo as mc from .model_generator import VirusFormData @@ -114,6 +115,7 @@ def concentrations_with_sr_breathing(form: VirusFormData, model: models.Exposure return lower_concentrations +@profile def calculate_report_data(form: VirusFormData, model: models.ExposureModel) -> typing.Dict[str, typing.Any]: times = interesting_times(model) short_range_intervals = [interaction.presence.boundaries()[0] for interaction in model.short_range] diff --git a/caimira/apps/templates/profiler.html.j2 b/caimira/apps/templates/profiler.html.j2 new file mode 100644 index 00000000..bba22598 --- /dev/null +++ b/caimira/apps/templates/profiler.html.j2 @@ -0,0 +1,62 @@ +{% extends "layout.html.j2" %} + +{% block main %} +
+
+ +

Profiler

+ {% if is_active %} +
+ The profiler is running. +
+ + {% else %} + The profiler is not running. +
+ + + + +
+ + +
+
+ + {% endif %} + + {{ xsrf_form_html }} + +

Sessions

+ {% if sessions %} +
    + {% for name, reports in sessions.items() %} +
  1. Name: {{ name }}
  2. +
      + {% if reports %} + {% for report in reports %} +
    • {{ report["ts"] }} - {{ report["method"] }} {{ report["uri"] }} - Report
    • + {% endfor %} + {% else %} + No reports yet! + {% endif %} +
    + {% endfor %} +
+ + {% if not is_active %} +
+ +
+ {% endif %} + + {% else %} + No sessions yet! + {% endif %} +
+
+{% endblock main %} diff --git a/caimira/profiler.py b/caimira/profiler.py new file mode 100644 index 00000000..a5f67576 --- /dev/null +++ b/caimira/profiler.py @@ -0,0 +1,294 @@ +import cProfile +import glob +import json +import logging +import os +import pstats +import shutil +import typing +from datetime import datetime +from enum import Enum +from functools import wraps +from pstats import f8, func_std_string + +from pyinstrument import Profiler as PyInstrumentProfiler + +logger = logging.getLogger("Profiler") + + +class Profilers(Enum): + PYINSTRUMENT = "pyinstrument" + CPROFILER = "cprofiler" + + @staticmethod + def from_str(value): + if value == Profilers.PYINSTRUMENT.value: + return Profilers.PYINSTRUMENT + elif value == Profilers.CPROFILER.value: + return Profilers.CPROFILER + else: + raise NotImplementedError + + +class PyInstrumentWrapper: + profiler = PyInstrumentProfiler(async_mode=True) + + @property + def is_running(self): + return self.profiler.is_running + + def start(self): + self.profiler.start() + + def stop(self): + self.profiler.stop() + report_html = self.profiler.output_html(timeline=True) + report_id = str(abs(hash(report_html))) + return report_id, Profilers.PYINSTRUMENT.value, report_html + + +class CProfilerWrapper: + profiler = cProfile.Profile() + _is_running = False + + @property + def is_running(self): + return self._is_running + + def start(self): + self.profiler.enable() + self._is_running = True + + def stop(self): + self.profiler.disable() + self._is_running = False + + ps = pstats.Stats(self.profiler).sort_stats("cumulative") + + report_html = "

" + report_html += f"{ps.total_calls} function calls" + if ps.total_calls != ps.prim_calls: + report_html += " (%d primitive calls)" % ps.prim_calls + report_html += " in %.3f seconds" % ps.total_tt + report_html += "

" + + report_html += "

" + report_html += f"Ordered by: {ps.sort_type}" + report_html += "

" + + report_html += "" + report_html += "" + report_html += "" + for func in ps.stats: + report_html += "" + cc, nc, tt, ct, callers = ps.stats[func] + c = str(nc) + if nc != cc: + c = c + "/" + str(cc) + report_html += f"" + report_html += f"" + report_html += f"" if nc == 0 else f"" + report_html += f"" + report_html += f"" if cc == 0 else f"" + report_html += f"" + report_html += "" + report_html += "
ncallstottimepercallcumtimepercallfilename:lineno(function)
{c.rjust(9)}{f8(tt)}{f8(tt/nc)}{f8(ct)}{f8(ct/cc)}{func_std_string(func)}
" + report_id = str(abs(hash(report_html))) + return report_id, Profilers.CPROFILER.value, report_html + + +class Profiler: + profiler = None + profiler_type = None + + def __init__(self, profiler_type): + self.profiler_type = profiler_type + + def start(self): + if self.profiler_type == Profilers.PYINSTRUMENT: + self.profiler = PyInstrumentWrapper() + elif self.profiler_type == Profilers.CPROFILER: + self.profiler = CProfilerWrapper() + else: + raise NotImplementedError + + self.profiler.start() + + def stop(self, session_name, uri, path="", query="", method=""): + report_id, report_type, report_html = self.profiler.stop() + self.profiler = None + return report_id, dict( + session_name=session_name, + ts=datetime.utcnow().strftime("%Y-%m-%d - %H:%M:%S"), + uri=uri, + path=path, + query=query, + method=method, + report_id=report_id, + report_type=report_type, + report_html=report_html, + ) + + def is_running(self): + return self.profiler and self.profiler.is_running + + +class CaimiraProfiler: + CACHE_DIR: str = os.environ.get("CAIMIRA_PROFILER_CACHE_DIR", "/tmp") + ROOT_URL: str = "/profiler" + _cache_dirpath: str = None + _cache_filepath: str = None + + def __init__(self, cache_dir: typing.Optional[str] = None): + _cache_dir = os.path.join(self.CACHE_DIR, "caimira-profiler") + self._cache_dirpath = _cache_dir + self._cache_filepath = os.path.join(_cache_dir, "active") + + def _set_active(self, name: str, profiler_type: Profilers): + try: + os.access(self._cache_dirpath, os.W_OK) + except Exception as e: + # Handle the exception, e.g., print an error message + logger.warning(f"cannot write in cache folder {self._cache_dirpath}") + + if not os.path.exists(self._cache_dirpath): + os.makedirs(self._cache_dirpath) + + with open(self._cache_filepath, "w") as fp: + json.dump(dict(name=name, profiler_type=profiler_type.value), fp) + + def _set_inactive(self): + with open(self._cache_filepath, "w") as fp: + json.dump(dict(), fp) + + @property + def sessions(self): + """Return all stored sessions.""" + if not os.path.exists(self._cache_dirpath): + return {} + + reports_by_session = {} + json_files = glob.glob(os.path.join(self._cache_dirpath, "*.json")) + for json_file in json_files: + with open(json_file) as fp: + report = json.load(fp) + + session_name = report["session_name"] + + reports_by_session.setdefault(session_name, []) + reports_by_session[session_name].append(report) + + return reports_by_session + + @property + def session(self): + """Return the session name.""" + if not os.path.exists(self._cache_filepath): + return None + + with open(self._cache_filepath) as fp: + d = json.load(fp) + return dict( + name=d["name"], profiler_type=Profilers.from_str(d["profiler_type"]) + ) + + @property + def is_active(self): + """Return True if a session is active.""" + if not os.path.exists(self._cache_filepath): + return False + + try: + with open(self._cache_filepath) as fp: + d = json.load(fp) + return True if d else False + except json.decoder.JSONDecodeError: + os.remove(self._cache_filepath) + return False + + def start_session( + self, + name: typing.Optional[str] = None, + profiler_type: typing.Optional[Profilers] = Profilers.PYINSTRUMENT, + ): + """Start a new session, assigning the given name.""" + now = datetime.utcnow().isoformat() + name = f"{name}-{now}" if name else now + self._set_active(name, profiler_type) + + def start_profiler(self): + """Start the profiler. + + In the context of HTTP requests, this should be called at the beginning of + each HTTP request. + """ + if not self.is_active: + raise Exception("No active session.") + + profiler_type = self.session["profiler_type"] + profiler = Profiler(profiler_type) + profiler.start() + return profiler + + def stop_profiler( + self, + profiler, + uri: str, + path: typing.Optional[str] = "", + query: typing.Optional[str] = "", + method: typing.Optional[str] = "", + ): + """Stop the profiler, previously obtained when starting it. + + In the context of HTTP requests, this should be called at the end of + each HTTP request. + """ + if not self.is_active: + raise Exception("No active session.") + + report_id, report = profiler.stop( + self.session["name"], uri, path, query, method + ) + with open(os.path.join(self._cache_dirpath, f"{report_id}.json"), "w") as fp: + json.dump(report, fp) + + def stop_session(self): + """Stop the current active session.""" + if not self.is_active: + raise Exception("No active session.") + + self._set_inactive() + + def get_report(self, report_id: str): + """Return a report by the given id.""" + for reports in self.sessions.values(): + for report in reports: + if report["report_id"] == report_id: + return ( + Profilers.from_str(report["report_type"]), + report["report_html"], + ) + + def clear_sessions(self): + """Delete all sessions.""" + if self.is_active: + raise Exception("Session still active.") + + shutil.rmtree(self._cache_dirpath) + + +def profile(func): + @wraps(func) + def wrapper(*args, **kwargs): + profiler = CaimiraProfiler() + if profiler.is_active: + profiler_run = profiler.start_profiler() + + result = func(*args, **kwargs) + + if profiler.is_active: + profiler.stop_profiler(profiler=profiler_run, uri=func.__name__) + + return result + + return wrapper diff --git a/open-source-licences/README.md b/open-source-licences/README.md index b1177d82..451a9f00 100644 --- a/open-source-licences/README.md +++ b/open-source-licences/README.md @@ -374,6 +374,10 @@ Including transitive dependencies is essential to acknowledge the full spectrum - License: [BSD License (BSD-2-Clause)](https://pypi.org/project/Pygments/) +#### pyinstrument 4.6.1 + +- License: [BSD License (BSD-3-Clause)](https://pypi.org/project/pyinstrument/) + #### pyparsing 3.1.1 - License: [MIT License](https://pypi.org/project/pyparsing/) diff --git a/setup.cfg b/setup.cfg index 38fa6323..87ec4d08 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,6 @@ [tool:pytest] addopts = --mypy - [mypy] no_warn_no_return = True diff --git a/setup.py b/setup.py index 5bfc2729..c0d05007 100644 --- a/setup.py +++ b/setup.py @@ -30,6 +30,7 @@ 'numpy', 'pandas', 'psutil', + 'pyinstrument', 'pyjwt', 'python-dateutil', 'retry',