From 48d1d10de97aff955cef373ddf9930f1e9731bc5 Mon Sep 17 00:00:00 2001 From: T Ehrhardt Date: Sat, 24 Aug 2024 14:09:45 +0200 Subject: [PATCH 1/4] add report_profile method to DistributionMaker and Detectors and propagate profile choice in DistributionMaker --- pisa/core/detectors.py | 9 +++++++-- pisa/core/distribution_maker.py | 7 +++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pisa/core/detectors.py b/pisa/core/detectors.py index 6819bdb00..4c69bc5b4 100644 --- a/pisa/core/detectors.py +++ b/pisa/core/detectors.py @@ -66,7 +66,7 @@ def __init__(self, pipelines, label=None, set_livetime_from_data=True, profile=F self._distribution_makers , self.det_names = [] , [] for pipeline in pipelines: if not isinstance(pipeline, Pipeline): - pipeline = Pipeline(pipeline) + pipeline = Pipeline(pipeline, profile=profile) name = pipeline.detector_name if name in self.det_names: @@ -111,7 +111,12 @@ def tabulate(self, tablefmt="plain"): def __iter__(self): return iter(self._distribution_makers) - + + def report_profile(self, detailed=False): + for distribution_maker in self._distribution_makers: + print(distribution_maker.detector_name + ':') + distribution_maker.report_profile(detailed=detailed) + @property def profile(self): return self._profile diff --git a/pisa/core/distribution_maker.py b/pisa/core/distribution_maker.py index 69aa1529f..17d853c21 100755 --- a/pisa/core/distribution_maker.py +++ b/pisa/core/distribution_maker.py @@ -104,6 +104,8 @@ def __init__(self, pipelines, label=None, set_livetime_from_data=True, profile=F for pipeline in pipelines: if not isinstance(pipeline, Pipeline): pipeline = Pipeline(pipeline, profile=profile) + else: + pipeline.profile = profile self._pipelines.append(pipeline) data_run_livetime = None @@ -215,6 +217,11 @@ def tabulate(self, tablefmt="plain"): def __iter__(self): return iter(self._pipelines) + def report_profile(self, detailed=False): + for pipeline in self.pipelines: + print(pipeline.name + ':') + pipeline.report_profile(detailed=detailed) + @property def profile(self): return self._profile From b2ddd7fafd5cd90ff34c42ddd9115e6337b35c4d Mon Sep 17 00:00:00 2001 From: T Ehrhardt Date: Wed, 28 Aug 2024 20:57:16 +0200 Subject: [PATCH 2/4] add pipeline timing information --- pisa/core/distribution_maker.py | 3 +- pisa/core/pipeline.py | 50 ++++++++++++++++++++++++++++++++- pisa/core/stage.py | 13 +++------ pisa/stages/utils/kde.py | 19 ++++++++++--- pisa/utils/format.py | 19 +++++++++++++ 5 files changed, 88 insertions(+), 16 deletions(-) diff --git a/pisa/core/distribution_maker.py b/pisa/core/distribution_maker.py index 17d853c21..a1a949db4 100755 --- a/pisa/core/distribution_maker.py +++ b/pisa/core/distribution_maker.py @@ -219,7 +219,6 @@ def __iter__(self): def report_profile(self, detailed=False): for pipeline in self.pipelines: - print(pipeline.name + ':') pipeline.report_profile(detailed=detailed) @property @@ -232,8 +231,8 @@ def profile(self, value): pipeline.profile = value self._profile = value - def run(self): + """Run all pipelines""" for pipeline in self: pipeline.run() diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index 311e21ee9..023f542c4 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -17,6 +17,7 @@ from inspect import getsource import os from tabulate import tabulate +from time import time import traceback import numpy as np @@ -30,6 +31,7 @@ from pisa.core.binning import MultiDimBinning from pisa.utils.config_parser import PISAConfigParser, parse_pipeline_config from pisa.utils.fileio import mkdir +from pisa.utils.format import format_times from pisa.utils.hash import hash_obj from pisa.utils.log import logging, set_verbosity from pisa.utils.profiler import profile @@ -107,6 +109,9 @@ def __init__(self, config, profile=False): self.output_key = config['pipeline']['output_key'] self._profile = profile + self._setup_times = [] + self._run_times = [] + self._get_outputs_times = [] self._stages = [] self._config = config @@ -141,6 +146,17 @@ def tabulate(self, tablefmt="plain"): return tabulate(table, headers, tablefmt=tablefmt, colalign=colalign) def report_profile(self, detailed=False): + print(f'Pipeline: {self.name}') + print('- setup: ', format_times(self._setup_times)) + if detailed: + print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._setup_times)])) + print('- run: ', format_times(self._run_times)) + if detailed: + print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._run_times)])) + print('- get_outputs: ', format_times(self._get_outputs_times)) + if detailed: + print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._get_outputs_times)])) + print('Individual services:') for stage in self.stages: stage.report_profile(detailed=detailed) @@ -315,7 +331,19 @@ def _init_stages(self): self.setup() - def get_outputs(self, output_binning=None, output_key=None): + def get_outputs(self, **get_outputs_kwargs): + """Wrapper around `_get_outputs`. The latter might + have quite some overhead compared to `run` alone""" + if self.profile: + start_t = time() + outputs = self._get_outputs(**get_outputs_kwargs) + end_t = time() + self._get_outputs_times.append(end_t - start_t) + else: + outputs = self._get_outputs(**get_outputs_kwargs) + return outputs + + def _get_outputs(self, output_binning=None, output_key=None): """Get MapSet output""" @@ -393,12 +421,32 @@ def _add_rotated(self, paramset:ParamSet, suppress_warning=False): return success def run(self): + """Wrapper around `_run_function`""" + if self.profile: + start_t = time() + self._run_function() + end_t = time() + self._run_times.append(end_t - start_t) + else: + self._run_function() + + def _run_function(self): """Run the pipeline to compute""" for stage in self.stages: logging.debug(f"Working on stage {stage.stage_name}.{stage.service_name}") stage.run() def setup(self): + """Wrapper around `_setup_function`""" + if self.profile: + start_t = time() + self._setup_function() + end_t = time() + self._setup_times.append(end_t - start_t) + else: + self._setup_function() + + def _setup_function(self): """Setup (reset) all stages""" self.data = ContainerSet(self.name) for stage in self.stages: diff --git a/pisa/core/stage.py b/pisa/core/stage.py index 6d865f7f5..63a8bbabe 100644 --- a/pisa/core/stage.py +++ b/pisa/core/stage.py @@ -12,6 +12,7 @@ import numpy as np from pisa.core.container import ContainerSet +from pisa.utils.format import format_times from pisa.utils.log import logging from pisa.core.param import ParamSelector from pisa.utils.format import arg_str_seq_none @@ -139,20 +140,14 @@ def __repr__(self): return 'Stage "%s"'%(self.__class__.__name__) def report_profile(self, detailed=False): - def format(times): - tot = np.sum(times) - n = len(times) - ave = 0. if n == 0 else tot/n - return 'Total time %.5f s, n calls: %i, time/call: %.5f s'%(tot, n, ave) - print(self.stage_name, self.service_name) - print('- setup: ', format(self.setup_times)) + print('- setup: ', format_times(self.setup_times)) if detailed: print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.setup_times)])) - print('- calc: ', format(self.calc_times)) + print('- calc: ', format_times(self.calc_times)) if detailed: print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.calc_times)])) - print('- apply: ', format(self.apply_times)) + print('- apply: ', format_times(self.apply_times)) if detailed: print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.apply_times)])) diff --git a/pisa/stages/utils/kde.py b/pisa/stages/utils/kde.py index 8cf9e9351..02bd5c076 100644 --- a/pisa/stages/utils/kde.py +++ b/pisa/stages/utils/kde.py @@ -5,6 +5,7 @@ from copy import deepcopy import numpy as np +from time import time from pisa.core.stage import Stage from pisa.core.binning import MultiDimBinning, OneDimBinning @@ -132,11 +133,21 @@ def setup_function(self): @profile def apply(self): - # this is special, we want the actual event weights in the kde - # therefor we're overwritting the apply function - # normally in a stage you would implement the `apply_function` method - # and not the `apply` method! + """This is special, we want the actual event weights in the kde + therefor we're overwritting the apply function + normally in a stage you would implement the `apply_function` method + and not the `apply` method! We also have to reimplement the profiling + functionality in apply of the Base class""" + + if self.profile: + start_t = time() + self.apply_function() + end_t = time() + self.apply_times.append(end_t - start_t) + else: + self.apply_function() + def apply_function(self): for container in self.data: if self.stash_valid: diff --git a/pisa/utils/format.py b/pisa/utils/format.py index 1c48b4e55..9dbca8a02 100644 --- a/pisa/utils/format.py +++ b/pisa/utils/format.py @@ -1267,6 +1267,25 @@ def format_num( return left_delimiter + num_str + right_delimiter +def format_times(times): + """Helper function used to report sum and average of times + in `times` sequence as well as its length (representing number + of calls of some function). + + Parameters + ---------- + times : Sequence + + Returns + ------- + formatted : string + """ + tot = np.sum(times) + n = len(times) + ave = 0. if n == 0 else tot/n + return 'Total time %.5f s, n calls: %i, time/call: %.5f s'%(tot, n, ave) + + def test_format_num(): """Unit tests for the `format_num` function""" # sci_thresh From 98f6e649fc4515c915ef4f85e0cc09460cf53507 Mon Sep 17 00:00:00 2001 From: T Ehrhardt Date: Fri, 30 Aug 2024 10:37:15 +0200 Subject: [PATCH 3/4] move formatting of run times to central location in utils.format and use format_num for customisability of outputs; small test of correct propagation of profile from DistributionMaker to Pipeline --- pisa/core/detectors.py | 11 +++++--- pisa/core/distribution_maker.py | 33 +++++++++++++++++++---- pisa/core/pipeline.py | 40 ++++++++++++++++++++-------- pisa/core/stage.py | 23 +++++++++------- pisa/utils/format.py | 47 +++++++++++++++++++++++++++------ 5 files changed, 116 insertions(+), 38 deletions(-) diff --git a/pisa/core/detectors.py b/pisa/core/detectors.py index 4c69bc5b4..9c3bade82 100644 --- a/pisa/core/detectors.py +++ b/pisa/core/detectors.py @@ -13,7 +13,6 @@ from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from collections import OrderedDict import inspect -from itertools import product import os from tabulate import tabulate from copy import deepcopy @@ -21,7 +20,6 @@ import numpy as np from pisa import ureg -from pisa.core.map import MapSet from pisa.core.pipeline import Pipeline from pisa.core.distribution_maker import DistributionMaker from pisa.core.param import ParamSet, Param @@ -112,10 +110,15 @@ def tabulate(self, tablefmt="plain"): def __iter__(self): return iter(self._distribution_makers) - def report_profile(self, detailed=False): + def report_profile(self, detailed=False, format_num_kwargs=None): + """Report timing information on contained distribution makers. + See `Pipeline.report_profile` for details. + """ for distribution_maker in self._distribution_makers: print(distribution_maker.detector_name + ':') - distribution_maker.report_profile(detailed=detailed) + distribution_maker.report_profile( + detailed=detailed, format_num_kwargs=format_num_kwargs + ) @property def profile(self): diff --git a/pisa/core/distribution_maker.py b/pisa/core/distribution_maker.py index a1a949db4..24ffce248 100755 --- a/pisa/core/distribution_maker.py +++ b/pisa/core/distribution_maker.py @@ -11,7 +11,6 @@ from collections import OrderedDict from collections.abc import Mapping import inspect -from itertools import product import os from tabulate import tabulate @@ -105,7 +104,10 @@ def __init__(self, pipelines, label=None, set_livetime_from_data=True, profile=F if not isinstance(pipeline, Pipeline): pipeline = Pipeline(pipeline, profile=profile) else: - pipeline.profile = profile + if profile: + # Only propagate if set to `True` (don't allow negative + # default to negate any original choice for the instance) + pipeline.profile = profile self._pipelines.append(pipeline) data_run_livetime = None @@ -217,9 +219,14 @@ def tabulate(self, tablefmt="plain"): def __iter__(self): return iter(self._pipelines) - def report_profile(self, detailed=False): + def report_profile(self, detailed=False, format_num_kwargs=None): + """Report timing information on contained pipelines. + See `Pipeline.report_profile` for details. + """ for pipeline in self.pipelines: - pipeline.report_profile(detailed=detailed) + pipeline.report_profile( + detailed=detailed, format_num_kwargs=format_num_kwargs + ) @property def profile(self): @@ -547,6 +554,22 @@ def test_DistributionMaker(): #current_hier = new_hier #current_mat = new_mat + # test profile flag + p_cfg = 'settings/pipeline/example.cfg' + p = Pipeline(p_cfg, profile=True) + dm = DistributionMaker(pipelines=p) + # default init using Pipeline instance shouldn't negate + assert dm.pipelines[0].profile + # but explicit request should + dm.profile = False + assert not dm.pipelines[0].profile + # now init from cfg path and request profile + dm = DistributionMaker(pipelines=p_cfg, profile=True) + assert dm.pipelines[0].profile + # explicitly request no profile + dm = DistributionMaker(pipelines=p_cfg, profile=False) + assert not dm.pipelines[0].profile + def parse_args(): """Get command line arguments""" @@ -632,4 +655,4 @@ def main(return_outputs=False): if __name__ == '__main__': - distribution_maker, outputs = main(return_outputs=True) # pylint: disable=invalid-name + distribution_maker, outputs = main(return_outputs=True) diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index 023f542c4..b477c04a3 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -10,6 +10,7 @@ from argparse import ArgumentParser from collections import OrderedDict +from collections.abc import Mapping from configparser import NoSectionError from copy import deepcopy from importlib import import_module @@ -145,20 +146,37 @@ def tabulate(self, tablefmt="plain"): table[-1] += [len(s.params.fixed), len(s.params.free)] return tabulate(table, headers, tablefmt=tablefmt, colalign=colalign) - def report_profile(self, detailed=False): + def report_profile(self, detailed=False, format_num_kwargs=None): + """Report timing information on pipeline and contained services + + Parameters + ---------- + detailed : bool, default False + Whether to increase level of detail + format_num_kwargs : dict, optional + Dictionary containing arguments passed to `utils.format.format_num`. + Will display each number with three decimal digits by default. + + """ + if format_num_kwargs is None: + format_num_kwargs = { + 'precision': 1e-3, 'fmt': 'full', 'trailing_zeros': True + } + assert isinstance(format_num_kwargs, Mapping) print(f'Pipeline: {self.name}') - print('- setup: ', format_times(self._setup_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._setup_times)])) - print('- run: ', format_times(self._run_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._run_times)])) - print('- get_outputs: ', format_times(self._get_outputs_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self._get_outputs_times)])) + for func_str, times in [ + ('- setup: ', self._setup_times), + ('- run: ', self._run_times), + ('- get_outputs: ', self._get_outputs_times) + ]: + print(func_str, + format_times(times=times, + nindent_detailed=len(func_str) + 1, + detailed=detailed, **format_num_kwargs) + ) print('Individual services:') for stage in self.stages: - stage.report_profile(detailed=detailed) + stage.report_profile(detailed=detailed, **format_num_kwargs) @property def profile(self): diff --git a/pisa/core/stage.py b/pisa/core/stage.py index 63a8bbabe..3574bfae0 100644 --- a/pisa/core/stage.py +++ b/pisa/core/stage.py @@ -139,17 +139,20 @@ def __init__( def __repr__(self): return 'Stage "%s"'%(self.__class__.__name__) - def report_profile(self, detailed=False): + def report_profile(self, detailed=False, **format_num_kwargs): + """Report timing information on calls to setup, compute, and apply + """ print(self.stage_name, self.service_name) - print('- setup: ', format_times(self.setup_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.setup_times)])) - print('- calc: ', format_times(self.calc_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.calc_times)])) - print('- apply: ', format_times(self.apply_times)) - if detailed: - print(' Individual runs: ', ', '.join(['%i: %.3f s' % (i, t) for i, t in enumerate(self.apply_times)])) + for func_str, times in [ + ('- setup: ', self.setup_times), + ('- compute: ', self.calc_times), + ('- apply: ', self.apply_times) + ]: + print(func_str, + format_times(times=times, + nindent_detailed=len(func_str) + 1, + detailed=detailed, **format_num_kwargs) + ) def select_params(self, selections, error_on_missing=False): """Apply the `selections` to contained ParamSet. diff --git a/pisa/utils/format.py b/pisa/utils/format.py index 9dbca8a02..48ac3f517 100644 --- a/pisa/utils/format.py +++ b/pisa/utils/format.py @@ -247,18 +247,22 @@ def split(string, sep=',', force_case=None, parse_func=None): def arg_str_seq_none(inputs, name): """Simple input handler. + Parameters ---------- inputs : None, string, or iterable of strings Input value(s) provided by caller name : string Name of input, used for producing a meaningful error message + Returns ------- inputs : None, or list of strings + Raises ------ TypeError if unrecognized type + """ if isinstance(inputs, str): inputs = [inputs] @@ -1267,23 +1271,50 @@ def format_num( return left_delimiter + num_str + right_delimiter -def format_times(times): - """Helper function used to report sum and average of times - in `times` sequence as well as its length (representing number - of calls of some function). +def format_times(times, nindent_detailed=0, detailed=False, **format_num_kwargs): + """Report statistics derived from a sample of run times, + whose size may represent the number of calls to some function, + using a custom number format. Parameters ---------- - times : Sequence + times : Sequence of float + Sequence of run times + nindent_detailed : int, optional + Number of spaces for indentation of detailed info + detailed : bool, default False + Whether to output every individual run time also + **format_num_kwargs : dict, optional + Arguments to `format_num`: refer to its documentation for + the list of all possible arguments. Returns ------- - formatted : string + formatted : str + """ + assert isinstance(times, Sequence) tot = np.sum(times) n = len(times) - ave = 0. if n == 0 else tot/n - return 'Total time %.5f s, n calls: %i, time/call: %.5f s'%(tot, n, ave) + if n == 0: + return 'n calls: 0' + ave = format_num(tot/n, **format_num_kwargs) + tot = format_num(tot, **format_num_kwargs) + max_time = format_num(np.max(times), **format_num_kwargs) + min_time = format_num(np.min(times), **format_num_kwargs) + formatted = f'Total time (s): {tot}, n calls: {n}' + if n > 1: + formatted += ( + f', time/call (s): mean {ave}, max. {max_time}, min. {min_time}' + ) + if detailed: + assert isinstance(nindent_detailed, int) + formatted += '\n' + ' ' * nindent_detailed + 'Individual runs: ' + for i, t in enumerate(times): + formatted += '%i: %s s, ' % ( + i, format_num(t, **format_num_kwargs) + ) + return formatted def test_format_num(): From 732784771b158eedc8760514217386992917e672 Mon Sep 17 00:00:00 2001 From: T Ehrhardt Date: Fri, 30 Aug 2024 11:03:38 +0200 Subject: [PATCH 4/4] Warn at the pipeline level in case timings should be reported but profile is set to False --- pisa/core/pipeline.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pisa/core/pipeline.py b/pisa/core/pipeline.py index b477c04a3..9e0c952c6 100755 --- a/pisa/core/pipeline.py +++ b/pisa/core/pipeline.py @@ -158,6 +158,15 @@ def report_profile(self, detailed=False, format_num_kwargs=None): Will display each number with three decimal digits by default. """ + if not self.profile: + # Report warning only at the pipeline level, which is what the + # typical user should come across. Assume that users calling + # `report_profile` on a `Stage` instance directly know what they're + # doing. + logging.warn( + '`profile` is set to False. Results may not show the expected ' + 'numbers of function calls.' + ) if format_num_kwargs is None: format_num_kwargs = { 'precision': 1e-3, 'fmt': 'full', 'trailing_zeros': True